mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 17:30:12 +01:00
radeon/llvm: Remove backend code from Mesa
This code now lives in an external tree. For the next Mesa release fetch the code from the master branch of this LLVM repo: http://cgit.freedesktop.org/~tstellar/llvm/ For all subsequent Mesa releases, fetch the code from the official LLVM project: www.llvm.org
This commit is contained in:
parent
05c143cc04
commit
aed37cbee8
99 changed files with 0 additions and 19168 deletions
18
src/gallium/drivers/radeon/.gitignore
vendored
18
src/gallium/drivers/radeon/.gitignore
vendored
|
|
@ -1,18 +0,0 @@
|
|||
AMDGPUInstrEnums.h.include
|
||||
AMDGPUInstrEnums.include
|
||||
AMDGPUInstrEnums.td
|
||||
AMDILGenAsmWriter.inc
|
||||
AMDILGenCallingConv.inc
|
||||
AMDILGenCodeEmitter.inc
|
||||
AMDILGenDAGISel.inc
|
||||
AMDILGenEDInfo.inc
|
||||
AMDILGenInstrInfo.inc
|
||||
AMDILGenIntrinsics.inc
|
||||
AMDILGenRegisterInfo.inc
|
||||
AMDILGenSubtargetInfo.inc
|
||||
R600HwRegInfo.include
|
||||
R600Intrinsics.td
|
||||
R600RegisterInfo.td
|
||||
SIRegisterGetHWRegNum.inc
|
||||
SIRegisterInfo.td
|
||||
loader
|
||||
|
|
@ -1,46 +0,0 @@
|
|||
//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPU_H
|
||||
#define AMDGPU_H
|
||||
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class FunctionPass;
|
||||
class AMDGPUTargetMachine;
|
||||
|
||||
// R600 Passes
|
||||
FunctionPass* createR600KernelParametersPass(const TargetData* TD);
|
||||
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
||||
|
||||
// SI Passes
|
||||
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
|
||||
FunctionPass *createSILowerFlowControlPass(TargetMachine &tm);
|
||||
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
||||
FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
|
||||
|
||||
// Passes common to R600 and SI
|
||||
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
namespace ShaderType {
|
||||
enum Type {
|
||||
PIXEL = 0,
|
||||
VERTEX = 1,
|
||||
GEOMETRY = 2,
|
||||
COMPUTE = 3
|
||||
};
|
||||
}
|
||||
|
||||
#endif // AMDGPU_H
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
|
||||
// Include AMDIL TD files
|
||||
include "AMDILBase.td"
|
||||
|
||||
|
||||
def AMDGPUInstrInfo : InstrInfo {}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Declare the target which we are implementing
|
||||
//===----------------------------------------------------------------------===//
|
||||
def AMDGPUAsmWriter : AsmWriter {
|
||||
string AsmWriterClassName = "InstPrinter";
|
||||
int Variant = 0;
|
||||
bit isMCAsmWriter = 1;
|
||||
}
|
||||
|
||||
def AMDGPU : Target {
|
||||
// Pull in Instruction Info:
|
||||
let InstructionSet = AMDGPUInstrInfo;
|
||||
let AssemblyWriters = [AMDGPUAsmWriter];
|
||||
}
|
||||
|
||||
// Include AMDGPU TD files
|
||||
include "R600Schedule.td"
|
||||
include "SISchedule.td"
|
||||
include "Processors.td"
|
||||
include "AMDGPUInstrInfo.td"
|
||||
include "AMDGPUIntrinsics.td"
|
||||
include "AMDGPURegisterInfo.td"
|
||||
include "AMDGPUInstructions.td"
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The AMDGPUAsmPrinter is used to print both assembly string and also binary
|
||||
// code. When passed an MCAsmStreamer it prints assembly and when passed
|
||||
// an MCObjectStreamer it outputs binary code.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
||||
|
||||
#include "AMDGPUAsmPrinter.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
||||
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
|
||||
MCStreamer &Streamer) {
|
||||
return new AMDGPUAsmPrinter(tm, Streamer);
|
||||
}
|
||||
|
||||
extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
|
||||
TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
|
||||
}
|
||||
|
||||
/// runOnMachineFunction - We need to override this function so we can avoid
|
||||
/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
|
||||
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
||||
if (STM.dumpCode()) {
|
||||
MF.dump();
|
||||
}
|
||||
SetupMachineFunction(MF);
|
||||
if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||
EmitProgramInfo(MF);
|
||||
}
|
||||
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
|
||||
EmitFunctionBody();
|
||||
return false;
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
|
||||
unsigned MaxSGPR = 0;
|
||||
unsigned MaxVGPR = 0;
|
||||
bool VCCUsed = false;
|
||||
const SIRegisterInfo * RI =
|
||||
static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
|
||||
|
||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
||||
BB != BB_E; ++BB) {
|
||||
MachineBasicBlock &MBB = *BB;
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
MachineInstr &MI = *I;
|
||||
|
||||
unsigned numOperands = MI.getNumOperands();
|
||||
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
|
||||
MachineOperand & MO = MI.getOperand(op_idx);
|
||||
unsigned maxUsed;
|
||||
unsigned width = 0;
|
||||
bool isSGPR = false;
|
||||
unsigned reg;
|
||||
unsigned hwReg;
|
||||
if (!MO.isReg()) {
|
||||
continue;
|
||||
}
|
||||
reg = MO.getReg();
|
||||
if (reg == AMDGPU::VCC) {
|
||||
VCCUsed = true;
|
||||
continue;
|
||||
}
|
||||
switch (reg) {
|
||||
default: break;
|
||||
case AMDGPU::EXEC:
|
||||
case AMDGPU::SI_LITERAL_CONSTANT:
|
||||
case AMDGPU::SREG_LIT_0:
|
||||
case AMDGPU::M0:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (AMDGPU::SReg_32RegClass.contains(reg)) {
|
||||
isSGPR = true;
|
||||
width = 1;
|
||||
} else if (AMDGPU::VReg_32RegClass.contains(reg)) {
|
||||
isSGPR = false;
|
||||
width = 1;
|
||||
} else if (AMDGPU::SReg_64RegClass.contains(reg)) {
|
||||
isSGPR = true;
|
||||
width = 2;
|
||||
} else if (AMDGPU::VReg_64RegClass.contains(reg)) {
|
||||
isSGPR = false;
|
||||
width = 2;
|
||||
} else if (AMDGPU::SReg_128RegClass.contains(reg)) {
|
||||
isSGPR = true;
|
||||
width = 4;
|
||||
} else if (AMDGPU::VReg_128RegClass.contains(reg)) {
|
||||
isSGPR = false;
|
||||
width = 4;
|
||||
} else if (AMDGPU::SReg_256RegClass.contains(reg)) {
|
||||
isSGPR = true;
|
||||
width = 8;
|
||||
} else {
|
||||
assert(!"Unknown register class");
|
||||
}
|
||||
hwReg = RI->getHWRegNum(reg);
|
||||
maxUsed = hwReg + width - 1;
|
||||
if (isSGPR) {
|
||||
MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
|
||||
} else {
|
||||
MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (VCCUsed) {
|
||||
MaxSGPR += 2;
|
||||
}
|
||||
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
|
||||
OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
|
||||
OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
|
||||
}
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// AMDGPU Assembly printer class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPU_ASMPRINTER_H
|
||||
#define AMDGPU_ASMPRINTER_H
|
||||
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUAsmPrinter : public AsmPrinter {
|
||||
|
||||
public:
|
||||
explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
|
||||
: AsmPrinter(TM, Streamer) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "AMDGPU Assembly Printer";
|
||||
}
|
||||
|
||||
/// EmitProgramInfo - Emit register usage information so that the GPU driver
|
||||
/// can correctly setup the GPU state.
|
||||
void EmitProgramInfo(MachineFunction &MF);
|
||||
|
||||
/// EmitInstuction - Implemented in AMDGPUMCInstLower.cpp
|
||||
virtual void EmitInstruction(const MachineInstr *MI);
|
||||
};
|
||||
|
||||
} // End anonymous llvm
|
||||
|
||||
#endif //AMDGPU_ASMPRINTER_H
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// CodeEmitter interface for R600 and SI codegen.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUCODEEMITTER_H
|
||||
#define AMDGPUCODEEMITTER_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUCodeEmitter {
|
||||
public:
|
||||
uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
|
||||
virtual uint64_t getMachineOpValue(const MachineInstr &MI,
|
||||
const MachineOperand &MO) const { return 0; }
|
||||
virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const {
|
||||
return 0;
|
||||
}
|
||||
virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const {
|
||||
return 0;
|
||||
}
|
||||
virtual uint64_t VOPPostEncode(const MachineInstr &MI,
|
||||
uint64_t Value) const {
|
||||
return Value;
|
||||
}
|
||||
virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
|
||||
unsigned OpNo) const {
|
||||
return 0;
|
||||
}
|
||||
virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
|
||||
const {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPUCODEEMITTER_H
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass lowers AMDIL machine instructions to the appropriate hardware
|
||||
// instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class AMDGPUConvertToISAPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
TargetMachine &TM;
|
||||
|
||||
public:
|
||||
AMDGPUConvertToISAPass(TargetMachine &tm) :
|
||||
MachineFunctionPass(ID), TM(tm) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
|
||||
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char AMDGPUConvertToISAPass::ID = 0;
|
||||
|
||||
FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
|
||||
return new AMDGPUConvertToISAPass(tm);
|
||||
}
|
||||
|
||||
bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
|
||||
{
|
||||
const AMDGPUInstrInfo * TII =
|
||||
static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
|
||||
|
||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
||||
BB != BB_E; ++BB) {
|
||||
MachineBasicBlock &MBB = *BB;
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
MachineInstr &MI = *I;
|
||||
TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1,351 +0,0 @@
|
|||
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This is the parent TargetLowering class for hardware code gen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUISelLowering.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
||||
TargetLowering(TM, new TargetLoweringObjectFileELF())
|
||||
{
|
||||
|
||||
// Initialize target lowering borrowed from AMDIL
|
||||
InitAMDILLowering();
|
||||
|
||||
// We need to custom lower some of the intrinsics
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
||||
// Library functions. These default to Expand, but we have instructions
|
||||
// for them.
|
||||
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FEXP2, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FPOW, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FLOG2, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FABS, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
|
||||
setOperationAction(ISD::FRINT, MVT::f32, Legal);
|
||||
|
||||
setOperationAction(ISD::UDIV, MVT::i32, Expand);
|
||||
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
|
||||
setOperationAction(ISD::UREM, MVT::i32, Expand);
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// TargetLowering Callbacks
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerFormalArguments(
|
||||
SDValue Chain,
|
||||
CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc DL, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const
|
||||
{
|
||||
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
|
||||
InVals.push_back(SDValue());
|
||||
}
|
||||
return Chain;
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerReturn(
|
||||
SDValue Chain,
|
||||
CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
DebugLoc DL, SelectionDAG &DAG) const
|
||||
{
|
||||
return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Target specific lowering
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
||||
const
|
||||
{
|
||||
switch (Op.getOpcode()) {
|
||||
default:
|
||||
Op.getNode()->dump();
|
||||
assert(0 && "Custom lowering code for this"
|
||||
"instruction is not implemented yet!");
|
||||
break;
|
||||
// AMDIL DAG lowering
|
||||
case ISD::SDIV: return LowerSDIV(Op, DAG);
|
||||
case ISD::SREM: return LowerSREM(Op, DAG);
|
||||
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
|
||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
||||
// AMDGPU DAG lowering
|
||||
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
||||
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
|
||||
}
|
||||
return Op;
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
SelectionDAG &DAG) const
|
||||
{
|
||||
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
switch (IntrinsicID) {
|
||||
default: return Op;
|
||||
case AMDGPUIntrinsic::AMDIL_abs:
|
||||
return LowerIntrinsicIABS(Op, DAG);
|
||||
case AMDGPUIntrinsic::AMDIL_exp:
|
||||
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
|
||||
case AMDGPUIntrinsic::AMDGPU_lrp:
|
||||
return LowerIntrinsicLRP(Op, DAG);
|
||||
case AMDGPUIntrinsic::AMDIL_fraction:
|
||||
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
|
||||
case AMDGPUIntrinsic::AMDIL_mad:
|
||||
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3));
|
||||
case AMDGPUIntrinsic::AMDIL_max:
|
||||
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDGPU_imax:
|
||||
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDGPU_umax:
|
||||
return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDIL_min:
|
||||
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDGPU_imin:
|
||||
return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDGPU_umin:
|
||||
return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2));
|
||||
case AMDGPUIntrinsic::AMDIL_round_nearest:
|
||||
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
|
||||
}
|
||||
}
|
||||
|
||||
///IABS(a) = SMAX(sub(0, a), a)
|
||||
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
|
||||
SelectionDAG &DAG) const
|
||||
{
|
||||
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
|
||||
Op.getOperand(1));
|
||||
|
||||
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
|
||||
}
|
||||
|
||||
/// Linear Interpolation
|
||||
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
|
||||
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
|
||||
SelectionDAG &DAG) const
|
||||
{
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
|
||||
DAG.getConstantFP(1.0f, MVT::f32),
|
||||
Op.getOperand(1));
|
||||
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
|
||||
Op.getOperand(3));
|
||||
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
|
||||
Op.getOperand(2),
|
||||
OneSubAC);
|
||||
}
|
||||
|
||||
|
||||
|
||||
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
|
||||
SelectionDAG &DAG) const
|
||||
{
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
SDValue Num = Op.getOperand(0);
|
||||
SDValue Den = Op.getOperand(1);
|
||||
|
||||
SmallVector<SDValue, 8> Results;
|
||||
|
||||
// RCP = URECIP(Den) = 2^32 / Den + e
|
||||
// e is rounding error.
|
||||
SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
|
||||
|
||||
// RCP_LO = umulo(RCP, Den) */
|
||||
SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
|
||||
|
||||
// RCP_HI = mulhu (RCP, Den) */
|
||||
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
|
||||
|
||||
// NEG_RCP_LO = -RCP_LO
|
||||
SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
|
||||
RCP_LO);
|
||||
|
||||
// ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
|
||||
SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
|
||||
NEG_RCP_LO, RCP_LO,
|
||||
ISD::SETEQ);
|
||||
// Calculate the rounding error from the URECIP instruction
|
||||
// E = mulhu(ABS_RCP_LO, RCP)
|
||||
SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
|
||||
|
||||
// RCP_A_E = RCP + E
|
||||
SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
|
||||
|
||||
// RCP_S_E = RCP - E
|
||||
SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
|
||||
|
||||
// Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
|
||||
SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
|
||||
RCP_A_E, RCP_S_E,
|
||||
ISD::SETEQ);
|
||||
// Quotient = mulhu(Tmp0, Num)
|
||||
SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
|
||||
|
||||
// Num_S_Remainder = Quotient * Den
|
||||
SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
|
||||
|
||||
// Remainder = Num - Num_S_Remainder
|
||||
SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
|
||||
|
||||
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
|
||||
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
|
||||
DAG.getConstant(-1, VT),
|
||||
DAG.getConstant(0, VT),
|
||||
ISD::SETGE);
|
||||
// Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
|
||||
SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
|
||||
DAG.getConstant(0, VT),
|
||||
DAG.getConstant(-1, VT),
|
||||
DAG.getConstant(0, VT),
|
||||
ISD::SETGE);
|
||||
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
|
||||
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
|
||||
Remainder_GE_Zero);
|
||||
|
||||
// Calculate Division result:
|
||||
|
||||
// Quotient_A_One = Quotient + 1
|
||||
SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
|
||||
DAG.getConstant(1, VT));
|
||||
|
||||
// Quotient_S_One = Quotient - 1
|
||||
SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
|
||||
DAG.getConstant(1, VT));
|
||||
|
||||
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
|
||||
SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
|
||||
Quotient, Quotient_A_One, ISD::SETEQ);
|
||||
|
||||
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
|
||||
Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
|
||||
Quotient_S_One, Div, ISD::SETEQ);
|
||||
|
||||
// Calculate Rem result:
|
||||
|
||||
// Remainder_S_Den = Remainder - Den
|
||||
SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
|
||||
|
||||
// Remainder_A_Den = Remainder + Den
|
||||
SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
|
||||
|
||||
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
|
||||
SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
|
||||
Remainder, Remainder_S_Den, ISD::SETEQ);
|
||||
|
||||
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
|
||||
Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
|
||||
Remainder_A_Den, Rem, ISD::SETEQ);
|
||||
|
||||
DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
|
||||
DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
|
||||
|
||||
return Op;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Helper functions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
|
||||
{
|
||||
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||
return CFP->isExactlyValue(1.0);
|
||||
}
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
|
||||
return C->isAllOnesValue();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
|
||||
{
|
||||
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||
return CFP->getValueAPF().isZero();
|
||||
}
|
||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
|
||||
return C->isNullValue();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
|
||||
const TargetRegisterClass *RC,
|
||||
unsigned Reg, EVT VT) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
unsigned VirtualRegister;
|
||||
if (!MRI.isLiveIn(Reg)) {
|
||||
VirtualRegister = MRI.createVirtualRegister(RC);
|
||||
MRI.addLiveIn(Reg, VirtualRegister);
|
||||
} else {
|
||||
VirtualRegister = MRI.getLiveInVirtReg(Reg);
|
||||
}
|
||||
return DAG.getRegister(VirtualRegister, VT);
|
||||
}
|
||||
|
||||
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
|
||||
|
||||
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
||||
{
|
||||
switch (Opcode) {
|
||||
default: return 0;
|
||||
// AMDIL DAG nodes
|
||||
NODE_NAME_CASE(MAD);
|
||||
NODE_NAME_CASE(CALL);
|
||||
NODE_NAME_CASE(UMUL);
|
||||
NODE_NAME_CASE(DIV_INF);
|
||||
NODE_NAME_CASE(RET_FLAG);
|
||||
NODE_NAME_CASE(BRANCH_COND);
|
||||
|
||||
// AMDGPU DAG nodes
|
||||
NODE_NAME_CASE(FRACT)
|
||||
NODE_NAME_CASE(FMAX)
|
||||
NODE_NAME_CASE(SMAX)
|
||||
NODE_NAME_CASE(UMAX)
|
||||
NODE_NAME_CASE(FMIN)
|
||||
NODE_NAME_CASE(SMIN)
|
||||
NODE_NAME_CASE(UMIN)
|
||||
NODE_NAME_CASE(URECIP)
|
||||
NODE_NAME_CASE(INTERP)
|
||||
NODE_NAME_CASE(INTERP_P0)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,142 +0,0 @@
|
|||
//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the interface defintiion of the TargetLowering class
|
||||
// that is common to all AMD GPUs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUISELLOWERING_H
|
||||
#define AMDGPUISELLOWERING_H
|
||||
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MachineRegisterInfo;
|
||||
|
||||
class AMDGPUTargetLowering : public TargetLowering
|
||||
{
|
||||
private:
|
||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
protected:
|
||||
|
||||
/// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list
|
||||
/// of the DAG's MachineFunction. This returns a Register SDNode representing
|
||||
/// Reg.
|
||||
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
|
||||
unsigned Reg, EVT VT) const;
|
||||
|
||||
bool isHWTrueValue(SDValue Op) const;
|
||||
bool isHWFalseValue(SDValue Op) const;
|
||||
|
||||
public:
|
||||
AMDGPUTargetLowering(TargetMachine &TM);
|
||||
|
||||
virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc DL, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
DebugLoc DL, SelectionDAG &DAG) const;
|
||||
|
||||
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
|
||||
virtual const char* getTargetNodeName(unsigned Opcode) const;
|
||||
|
||||
// Functions defined in AMDILISelLowering.cpp
|
||||
public:
|
||||
|
||||
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
|
||||
/// in Mask are known to be either zero or one and return them in the
|
||||
/// KnownZero/KnownOne bitsets.
|
||||
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
|
||||
APInt &KnownZero,
|
||||
APInt &KnownOne,
|
||||
const SelectionDAG &DAG,
|
||||
unsigned Depth = 0) const;
|
||||
|
||||
virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
|
||||
const CallInst &I, unsigned Intrinsic) const;
|
||||
|
||||
/// isFPImmLegal - We want to mark f32/f64 floating point values as legal.
|
||||
bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
|
||||
|
||||
/// ShouldShrinkFPConstant - We don't want to shrink f64/f32 constants.
|
||||
bool ShouldShrinkFPConstant(EVT VT) const;
|
||||
|
||||
private:
|
||||
void InitAMDILLowering();
|
||||
SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
|
||||
EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
|
||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
||||
};
|
||||
|
||||
namespace AMDGPUISD
|
||||
{
|
||||
|
||||
enum
|
||||
{
|
||||
// AMDIL ISD Opcodes
|
||||
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
||||
MAD, // 32bit Fused Multiply Add instruction
|
||||
CALL, // Function call based on a single integer
|
||||
UMUL, // 32bit unsigned multiplication
|
||||
DIV_INF, // Divide with infinity returned on zero divisor
|
||||
RET_FLAG,
|
||||
BRANCH_COND,
|
||||
// End AMDIL ISD Opcodes
|
||||
BITALIGN,
|
||||
FRACT,
|
||||
FMAX,
|
||||
SMAX,
|
||||
UMAX,
|
||||
FMIN,
|
||||
SMIN,
|
||||
UMIN,
|
||||
URECIP,
|
||||
INTERP,
|
||||
INTERP_P0,
|
||||
LAST_AMDGPU_ISD_NUMBER
|
||||
};
|
||||
|
||||
|
||||
} // End namespace AMDGPUISD
|
||||
|
||||
namespace SIISD {
|
||||
|
||||
enum {
|
||||
SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
|
||||
VCC_AND,
|
||||
VCC_BITCAST
|
||||
};
|
||||
|
||||
} // End namespace SIISD
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPUISELLOWERING_H
|
||||
|
|
@ -1,258 +0,0 @@
|
|||
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the implementation of the TargetInstrInfo class that is
|
||||
// common to all AMD GPUs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDIL.h"
|
||||
#include "AMDILUtilityFunctions.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
#define GET_INSTRINFO_CTOR
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
|
||||
: AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
|
||||
|
||||
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
|
||||
return RI;
|
||||
}
|
||||
|
||||
bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SubIdx) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
|
||||
const MachineMemOperand *&MMO,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
|
||||
const MachineMemOperand *&MMO,
|
||||
int &FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineInstr *
|
||||
AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
LiveVariables *LV) const {
|
||||
// TODO: Implement this function
|
||||
return NULL;
|
||||
}
|
||||
bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
|
||||
MachineBasicBlock &MBB) const {
|
||||
while (iter != MBB.end()) {
|
||||
switch (iter->getOpcode()) {
|
||||
default:
|
||||
break;
|
||||
ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
|
||||
case AMDGPU::BRANCH:
|
||||
return true;
|
||||
};
|
||||
++iter;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
|
||||
MachineBasicBlock::iterator tmp = MBB->end();
|
||||
if (!MBB->size()) {
|
||||
return MBB->end();
|
||||
}
|
||||
while (--tmp) {
|
||||
if (tmp->getOpcode() == AMDGPU::ENDLOOP
|
||||
|| tmp->getOpcode() == AMDGPU::ENDIF
|
||||
|| tmp->getOpcode() == AMDGPU::ELSE) {
|
||||
if (tmp == MBB->begin()) {
|
||||
return tmp;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
return ++tmp;
|
||||
}
|
||||
}
|
||||
return MBB->end();
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned SrcReg, bool isKill,
|
||||
int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
assert(!"Not Implemented");
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned DestReg, int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
assert(!"Not Implemented");
|
||||
}
|
||||
|
||||
MachineInstr *
|
||||
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
int FrameIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
MachineInstr*
|
||||
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
||||
MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
MachineInstr *LoadMI) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
bool
|
||||
AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops) const
|
||||
{
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
bool
|
||||
AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
||||
unsigned Reg, bool UnfoldLoad,
|
||||
bool UnfoldStore,
|
||||
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
||||
SmallVectorImpl<SDNode*> &NewNodes) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned
|
||||
AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
|
||||
bool UnfoldLoad, bool UnfoldStore,
|
||||
unsigned *LoadRegIndex) const {
|
||||
// TODO: Implement this function
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
|
||||
int64_t Offset1, int64_t Offset2,
|
||||
unsigned NumLoads) const {
|
||||
assert(Offset2 > Offset1
|
||||
&& "Second offset should be larger than first offset!");
|
||||
// If we have less than 16 loads in a row, and the offsets are within 16,
|
||||
// then schedule together.
|
||||
// TODO: Make the loads schedule near if it fits in a cacheline
|
||||
return (NumLoads < 16 && (Offset2 - Offset1) < 16);
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
|
||||
const {
|
||||
// TODO: Implement this function
|
||||
return true;
|
||||
}
|
||||
void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const {
|
||||
// TODO: Implement this function
|
||||
}
|
||||
|
||||
bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
bool
|
||||
AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||
const SmallVectorImpl<MachineOperand> &Pred2)
|
||||
const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
|
||||
std::vector<MachineOperand> &Pred) const {
|
||||
// TODO: Implement this function
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
|
||||
// TODO: Implement this function
|
||||
return MI->getDesc().isPredicable();
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
|
||||
// TODO: Implement this function
|
||||
return true;
|
||||
}
|
||||
|
||||
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
|
||||
DebugLoc DL) const
|
||||
{
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
const AMDGPURegisterInfo & RI = getRegisterInfo();
|
||||
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); i++) {
|
||||
MachineOperand &MO = MI.getOperand(i);
|
||||
// Convert dst regclass to one that is supported by the ISA
|
||||
if (MO.isReg() && MO.isDef()) {
|
||||
if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
|
||||
const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
|
||||
const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
|
||||
|
||||
assert(newRegClass);
|
||||
|
||||
MRI.setRegClass(MO.getReg(), newRegClass);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,148 +0,0 @@
|
|||
//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the definition of a TargetInstrInfo class that is common
|
||||
// to all AMD GPUs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUINSTRUCTIONINFO_H_
|
||||
#define AMDGPUINSTRUCTIONINFO_H_
|
||||
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
#define GET_INSTRINFO_HEADER
|
||||
#define GET_INSTRINFO_ENUM
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
#define OPCODE_IS_ZERO_INT 0x00000042
|
||||
#define OPCODE_IS_NOT_ZERO_INT 0x00000045
|
||||
#define OPCODE_IS_ZERO 0x00000020
|
||||
#define OPCODE_IS_NOT_ZERO 0x00000023
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetMachine;
|
||||
class MachineFunction;
|
||||
class MachineInstr;
|
||||
class MachineInstrBuilder;
|
||||
|
||||
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
|
||||
private:
|
||||
const AMDGPURegisterInfo RI;
|
||||
TargetMachine &TM;
|
||||
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
|
||||
MachineBasicBlock &MBB) const;
|
||||
public:
|
||||
explicit AMDGPUInstrInfo(TargetMachine &tm);
|
||||
|
||||
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
|
||||
|
||||
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||
unsigned &DstReg, unsigned &SubIdx) const;
|
||||
|
||||
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
|
||||
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
|
||||
int &FrameIndex) const;
|
||||
bool hasLoadFromStackSlot(const MachineInstr *MI,
|
||||
const MachineMemOperand *&MMO,
|
||||
int &FrameIndex) const;
|
||||
unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
|
||||
unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
|
||||
int &FrameIndex) const;
|
||||
bool hasStoreFromStackSlot(const MachineInstr *MI,
|
||||
const MachineMemOperand *&MMO,
|
||||
int &FrameIndex) const;
|
||||
|
||||
MachineInstr *
|
||||
convertToThreeAddress(MachineFunction::iterator &MFI,
|
||||
MachineBasicBlock::iterator &MBBI,
|
||||
LiveVariables *LV) const;
|
||||
|
||||
|
||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const = 0;
|
||||
|
||||
void storeRegToStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned SrcReg, bool isKill, int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
void loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
unsigned DestReg, int FrameIndex,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
protected:
|
||||
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
|
||||
MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
int FrameIndex) const;
|
||||
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
|
||||
MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops,
|
||||
MachineInstr *LoadMI) const;
|
||||
public:
|
||||
bool canFoldMemoryOperand(const MachineInstr *MI,
|
||||
const SmallVectorImpl<unsigned> &Ops) const;
|
||||
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
||||
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
|
||||
SmallVectorImpl<MachineInstr *> &NewMIs) const;
|
||||
bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
||||
SmallVectorImpl<SDNode *> &NewNodes) const;
|
||||
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
|
||||
bool UnfoldLoad, bool UnfoldStore,
|
||||
unsigned *LoadRegIndex = 0) const;
|
||||
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
|
||||
int64_t Offset1, int64_t Offset2,
|
||||
unsigned NumLoads) const;
|
||||
|
||||
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
|
||||
void insertNoop(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const;
|
||||
bool isPredicated(const MachineInstr *MI) const;
|
||||
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||
const SmallVectorImpl<MachineOperand> &Pred2) const;
|
||||
bool DefinesPredicate(MachineInstr *MI,
|
||||
std::vector<MachineOperand> &Pred) const;
|
||||
bool isPredicable(MachineInstr *MI) const;
|
||||
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
|
||||
|
||||
// Helper functions that check the opcode for status information
|
||||
bool isLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isSExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isZExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isAExtLoadInst(llvm::MachineInstr *MI) const;
|
||||
bool isStoreInst(llvm::MachineInstr *MI) const;
|
||||
bool isTruncStoreInst(llvm::MachineInstr *MI) const;
|
||||
|
||||
virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||
int64_t Imm) const = 0;
|
||||
virtual unsigned getIEQOpcode() const = 0;
|
||||
virtual bool isMov(unsigned opcode) const = 0;
|
||||
|
||||
/// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
|
||||
/// MachineInstr
|
||||
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
|
||||
DebugLoc DL) const;
|
||||
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif // AMDGPUINSTRINFO_H_
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains DAG node defintions for the AMDGPU target.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU DAG Profiles
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPU DAG Nodes
|
||||
//
|
||||
|
||||
// out = ((a << 32) | b) >> c)
|
||||
//
|
||||
// Can be used to optimize rtol:
|
||||
// rotl(a, b) = bitalign(a, a, 32 - b)
|
||||
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
|
||||
|
||||
// out = a - floor(a)
|
||||
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
|
||||
|
||||
// out = max(a, b) a and b are floats
|
||||
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = max(a, b) a and b are signed ints
|
||||
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = max(a, b) a and b are unsigned ints
|
||||
def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = min(a, b) a and b are floats
|
||||
def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = min(a, b) a snd b are signed ints
|
||||
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// out = min(a, b) a and b are unsigned ints
|
||||
def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// urecip - This operation is a helper for integer division, it returns the
|
||||
// result of 1 / a as a fractional unsigned integer.
|
||||
// out = (2^32 / a) + e
|
||||
// e is rounding error
|
||||
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
|
||||
|
||||
def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
|
||||
|
|
@ -1,183 +0,0 @@
|
|||
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains instruction defs that are common to all hw codegen
|
||||
// targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
|
||||
field bits<16> AMDILOp = 0;
|
||||
field bits<3> Gen = 0;
|
||||
|
||||
let Namespace = "AMDGPU";
|
||||
let OutOperandList = outs;
|
||||
let InOperandList = ins;
|
||||
let AsmString = asm;
|
||||
let Pattern = pattern;
|
||||
let Itinerary = NullALU;
|
||||
let TSFlags{42-40} = Gen;
|
||||
let TSFlags{63-48} = AMDILOp;
|
||||
}
|
||||
|
||||
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
|
||||
: AMDGPUInst<outs, ins, asm, pattern> {
|
||||
|
||||
field bits<32> Inst = 0xffffffff;
|
||||
|
||||
}
|
||||
|
||||
def COND_EQ : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
case ISD::SETOEQ: case ISD::SETUEQ:
|
||||
case ISD::SETEQ: return true;}}}]
|
||||
>;
|
||||
|
||||
def COND_NE : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
case ISD::SETONE: case ISD::SETUNE:
|
||||
case ISD::SETNE: return true;}}}]
|
||||
>;
|
||||
def COND_GT : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
case ISD::SETOGT: case ISD::SETUGT:
|
||||
case ISD::SETGT: return true;}}}]
|
||||
>;
|
||||
|
||||
def COND_GE : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
case ISD::SETOGE: case ISD::SETUGE:
|
||||
case ISD::SETGE: return true;}}}]
|
||||
>;
|
||||
|
||||
def COND_LT : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
case ISD::SETOLT: case ISD::SETULT:
|
||||
case ISD::SETLT: return true;}}}]
|
||||
>;
|
||||
|
||||
def COND_LE : PatLeaf <
|
||||
(cond),
|
||||
[{switch(N->get()){{default: return false;
|
||||
case ISD::SETOLE: case ISD::SETULE:
|
||||
case ISD::SETLE: return true;}}}]
|
||||
>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Load/Store Pattern Fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
|
||||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
||||
}]>;
|
||||
|
||||
class Constants {
|
||||
int TWO_PI = 0x40c90fdb;
|
||||
int PI = 0x40490fdb;
|
||||
int TWO_PI_INV = 0x3e22f983;
|
||||
}
|
||||
def CONST : Constants;
|
||||
|
||||
def FP_ZERO : PatLeaf <
|
||||
(fpimm),
|
||||
[{return N->getValueAPF().isZero();}]
|
||||
>;
|
||||
|
||||
def FP_ONE : PatLeaf <
|
||||
(fpimm),
|
||||
[{return N->isExactlyValue(1.0);}]
|
||||
>;
|
||||
|
||||
let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
|
||||
|
||||
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
|
||||
(outs rc:$dst),
|
||||
(ins rc:$src0),
|
||||
"CLAMP $dst, $src0",
|
||||
[(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
|
||||
>;
|
||||
|
||||
class FABS <RegisterClass rc> : AMDGPUShaderInst <
|
||||
(outs rc:$dst),
|
||||
(ins rc:$src0),
|
||||
"FABS $dst, $src0",
|
||||
[(set rc:$dst, (fabs rc:$src0))]
|
||||
>;
|
||||
|
||||
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
|
||||
(outs rc:$dst),
|
||||
(ins rc:$src0),
|
||||
"FNEG $dst, $src0",
|
||||
[(set rc:$dst, (fneg rc:$src0))]
|
||||
>;
|
||||
|
||||
def SHADER_TYPE : AMDGPUShaderInst <
|
||||
(outs),
|
||||
(ins i32imm:$type),
|
||||
"SHADER_TYPE $type",
|
||||
[(int_AMDGPU_shader_type imm:$type)]
|
||||
>;
|
||||
|
||||
} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
|
||||
|
||||
/* Generic helper patterns for intrinsics */
|
||||
/* -------------------------------------- */
|
||||
|
||||
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
|
||||
RegisterClass rc> : Pat <
|
||||
(fpow rc:$src0, rc:$src1),
|
||||
(exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
|
||||
>;
|
||||
|
||||
/* Other helper patterns */
|
||||
/* --------------------- */
|
||||
|
||||
/* Extract element pattern */
|
||||
class Extract_Element <ValueType sub_type, ValueType vec_type,
|
||||
RegisterClass vec_class, int sub_idx,
|
||||
SubRegIndex sub_reg>: Pat<
|
||||
(sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
|
||||
(EXTRACT_SUBREG vec_class:$src, sub_reg)
|
||||
>;
|
||||
|
||||
/* Insert element pattern */
|
||||
class Insert_Element <ValueType elem_type, ValueType vec_type,
|
||||
RegisterClass elem_class, RegisterClass vec_class,
|
||||
int sub_idx, SubRegIndex sub_reg> : Pat <
|
||||
|
||||
(vec_type (vector_insert (vec_type vec_class:$vec),
|
||||
(elem_type elem_class:$elem), sub_idx)),
|
||||
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
|
||||
>;
|
||||
|
||||
// Vector Build pattern
|
||||
class Vector_Build <ValueType vecType, RegisterClass vectorClass,
|
||||
ValueType elemType, RegisterClass elemClass> : Pat <
|
||||
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
|
||||
(elemType elemClass:$z), (elemType elemClass:$w))),
|
||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
||||
(vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
|
||||
elemClass:$z, sel_z), elemClass:$w, sel_w)
|
||||
>;
|
||||
|
||||
// bitconvert pattern
|
||||
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
|
||||
(dt (bitconvert (st rc:$src0))),
|
||||
(dt rc:$src0)
|
||||
>;
|
||||
|
||||
include "R600Instructions.td"
|
||||
|
||||
include "SIInstrInfo.td"
|
||||
|
||||
|
|
@ -1,63 +0,0 @@
|
|||
//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines intrinsics that are used by all hw codegen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let TargetPrefix = "AMDGPU", isTarget = 1 in {
|
||||
|
||||
def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
|
||||
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
|
||||
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
|
||||
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
|
||||
def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
|
||||
}
|
||||
|
||||
let TargetPrefix = "TGSI", isTarget = 1 in {
|
||||
|
||||
def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
|
||||
}
|
||||
|
||||
include "SIIntrinsics.td"
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains code to lower AMDGPU MachineInstrs to their corresponding
|
||||
// MCInst.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
||||
#include "AMDGPUMCInstLower.h"
|
||||
#include "AMDGPUAsmPrinter.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUMCInstLower::AMDGPUMCInstLower() { }
|
||||
|
||||
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
|
||||
OutMI.setOpcode(MI->getOpcode());
|
||||
|
||||
for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
|
||||
MCOperand MCOp;
|
||||
switch (MO.getType()) {
|
||||
default:
|
||||
llvm_unreachable("unknown operand type");
|
||||
case MachineOperand::MO_FPImmediate: {
|
||||
const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
|
||||
assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
|
||||
"Only floating point immediates are supported at the moment.");
|
||||
MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
|
||||
break;
|
||||
}
|
||||
case MachineOperand::MO_Immediate:
|
||||
MCOp = MCOperand::CreateImm(MO.getImm());
|
||||
break;
|
||||
case MachineOperand::MO_Register:
|
||||
MCOp = MCOperand::CreateReg(MO.getReg());
|
||||
break;
|
||||
}
|
||||
OutMI.addOperand(MCOp);
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
AMDGPUMCInstLower MCInstLowering;
|
||||
|
||||
// Ignore placeholder instructions:
|
||||
if (MI->getOpcode() == AMDGPU::MASK_WRITE) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (MI->isBundle()) {
|
||||
const MachineBasicBlock *MBB = MI->getParent();
|
||||
MachineBasicBlock::const_instr_iterator I = MI;
|
||||
++I;
|
||||
while (I != MBB->end() && I->isInsideBundle()) {
|
||||
MCInst MCBundleInst;
|
||||
const MachineInstr *BundledInst = I;
|
||||
MCInstLowering.lower(BundledInst, MCBundleInst);
|
||||
OutStreamer.EmitInstruction(MCBundleInst);
|
||||
++I;
|
||||
}
|
||||
} else {
|
||||
MCInst TmpInst;
|
||||
MCInstLowering.lower(MI, TmpInst);
|
||||
OutStreamer.EmitInstruction(TmpInst);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPU_MCINSTLOWER_H
|
||||
#define AMDGPU_MCINSTLOWER_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MCInst;
|
||||
class MachineInstr;
|
||||
|
||||
class AMDGPUMCInstLower {
|
||||
|
||||
public:
|
||||
AMDGPUMCInstLower();
|
||||
|
||||
/// lower - Lower a MachineInstr to an MCInst
|
||||
void lower(const MachineInstr *MI, MCInst &OutMI) const;
|
||||
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif //AMDGPU_MCINSTLOWER_H
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Parent TargetRegisterInfo class common to all hw codegen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
|
||||
const TargetInstrInfo &tii)
|
||||
: AMDGPUGenRegisterInfo(0),
|
||||
TM(tm),
|
||||
TII(tii)
|
||||
{ }
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Function handling callbacks - Functions are a seldom used feature of GPUS, so
|
||||
// they are not supported at this time.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
|
||||
|
||||
const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
|
||||
const {
|
||||
return &CalleeSavedReg;
|
||||
}
|
||||
|
||||
void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
int SPAdj,
|
||||
RegScavenger *RS) const {
|
||||
assert(!"Subroutines not supported yet");
|
||||
}
|
||||
|
||||
unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
|
||||
assert(!"Subroutines not supported yet");
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define GET_REGINFO_TARGET_DESC
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the TargetRegisterInfo interface that is implemented
|
||||
// by all hw codegen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUREGISTERINFO_H_
|
||||
#define AMDGPUREGISTERINFO_H_
|
||||
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
|
||||
#define GET_REGINFO_HEADER
|
||||
#define GET_REGINFO_ENUM
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetMachine;
|
||||
class TargetInstrInfo;
|
||||
|
||||
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo
|
||||
{
|
||||
TargetMachine &TM;
|
||||
const TargetInstrInfo &TII;
|
||||
static const uint16_t CalleeSavedReg;
|
||||
|
||||
AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
|
||||
|
||||
virtual BitVector getReservedRegs(const MachineFunction &MF) const {
|
||||
assert(!"Unimplemented"); return BitVector();
|
||||
}
|
||||
|
||||
/// getISARegClass - rc is an AMDIL reg class. This function returns the
|
||||
/// ISA reg class that is equivalent to the given AMDIL reg class.
|
||||
virtual const TargetRegisterClass * getISARegClass(
|
||||
const TargetRegisterClass * rc) const {
|
||||
assert(!"Unimplemented"); return NULL;
|
||||
}
|
||||
|
||||
virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
|
||||
assert(!"Unimplemented"); return NULL;
|
||||
}
|
||||
|
||||
const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
|
||||
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
|
||||
RegScavenger *RS) const;
|
||||
unsigned getFrameRegister(const MachineFunction &MF) const;
|
||||
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDIDSAREGISTERINFO_H_
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Tablegen register definitions common to all hw codegen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Namespace = "AMDGPU" in {
|
||||
def sel_x : SubRegIndex;
|
||||
def sel_y : SubRegIndex;
|
||||
def sel_z : SubRegIndex;
|
||||
def sel_w : SubRegIndex;
|
||||
}
|
||||
|
||||
include "R600RegisterInfo.td"
|
||||
include "SIRegisterInfo.td"
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the AMDGPU specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define GET_SUBTARGETINFO_ENUM
|
||||
#define GET_SUBTARGETINFO_TARGET_DESC
|
||||
#define GET_SUBTARGETINFO_CTOR
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
|
||||
AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
|
||||
AMDGPUGenSubtargetInfo(TT, CPU, FS), mDumpCode(false) {
|
||||
InstrItins = getInstrItineraryForCPU(CPU);
|
||||
|
||||
memset(CapsOverride, 0, sizeof(*CapsOverride)
|
||||
* AMDGPUDeviceInfo::MaxNumberCapabilities);
|
||||
// Default card
|
||||
StringRef GPU = CPU;
|
||||
mIs64bit = false;
|
||||
mDefaultSize[0] = 64;
|
||||
mDefaultSize[1] = 1;
|
||||
mDefaultSize[2] = 1;
|
||||
ParseSubtargetFeatures(GPU, FS);
|
||||
mDevName = GPU;
|
||||
mDevice = AMDGPUDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
|
||||
}
|
||||
|
||||
AMDGPUSubtarget::~AMDGPUSubtarget()
|
||||
{
|
||||
delete mDevice;
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const
|
||||
{
|
||||
assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
|
||||
"Caps index is out of bounds!");
|
||||
return CapsOverride[caps];
|
||||
}
|
||||
bool
|
||||
AMDGPUSubtarget::is64bit() const
|
||||
{
|
||||
return mIs64bit;
|
||||
}
|
||||
bool
|
||||
AMDGPUSubtarget::isTargetELF() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
size_t
|
||||
AMDGPUSubtarget::getDefaultSize(uint32_t dim) const
|
||||
{
|
||||
if (dim > 3) {
|
||||
return 1;
|
||||
} else {
|
||||
return mDefaultSize[dim];
|
||||
}
|
||||
}
|
||||
|
||||
std::string
|
||||
AMDGPUSubtarget::getDataLayout() const
|
||||
{
|
||||
if (!mDevice) {
|
||||
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
|
||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
|
||||
}
|
||||
return mDevice->getDataLayout();
|
||||
}
|
||||
|
||||
std::string
|
||||
AMDGPUSubtarget::getDeviceName() const
|
||||
{
|
||||
return mDevName;
|
||||
}
|
||||
const AMDGPUDevice *
|
||||
AMDGPUSubtarget::device() const
|
||||
{
|
||||
return mDevice;
|
||||
}
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the AMDGPU specific subclass of TargetSubtarget.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _AMDGPUSUBTARGET_H_
|
||||
#define _AMDGPUSUBTARGET_H_
|
||||
#include "AMDILDevice.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
|
||||
#define GET_SUBTARGETINFO_HEADER
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
|
||||
#define MAX_CB_SIZE (1 << 16)
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo
|
||||
{
|
||||
private:
|
||||
bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
|
||||
const AMDGPUDevice *mDevice;
|
||||
size_t mDefaultSize[3];
|
||||
size_t mMinimumSize[3];
|
||||
std::string mDevName;
|
||||
bool mIs64bit;
|
||||
bool mIs32on64bit;
|
||||
bool mDumpCode;
|
||||
bool mR600ALUInst;
|
||||
|
||||
InstrItineraryData InstrItins;
|
||||
|
||||
public:
|
||||
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
|
||||
virtual ~AMDGPUSubtarget();
|
||||
|
||||
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
|
||||
virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
|
||||
|
||||
bool isOverride(AMDGPUDeviceInfo::Caps) const;
|
||||
bool is64bit() const;
|
||||
|
||||
// Helper functions to simplify if statements
|
||||
bool isTargetELF() const;
|
||||
const AMDGPUDevice* device() const;
|
||||
std::string getDataLayout() const;
|
||||
std::string getDeviceName() const;
|
||||
virtual size_t getDefaultSize(uint32_t dim) const;
|
||||
bool dumpCode() const { return mDumpCode; }
|
||||
bool r600ALUEncoding() const { return mR600ALUInst; }
|
||||
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPUSUBTARGET_H_
|
||||
|
|
@ -1,143 +0,0 @@
|
|||
//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The AMDGPU target machine contains all of the hardware specific information
|
||||
// needed to emit code for R600 and SI GPUs.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDGPU.h"
|
||||
#include "R600ISelLowering.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "SIISelLowering.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
#include "llvm/Analysis/Verifier.h"
|
||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/PassManager.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_os_ostream.h"
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include <llvm/CodeGen/Passes.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
extern "C" void LLVMInitializeAMDGPUTarget() {
|
||||
// Register the target
|
||||
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
|
||||
}
|
||||
|
||||
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
TargetOptions Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OptLevel
|
||||
)
|
||||
:
|
||||
LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
|
||||
Subtarget(TT, CPU, FS),
|
||||
DataLayout(Subtarget.getDataLayout()),
|
||||
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
||||
Subtarget.device()->getStackAlignment(), 0),
|
||||
IntrinsicInfo(this),
|
||||
InstrItins(&Subtarget.getInstrItineraryData()),
|
||||
mDump(false)
|
||||
|
||||
{
|
||||
// TLInfo uses InstrInfo so it must be initialized after.
|
||||
if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
InstrInfo = new R600InstrInfo(*this);
|
||||
TLInfo = new R600TargetLowering(*this);
|
||||
} else {
|
||||
InstrInfo = new SIInstrInfo(*this);
|
||||
TLInfo = new SITargetLowering(*this);
|
||||
}
|
||||
}
|
||||
|
||||
AMDGPUTargetMachine::~AMDGPUTargetMachine()
|
||||
{
|
||||
}
|
||||
|
||||
namespace {
|
||||
class AMDGPUPassConfig : public TargetPassConfig {
|
||||
public:
|
||||
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
|
||||
: TargetPassConfig(TM, PM) {}
|
||||
|
||||
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
|
||||
return getTM<AMDGPUTargetMachine>();
|
||||
}
|
||||
|
||||
virtual bool addPreISel();
|
||||
virtual bool addInstSelector();
|
||||
virtual bool addPreRegAlloc();
|
||||
virtual bool addPostRegAlloc();
|
||||
virtual bool addPreSched2();
|
||||
virtual bool addPreEmitPass();
|
||||
};
|
||||
} // End of anonymous namespace
|
||||
|
||||
TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
return new AMDGPUPassConfig(this, PM);
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUPassConfig::addPreISel()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addInstSelector() {
|
||||
PM->add(createAMDGPUPeepholeOpt(*TM));
|
||||
PM->add(createAMDGPUISelDag(getAMDGPUTargetMachine()));
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addPreRegAlloc() {
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
|
||||
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
||||
PM->add(createSIAssignInterpRegsPass(*TM));
|
||||
}
|
||||
PM->add(createAMDGPUConvertToISAPass(*TM));
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addPostRegAlloc() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addPreSched2() {
|
||||
|
||||
addPass(IfConverterID);
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUPassConfig::addPreEmitPass() {
|
||||
PM->add(createAMDGPUCFGPreparationPass(*TM));
|
||||
PM->add(createAMDGPUCFGStructurizerPass(*TM));
|
||||
|
||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
||||
PM->add(createR600ExpandSpecialInstrsPass(*TM));
|
||||
addPass(FinalizeMachineBundlesID);
|
||||
} else {
|
||||
PM->add(createSILowerLiteralConstantsPass(*TM));
|
||||
// piglit is unreliable (VM protection faults, GPU lockups) with this pass:
|
||||
//PM->add(createSILowerFlowControlPass(*TM));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The AMDGPU TargetMachine interface definition for hw codgen targets.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPU_TARGET_MACHINE_H
|
||||
#define AMDGPU_TARGET_MACHINE_H
|
||||
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILFrameLowering.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "R600ISelLowering.h"
|
||||
#include "llvm/ADT/OwningPtr.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
|
||||
|
||||
class AMDGPUTargetMachine : public LLVMTargetMachine {
|
||||
|
||||
AMDGPUSubtarget Subtarget;
|
||||
const TargetData DataLayout;
|
||||
AMDGPUFrameLowering FrameLowering;
|
||||
AMDGPUIntrinsicInfo IntrinsicInfo;
|
||||
const AMDGPUInstrInfo * InstrInfo;
|
||||
AMDGPUTargetLowering * TLInfo;
|
||||
const InstrItineraryData* InstrItins;
|
||||
bool mDump;
|
||||
|
||||
public:
|
||||
AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
|
||||
StringRef CPU,
|
||||
TargetOptions Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL);
|
||||
~AMDGPUTargetMachine();
|
||||
virtual const AMDGPUFrameLowering* getFrameLowering() const {
|
||||
return &FrameLowering;
|
||||
}
|
||||
virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
|
||||
return &IntrinsicInfo;
|
||||
}
|
||||
virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
|
||||
virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
|
||||
virtual const AMDGPURegisterInfo *getRegisterInfo() const {
|
||||
return &InstrInfo->getRegisterInfo();
|
||||
}
|
||||
virtual AMDGPUTargetLowering * getTargetLowering() const {
|
||||
return TLInfo;
|
||||
}
|
||||
virtual const InstrItineraryData* getInstrItineraryData() const {
|
||||
return InstrItins;
|
||||
}
|
||||
virtual const TargetData* getTargetData() const { return &DataLayout; }
|
||||
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPU_TARGET_MACHINE_H
|
||||
|
|
@ -1,106 +0,0 @@
|
|||
//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the entry points for global functions defined in the LLVM
|
||||
// AMDGPU back-end.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDIL_H_
|
||||
#define AMDIL_H_
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
#define ARENA_SEGMENT_RESERVED_UAVS 12
|
||||
#define DEFAULT_ARENA_UAV_ID 8
|
||||
#define DEFAULT_RAW_UAV_ID 7
|
||||
#define GLOBAL_RETURN_RAW_UAV_ID 11
|
||||
#define HW_MAX_NUM_CB 8
|
||||
#define MAX_NUM_UNIQUE_UAVS 8
|
||||
#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
|
||||
#define OPENCL_MAX_READ_IMAGES 128
|
||||
#define OPENCL_MAX_WRITE_IMAGES 8
|
||||
#define OPENCL_MAX_SAMPLERS 16
|
||||
|
||||
// The next two values can never be zero, as zero is the ID that is
|
||||
// used to assert against.
|
||||
#define DEFAULT_LDS_ID 1
|
||||
#define DEFAULT_GDS_ID 1
|
||||
#define DEFAULT_SCRATCH_ID 1
|
||||
#define DEFAULT_VEC_SLOTS 8
|
||||
|
||||
#define OCL_DEVICE_RV710 0x0001
|
||||
#define OCL_DEVICE_RV730 0x0002
|
||||
#define OCL_DEVICE_RV770 0x0004
|
||||
#define OCL_DEVICE_CEDAR 0x0008
|
||||
#define OCL_DEVICE_REDWOOD 0x0010
|
||||
#define OCL_DEVICE_JUNIPER 0x0020
|
||||
#define OCL_DEVICE_CYPRESS 0x0040
|
||||
#define OCL_DEVICE_CAICOS 0x0080
|
||||
#define OCL_DEVICE_TURKS 0x0100
|
||||
#define OCL_DEVICE_BARTS 0x0200
|
||||
#define OCL_DEVICE_CAYMAN 0x0400
|
||||
#define OCL_DEVICE_ALL 0x3FFF
|
||||
|
||||
/// The number of function ID's that are reserved for
|
||||
/// internal compiler usage.
|
||||
const unsigned int RESERVED_FUNCS = 1024;
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUInstrPrinter;
|
||||
class FunctionPass;
|
||||
class MCAsmInfo;
|
||||
class raw_ostream;
|
||||
class Target;
|
||||
class TargetMachine;
|
||||
|
||||
/// Instruction selection passes.
|
||||
FunctionPass*
|
||||
createAMDGPUISelDag(TargetMachine &TM);
|
||||
FunctionPass*
|
||||
createAMDGPUPeepholeOpt(TargetMachine &TM);
|
||||
|
||||
/// Pre emit passes.
|
||||
FunctionPass*
|
||||
createAMDGPUCFGPreparationPass(TargetMachine &TM);
|
||||
FunctionPass*
|
||||
createAMDGPUCFGStructurizerPass(TargetMachine &TM);
|
||||
|
||||
extern Target TheAMDGPUTarget;
|
||||
} // end namespace llvm;
|
||||
|
||||
/// Include device information enumerations
|
||||
#include "AMDILDeviceInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
/// OpenCL uses address spaces to differentiate between
|
||||
/// various memory regions on the hardware. On the CPU
|
||||
/// all of the address spaces point to the same memory,
|
||||
/// however on the GPU, each address space points to
|
||||
/// a seperate piece of memory that is unique from other
|
||||
/// memory locations.
|
||||
namespace AMDGPUAS {
|
||||
enum AddressSpaces {
|
||||
PRIVATE_ADDRESS = 0, // Address space for private memory.
|
||||
GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
|
||||
CONSTANT_ADDRESS = 2, // Address space for constant memory.
|
||||
LOCAL_ADDRESS = 3, // Address space for local memory.
|
||||
REGION_ADDRESS = 4, // Address space for region memory.
|
||||
ADDRESS_NONE = 5, // Address space for unknown memory.
|
||||
PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
|
||||
PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
|
||||
USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
|
||||
LAST_ADDRESS = 9
|
||||
};
|
||||
|
||||
} // namespace AMDGPUAS
|
||||
|
||||
} // end namespace llvm
|
||||
#endif // AMDIL_H_
|
||||
|
|
@ -1,129 +0,0 @@
|
|||
//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDIL7XXDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILDevice.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST)
|
||||
{
|
||||
setCaps();
|
||||
std::string name = mSTM->getDeviceName();
|
||||
if (name == "rv710") {
|
||||
mDeviceFlag = OCL_DEVICE_RV710;
|
||||
} else if (name == "rv730") {
|
||||
mDeviceFlag = OCL_DEVICE_RV730;
|
||||
} else {
|
||||
mDeviceFlag = OCL_DEVICE_RV770;
|
||||
}
|
||||
}
|
||||
|
||||
AMDGPU7XXDevice::~AMDGPU7XXDevice()
|
||||
{
|
||||
}
|
||||
|
||||
void AMDGPU7XXDevice::setCaps()
|
||||
{
|
||||
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
|
||||
}
|
||||
|
||||
size_t AMDGPU7XXDevice::getMaxLDSSize() const
|
||||
{
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_700;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDGPU7XXDevice::getWavefrontSize() const
|
||||
{
|
||||
return AMDGPUDevice::HalfWavefrontSize;
|
||||
}
|
||||
|
||||
uint32_t AMDGPU7XXDevice::getGeneration() const
|
||||
{
|
||||
return AMDGPUDeviceInfo::HD4XXX;
|
||||
}
|
||||
|
||||
uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const
|
||||
{
|
||||
switch (DeviceID) {
|
||||
default:
|
||||
assert(0 && "ID type passed in is unknown!");
|
||||
break;
|
||||
case GLOBAL_ID:
|
||||
case CONSTANT_ID:
|
||||
case RAW_UAV_ID:
|
||||
case ARENA_UAV_ID:
|
||||
break;
|
||||
case LDS_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return DEFAULT_LDS_ID;
|
||||
}
|
||||
break;
|
||||
case SCRATCH_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
|
||||
return DEFAULT_SCRATCH_ID;
|
||||
}
|
||||
break;
|
||||
case GDS_ID:
|
||||
assert(0 && "GDS UAV ID is not supported on this chip");
|
||||
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
|
||||
return DEFAULT_GDS_ID;
|
||||
}
|
||||
break;
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST)
|
||||
{
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDGPU770Device::~AMDGPU770Device()
|
||||
{
|
||||
}
|
||||
|
||||
void AMDGPU770Device::setCaps()
|
||||
{
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::FMA);
|
||||
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
|
||||
}
|
||||
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
|
||||
mHWBits.reset(AMDGPUDeviceInfo::LongOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::LongOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
|
||||
}
|
||||
|
||||
size_t AMDGPU770Device::getWavefrontSize() const
|
||||
{
|
||||
return AMDGPUDevice::WavefrontSize;
|
||||
}
|
||||
|
||||
AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST)
|
||||
{
|
||||
}
|
||||
|
||||
AMDGPU710Device::~AMDGPU710Device()
|
||||
{
|
||||
}
|
||||
|
||||
size_t AMDGPU710Device::getWavefrontSize() const
|
||||
{
|
||||
return AMDGPUDevice::QuarterWavefrontSize;
|
||||
}
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef _AMDIL7XXDEVICEIMPL_H_
|
||||
#define _AMDIL7XXDEVICEIMPL_H_
|
||||
#include "AMDILDevice.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUSubtarget;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// 7XX generation of devices and their respective sub classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// The AMDGPU7XXDevice class represents the generic 7XX device. All 7XX
|
||||
// devices are derived from this class. The AMDGPU7XX device will only
|
||||
// support the minimal features that are required to be considered OpenCL 1.0
|
||||
// compliant and nothing more.
|
||||
class AMDGPU7XXDevice : public AMDGPUDevice {
|
||||
public:
|
||||
AMDGPU7XXDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPU7XXDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual size_t getWavefrontSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
virtual uint32_t getResourceID(uint32_t DeviceID) const;
|
||||
virtual uint32_t getMaxNumUAVs() const;
|
||||
|
||||
protected:
|
||||
virtual void setCaps();
|
||||
}; // AMDGPU7XXDevice
|
||||
|
||||
// The AMDGPU770Device class represents the RV770 chip and it's
|
||||
// derivative cards. The difference between this device and the base
|
||||
// class is this device device adds support for double precision
|
||||
// and has a larger wavefront size.
|
||||
class AMDGPU770Device : public AMDGPU7XXDevice {
|
||||
public:
|
||||
AMDGPU770Device(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPU770Device();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDGPU770Device
|
||||
|
||||
// The AMDGPU710Device class derives from the 7XX base class, but this
|
||||
// class is a smaller derivative, so we need to overload some of the
|
||||
// functions in order to correctly specify this information.
|
||||
class AMDGPU710Device : public AMDGPU7XXDevice {
|
||||
public:
|
||||
AMDGPU710Device(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPU710Device();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
}; // AMDGPU710Device
|
||||
|
||||
} // namespace llvm
|
||||
#endif // _AMDILDEVICEIMPL_H_
|
||||
|
|
@ -1,85 +0,0 @@
|
|||
//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Target-independent interfaces which we are implementing
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "llvm/Target/Target.td"
|
||||
|
||||
// Dummy Instruction itineraries for pseudo instructions
|
||||
def ALU_NULL : FuncUnit;
|
||||
def NullALU : InstrItinClass;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDIL Subtarget features.
|
||||
//===----------------------------------------------------------------------===//
|
||||
def FeatureFP64 : SubtargetFeature<"fp64",
|
||||
"CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
|
||||
"true",
|
||||
"Enable 64bit double precision operations">;
|
||||
def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
|
||||
"CapsOverride[AMDGPUDeviceInfo::ByteStores]",
|
||||
"true",
|
||||
"Enable byte addressable stores">;
|
||||
def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
|
||||
"CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
|
||||
"true",
|
||||
"Enable duplicate barrier detection(HD5XXX or later).">;
|
||||
def FeatureImages : SubtargetFeature<"images",
|
||||
"CapsOverride[AMDGPUDeviceInfo::Images]",
|
||||
"true",
|
||||
"Enable image functions">;
|
||||
def FeatureMultiUAV : SubtargetFeature<"multi_uav",
|
||||
"CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
|
||||
"true",
|
||||
"Generate multiple UAV code(HD5XXX family or later)">;
|
||||
def FeatureMacroDB : SubtargetFeature<"macrodb",
|
||||
"CapsOverride[AMDGPUDeviceInfo::MacroDB]",
|
||||
"true",
|
||||
"Use internal macrodb, instead of macrodb in driver">;
|
||||
def FeatureNoAlias : SubtargetFeature<"noalias",
|
||||
"CapsOverride[AMDGPUDeviceInfo::NoAlias]",
|
||||
"true",
|
||||
"assert that all kernel argument pointers are not aliased">;
|
||||
def FeatureNoInline : SubtargetFeature<"no-inline",
|
||||
"CapsOverride[AMDGPUDeviceInfo::NoInline]",
|
||||
"true",
|
||||
"specify whether to not inline functions">;
|
||||
|
||||
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
|
||||
"mIs64bit",
|
||||
"false",
|
||||
"Specify if 64bit addressing should be used.">;
|
||||
|
||||
def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
|
||||
"mIs32on64bit",
|
||||
"false",
|
||||
"Specify if 64bit sized pointers with 32bit addressing should be used.">;
|
||||
def FeatureDebug : SubtargetFeature<"debug",
|
||||
"CapsOverride[AMDGPUDeviceInfo::Debug]",
|
||||
"true",
|
||||
"Debug mode is enabled, so disable hardware accelerated address spaces.">;
|
||||
def FeatureDumpCode : SubtargetFeature <"DumpCode",
|
||||
"mDumpCode",
|
||||
"true",
|
||||
"Dump MachineInstrs in the CodeEmitter">;
|
||||
|
||||
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
|
||||
"mR600ALUInst",
|
||||
"false",
|
||||
"Older version of ALU instructions encoding.">;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register File, Calling Conv, Instruction Descriptions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
include "AMDILRegisterInfo.td"
|
||||
include "AMDILInstrInfo.td"
|
||||
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,137 +0,0 @@
|
|||
//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
// Default implementation for all of the classes.
|
||||
AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST)
|
||||
{
|
||||
mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
|
||||
mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
|
||||
setCaps();
|
||||
mDeviceFlag = OCL_DEVICE_ALL;
|
||||
}
|
||||
|
||||
AMDGPUDevice::~AMDGPUDevice()
|
||||
{
|
||||
mHWBits.clear();
|
||||
mSWBits.clear();
|
||||
}
|
||||
|
||||
size_t AMDGPUDevice::getMaxGDSSize() const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AMDGPUDevice::getDeviceFlag() const
|
||||
{
|
||||
return mDeviceFlag;
|
||||
}
|
||||
|
||||
size_t AMDGPUDevice::getMaxNumCBs() const
|
||||
{
|
||||
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
|
||||
return HW_MAX_NUM_CB;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDGPUDevice::getMaxCBSize() const
|
||||
{
|
||||
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
|
||||
return MAX_CB_SIZE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDGPUDevice::getMaxScratchSize() const
|
||||
{
|
||||
return 65536;
|
||||
}
|
||||
|
||||
uint32_t AMDGPUDevice::getStackAlignment() const
|
||||
{
|
||||
return 16;
|
||||
}
|
||||
|
||||
void AMDGPUDevice::setCaps()
|
||||
{
|
||||
mSWBits.set(AMDGPUDeviceInfo::HalfOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::ByteOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::ShortOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::NoInline);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::MacroDB);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
|
||||
} else {
|
||||
mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
|
||||
} else {
|
||||
mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
|
||||
}
|
||||
mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::LongOps);
|
||||
}
|
||||
|
||||
AMDGPUDeviceInfo::ExecutionMode
|
||||
AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const
|
||||
{
|
||||
if (mHWBits[Caps]) {
|
||||
assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
|
||||
return AMDGPUDeviceInfo::Hardware;
|
||||
}
|
||||
|
||||
if (mSWBits[Caps]) {
|
||||
assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
|
||||
return AMDGPUDeviceInfo::Software;
|
||||
}
|
||||
|
||||
return AMDGPUDeviceInfo::Unsupported;
|
||||
|
||||
}
|
||||
|
||||
bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const
|
||||
{
|
||||
return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
|
||||
}
|
||||
|
||||
bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const
|
||||
{
|
||||
return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
|
||||
}
|
||||
|
||||
bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const
|
||||
{
|
||||
return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
|
||||
}
|
||||
|
||||
std::string
|
||||
AMDGPUDevice::getDataLayout() const
|
||||
{
|
||||
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
|
||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
"-n8:16:32:64");
|
||||
}
|
||||
|
|
@ -1,115 +0,0 @@
|
|||
//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef _AMDILDEVICEIMPL_H_
|
||||
#define _AMDILDEVICEIMPL_H_
|
||||
#include "AMDIL.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUSubtarget;
|
||||
class MCStreamer;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Interface for data that is specific to a single device
|
||||
//===----------------------------------------------------------------------===//
|
||||
class AMDGPUDevice {
|
||||
public:
|
||||
AMDGPUDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPUDevice();
|
||||
|
||||
// Enum values for the various memory types.
|
||||
enum {
|
||||
RAW_UAV_ID = 0,
|
||||
ARENA_UAV_ID = 1,
|
||||
LDS_ID = 2,
|
||||
GDS_ID = 3,
|
||||
SCRATCH_ID = 4,
|
||||
CONSTANT_ID = 5,
|
||||
GLOBAL_ID = 6,
|
||||
MAX_IDS = 7
|
||||
} IO_TYPE_IDS;
|
||||
|
||||
// Returns the max LDS size that the hardware supports. Size is in
|
||||
// bytes.
|
||||
virtual size_t getMaxLDSSize() const = 0;
|
||||
|
||||
// Returns the max GDS size that the hardware supports if the GDS is
|
||||
// supported by the hardware. Size is in bytes.
|
||||
virtual size_t getMaxGDSSize() const;
|
||||
|
||||
// Returns the max number of hardware constant address spaces that
|
||||
// are supported by this device.
|
||||
virtual size_t getMaxNumCBs() const;
|
||||
|
||||
// Returns the max number of bytes a single hardware constant buffer
|
||||
// can support. Size is in bytes.
|
||||
virtual size_t getMaxCBSize() const;
|
||||
|
||||
// Returns the max number of bytes allowed by the hardware scratch
|
||||
// buffer. Size is in bytes.
|
||||
virtual size_t getMaxScratchSize() const;
|
||||
|
||||
// Get the flag that corresponds to the device.
|
||||
virtual uint32_t getDeviceFlag() const;
|
||||
|
||||
// Returns the number of work-items that exist in a single hardware
|
||||
// wavefront.
|
||||
virtual size_t getWavefrontSize() const = 0;
|
||||
|
||||
// Get the generational name of this specific device.
|
||||
virtual uint32_t getGeneration() const = 0;
|
||||
|
||||
// Get the stack alignment of this specific device.
|
||||
virtual uint32_t getStackAlignment() const;
|
||||
|
||||
// Get the resource ID for this specific device.
|
||||
virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
|
||||
|
||||
// Get the max number of UAV's for this device.
|
||||
virtual uint32_t getMaxNumUAVs() const = 0;
|
||||
|
||||
// API utilizing more detailed capabilities of each family of
|
||||
// cards. If a capability is supported, then either usesHardware or
|
||||
// usesSoftware returned true. If usesHardware returned true, then
|
||||
// usesSoftware must return false for the same capability. Hardware
|
||||
// execution means that the feature is done natively by the hardware
|
||||
// and is not emulated by the softare. Software execution means
|
||||
// that the feature could be done in the hardware, but there is
|
||||
// software that emulates it with possibly using the hardware for
|
||||
// support since the hardware does not fully comply with OpenCL
|
||||
// specs.
|
||||
bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
|
||||
bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
|
||||
bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
|
||||
virtual std::string getDataLayout() const;
|
||||
static const unsigned int MAX_LDS_SIZE_700 = 16384;
|
||||
static const unsigned int MAX_LDS_SIZE_800 = 32768;
|
||||
static const unsigned int WavefrontSize = 64;
|
||||
static const unsigned int HalfWavefrontSize = 32;
|
||||
static const unsigned int QuarterWavefrontSize = 16;
|
||||
protected:
|
||||
virtual void setCaps();
|
||||
llvm::BitVector mHWBits;
|
||||
llvm::BitVector mSWBits;
|
||||
AMDGPUSubtarget *mSTM;
|
||||
uint32_t mDeviceFlag;
|
||||
private:
|
||||
AMDGPUDeviceInfo::ExecutionMode
|
||||
getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
|
||||
}; // AMDILDevice
|
||||
|
||||
} // namespace llvm
|
||||
#endif // _AMDILDEVICEIMPL_H_
|
||||
|
|
@ -1,94 +0,0 @@
|
|||
//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Function that creates DeviceInfo from a device name and other information.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILDevices.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
namespace llvm {
|
||||
namespace AMDGPUDeviceInfo {
|
||||
AMDGPUDevice*
|
||||
getDeviceFromName(const std::string &deviceName, AMDGPUSubtarget *ptr,
|
||||
bool is64bit, bool is64on32bit)
|
||||
{
|
||||
if (deviceName.c_str()[2] == '7') {
|
||||
switch (deviceName.c_str()[3]) {
|
||||
case '1':
|
||||
return new AMDGPU710Device(ptr);
|
||||
case '7':
|
||||
return new AMDGPU770Device(ptr);
|
||||
default:
|
||||
return new AMDGPU7XXDevice(ptr);
|
||||
};
|
||||
} else if (deviceName == "cypress") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDGPUCypressDevice(ptr);
|
||||
} else if (deviceName == "juniper") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDGPUEvergreenDevice(ptr);
|
||||
} else if (deviceName == "redwood") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDGPURedwoodDevice(ptr);
|
||||
} else if (deviceName == "cedar") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDGPUCedarDevice(ptr);
|
||||
} else if (deviceName == "barts"
|
||||
|| deviceName == "turks") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDGPUNIDevice(ptr);
|
||||
} else if (deviceName == "cayman") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDGPUCaymanDevice(ptr);
|
||||
} else if (deviceName == "caicos") {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDGPUNIDevice(ptr);
|
||||
} else if (deviceName == "SI") {
|
||||
return new AMDGPUSIDevice(ptr);
|
||||
} else {
|
||||
#if DEBUG
|
||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
||||
assert(!is64on32bit && "This device does not support 64bit"
|
||||
" on 32bit pointers!");
|
||||
#endif
|
||||
return new AMDGPU7XXDevice(ptr);
|
||||
}
|
||||
}
|
||||
} // End namespace AMDGPUDeviceInfo
|
||||
} // End namespace llvm
|
||||
|
|
@ -1,90 +0,0 @@
|
|||
//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#ifndef _AMDILDEVICEINFO_H_
|
||||
#define _AMDILDEVICEINFO_H_
|
||||
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace llvm
|
||||
{
|
||||
class AMDGPUDevice;
|
||||
class AMDGPUSubtarget;
|
||||
namespace AMDGPUDeviceInfo
|
||||
{
|
||||
// Each Capabilities can be executed using a hardware instruction,
|
||||
// emulated with a sequence of software instructions, or not
|
||||
// supported at all.
|
||||
enum ExecutionMode {
|
||||
Unsupported = 0, // Unsupported feature on the card(Default value)
|
||||
Software, // This is the execution mode that is set if the
|
||||
// feature is emulated in software
|
||||
Hardware // This execution mode is set if the feature exists
|
||||
// natively in hardware
|
||||
};
|
||||
|
||||
// Any changes to this needs to have a corresponding update to the
|
||||
// twiki page GPUMetadataABI
|
||||
enum Caps {
|
||||
HalfOps = 0x1, // Half float is supported or not.
|
||||
DoubleOps = 0x2, // Double is supported or not.
|
||||
ByteOps = 0x3, // Byte(char) is support or not.
|
||||
ShortOps = 0x4, // Short is supported or not.
|
||||
LongOps = 0x5, // Long is supported or not.
|
||||
Images = 0x6, // Images are supported or not.
|
||||
ByteStores = 0x7, // ByteStores available(!HD4XXX).
|
||||
ConstantMem = 0x8, // Constant/CB memory.
|
||||
LocalMem = 0x9, // Local/LDS memory.
|
||||
PrivateMem = 0xA, // Scratch/Private/Stack memory.
|
||||
RegionMem = 0xB, // OCL GDS Memory Extension.
|
||||
FMA = 0xC, // Use HW FMA or SW FMA.
|
||||
ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
|
||||
MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
|
||||
Reserved0 = 0xF, // ReservedFlag
|
||||
NoAlias = 0x10, // Cached loads.
|
||||
Signed24BitOps = 0x11, // Peephole Optimization.
|
||||
// Debug mode implies that no hardware features or optimizations
|
||||
// are performned and that all memory access go through a single
|
||||
// uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
|
||||
Debug = 0x12, // Debug mode is enabled.
|
||||
CachedMem = 0x13, // Cached mem is available or not.
|
||||
BarrierDetect = 0x14, // Detect duplicate barriers.
|
||||
Reserved1 = 0x15, // Reserved flag
|
||||
ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
|
||||
ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
|
||||
TmrReg = 0x18, // Flag to specify if Tmr register is supported.
|
||||
NoInline = 0x19, // Flag to specify that no inlining should occur.
|
||||
MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
|
||||
HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
|
||||
ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
|
||||
PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
|
||||
// If more capabilities are required, then
|
||||
// this number needs to be increased.
|
||||
// All capabilities must come before this
|
||||
// number.
|
||||
MaxNumberCapabilities = 0x20
|
||||
};
|
||||
// These have to be in order with the older generations
|
||||
// having the lower number enumerations.
|
||||
enum Generation {
|
||||
HD4XXX = 0, // 7XX based devices.
|
||||
HD5XXX, // Evergreen based devices.
|
||||
HD6XXX, // NI/Evergreen+ based devices.
|
||||
HD7XXX,
|
||||
HDTEST, // Experimental feature testing device.
|
||||
HDNUMGEN
|
||||
};
|
||||
|
||||
|
||||
AMDGPUDevice*
|
||||
getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
|
||||
bool is64bit = false, bool is64on32bit = false);
|
||||
} // namespace AMDILDeviceInfo
|
||||
} // namespace llvm
|
||||
#endif // _AMDILDEVICEINFO_H_
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#ifndef __AMDIL_DEVICES_H_
|
||||
#define __AMDIL_DEVICES_H_
|
||||
// Include all of the device specific header files
|
||||
// This file is for Internal use only!
|
||||
#include "AMDIL7XXDevice.h"
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDILNIDevice.h"
|
||||
#include "AMDILSIDevice.h"
|
||||
|
||||
#endif // _AMDIL_DEVICES_H_
|
||||
|
|
@ -1,169 +0,0 @@
|
|||
//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUDevice(ST) {
|
||||
setCaps();
|
||||
std::string name = ST->getDeviceName();
|
||||
if (name == "cedar") {
|
||||
mDeviceFlag = OCL_DEVICE_CEDAR;
|
||||
} else if (name == "redwood") {
|
||||
mDeviceFlag = OCL_DEVICE_REDWOOD;
|
||||
} else if (name == "cypress") {
|
||||
mDeviceFlag = OCL_DEVICE_CYPRESS;
|
||||
} else {
|
||||
mDeviceFlag = OCL_DEVICE_JUNIPER;
|
||||
}
|
||||
}
|
||||
|
||||
AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
|
||||
}
|
||||
|
||||
size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_800;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
|
||||
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
|
||||
return MAX_LDS_SIZE_800;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
|
||||
return 12;
|
||||
}
|
||||
|
||||
uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
|
||||
switch(id) {
|
||||
default:
|
||||
assert(0 && "ID type passed in is unknown!");
|
||||
break;
|
||||
case CONSTANT_ID:
|
||||
case RAW_UAV_ID:
|
||||
return GLOBAL_RETURN_RAW_UAV_ID;
|
||||
case GLOBAL_ID:
|
||||
case ARENA_UAV_ID:
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
case LDS_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return DEFAULT_LDS_ID;
|
||||
} else {
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
}
|
||||
case GDS_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
|
||||
return DEFAULT_GDS_ID;
|
||||
} else {
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
}
|
||||
case SCRATCH_ID:
|
||||
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
|
||||
return DEFAULT_SCRATCH_ID;
|
||||
} else {
|
||||
return DEFAULT_ARENA_UAV_ID;
|
||||
}
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
|
||||
return AMDGPUDevice::WavefrontSize;
|
||||
}
|
||||
|
||||
uint32_t AMDGPUEvergreenDevice::getGeneration() const {
|
||||
return AMDGPUDeviceInfo::HD5XXX;
|
||||
}
|
||||
|
||||
void AMDGPUEvergreenDevice::setCaps() {
|
||||
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
|
||||
mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
|
||||
mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
|
||||
mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
|
||||
mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::ByteStores);
|
||||
}
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
|
||||
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
|
||||
mSWBits.set(AMDGPUDeviceInfo::RegionMem);
|
||||
} else {
|
||||
mHWBits.set(AMDGPUDeviceInfo::LocalMem);
|
||||
mHWBits.set(AMDGPUDeviceInfo::RegionMem);
|
||||
}
|
||||
mHWBits.set(AMDGPUDeviceInfo::Images);
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::NoAlias);
|
||||
}
|
||||
mHWBits.set(AMDGPUDeviceInfo::CachedMem);
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
|
||||
}
|
||||
mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
|
||||
mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
|
||||
mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
|
||||
mHWBits.set(AMDGPUDeviceInfo::LongOps);
|
||||
mSWBits.reset(AMDGPUDeviceInfo::LongOps);
|
||||
mHWBits.set(AMDGPUDeviceInfo::TmrReg);
|
||||
}
|
||||
|
||||
AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUEvergreenDevice(ST) {
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDGPUCypressDevice::~AMDGPUCypressDevice() {
|
||||
}
|
||||
|
||||
void AMDGPUCypressDevice::setCaps() {
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
|
||||
mHWBits.set(AMDGPUDeviceInfo::FMA);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUEvergreenDevice(ST) {
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDGPUCedarDevice::~AMDGPUCedarDevice() {
|
||||
}
|
||||
|
||||
void AMDGPUCedarDevice::setCaps() {
|
||||
mSWBits.set(AMDGPUDeviceInfo::FMA);
|
||||
}
|
||||
|
||||
size_t AMDGPUCedarDevice::getWavefrontSize() const {
|
||||
return AMDGPUDevice::QuarterWavefrontSize;
|
||||
}
|
||||
|
||||
AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUEvergreenDevice(ST) {
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDGPURedwoodDevice::~AMDGPURedwoodDevice()
|
||||
{
|
||||
}
|
||||
|
||||
void AMDGPURedwoodDevice::setCaps() {
|
||||
mSWBits.set(AMDGPUDeviceInfo::FMA);
|
||||
}
|
||||
|
||||
size_t AMDGPURedwoodDevice::getWavefrontSize() const {
|
||||
return AMDGPUDevice::HalfWavefrontSize;
|
||||
}
|
||||
|
|
@ -1,87 +0,0 @@
|
|||
//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef _AMDILEVERGREENDEVICE_H_
|
||||
#define _AMDILEVERGREENDEVICE_H_
|
||||
#include "AMDILDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUSubtarget;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Evergreen generation of devices and their respective sub classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
// The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
|
||||
// series of cards. This class contains information required to differentiate
|
||||
// the Evergreen device from the generic AMDGPUDevice. This device represents
|
||||
// that capabilities of the 'Juniper' cards, also known as the HD57XX.
|
||||
class AMDGPUEvergreenDevice : public AMDGPUDevice {
|
||||
public:
|
||||
AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPUEvergreenDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual size_t getMaxGDSSize() const;
|
||||
virtual size_t getWavefrontSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
virtual uint32_t getMaxNumUAVs() const;
|
||||
virtual uint32_t getResourceID(uint32_t) const;
|
||||
protected:
|
||||
virtual void setCaps();
|
||||
}; // AMDGPUEvergreenDevice
|
||||
|
||||
// The AMDGPUCypressDevice is similiar to the AMDGPUEvergreenDevice, except it has
|
||||
// support for double precision operations. This device is used to represent
|
||||
// both the Cypress and Hemlock cards, which are commercially known as HD58XX
|
||||
// and HD59XX cards.
|
||||
class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
AMDGPUCypressDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPUCypressDevice();
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDGPUCypressDevice
|
||||
|
||||
|
||||
// The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
|
||||
// devices. This class differs from the base AMDGPUEvergreenDevice in that the
|
||||
// device is a ~quarter of the 'Juniper'. These are commercially known as the
|
||||
// HD54XX and HD53XX series of cards.
|
||||
class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
AMDGPUCedarDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPUCedarDevice();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDGPUCedarDevice
|
||||
|
||||
// The AMDGPURedwoodDevice is the class the represents all of the 'Redwood' based
|
||||
// devices. This class differs from the base class, in that these devices are
|
||||
// considered about half of a 'Juniper' device. These are commercially known as
|
||||
// the HD55XX and HD56XX series of cards.
|
||||
class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
|
||||
virtual ~AMDGPURedwoodDevice();
|
||||
virtual size_t getWavefrontSize() const;
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDGPURedwoodDevice
|
||||
|
||||
} // namespace llvm
|
||||
#endif // _AMDGPUEVERGREENDEVICE_H_
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface to describe a layout of a stack frame on a AMDIL target machine
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "AMDILFrameLowering.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
|
||||
int LAO, unsigned TransAl)
|
||||
: TargetFrameLowering(D, StackAl, LAO, TransAl)
|
||||
{
|
||||
}
|
||||
|
||||
AMDGPUFrameLowering::~AMDGPUFrameLowering()
|
||||
{
|
||||
}
|
||||
|
||||
/// getFrameIndexOffset - Returns the displacement from the frame register to
|
||||
/// the stack frame of the specified index.
|
||||
int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
|
||||
int FI) const {
|
||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||
return MFI->getObjectOffset(FI);
|
||||
}
|
||||
|
||||
const TargetFrameLowering::SpillSlot *
|
||||
AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
|
||||
{
|
||||
NumEntries = 0;
|
||||
return 0;
|
||||
}
|
||||
void
|
||||
AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const
|
||||
{
|
||||
}
|
||||
void
|
||||
AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
|
||||
{
|
||||
}
|
||||
bool
|
||||
AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1,46 +0,0 @@
|
|||
//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface to describe a layout of a stack frame on a AMDIL target machine
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef _AMDILFRAME_LOWERING_H_
|
||||
#define _AMDILFRAME_LOWERING_H_
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
|
||||
/// Information about the stack frame layout on the AMDGPU targets. It holds
|
||||
/// the direction of the stack growth, the known stack alignment on entry to
|
||||
/// each function, and the offset to the locals area.
|
||||
/// See TargetFrameInfo for more comments.
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUFrameLowering : public TargetFrameLowering {
|
||||
public:
|
||||
AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
|
||||
TransAl = 1);
|
||||
virtual ~AMDGPUFrameLowering();
|
||||
virtual int getFrameIndexOffset(const MachineFunction &MF,
|
||||
int FI) const;
|
||||
virtual const SpillSlot *
|
||||
getCalleeSavedSpillSlots(unsigned &NumEntries) const;
|
||||
virtual void emitPrologue(MachineFunction &MF) const;
|
||||
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
virtual bool hasFP(const MachineFunction &MF) const;
|
||||
}; // class AMDGPUFrameLowering
|
||||
} // namespace llvm
|
||||
#endif // _AMDILFRAME_LOWERING_H_
|
||||
|
|
@ -1,395 +0,0 @@
|
|||
//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines an instruction selector for the AMDIL target.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPUISelLowering.h" // For AMDGPUISD
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDILDevices.h"
|
||||
#include "AMDILUtilityFunctions.h"
|
||||
#include "llvm/ADT/ValueMap.h"
|
||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include <list>
|
||||
#include <queue>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Selector Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDGPUDAGToDAGISel - AMDGPU specific code to select AMDGPU machine instructions
|
||||
// //for SelectionDAG operations.
|
||||
//
|
||||
namespace {
/// AMDGPU-specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const AMDGPUSubtarget &Subtarget;
public:
  AMDGPUDAGToDAGISel(TargetMachine &TM);
  virtual ~AMDGPUDAGToDAGISel();

  /// Select the machine node for N; returns NULL when N already carries a
  /// machine opcode.
  SDNode *Select(SDNode *N);
  virtual const char *getPassName() const;

private:
  /// Build a 32-bit target-constant node holding Imm.
  inline SDValue getSmallIPtrImm(unsigned Imm);

  // Complex pattern selectors used from the .td patterns.
  bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
  bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
  bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);

  /// True when ptr is pointer-typed and lives in the given address space.
  static bool checkType(const Value *ptr, unsigned int addrspace);
  /// Walk a value's use-def chain looking for its underlying base pointer.
  static const Value *getBasePointerValue(const Value *V);

  // Address-space predicates consulted by the generated matcher below.
  static bool isGlobalStore(const StoreSDNode *N);
  static bool isPrivateStore(const StoreSDNode *N);
  static bool isLocalStore(const StoreSDNode *N);
  static bool isRegionStore(const StoreSDNode *N);

  static bool isCPLoad(const LoadSDNode *N);
  static bool isConstantLoad(const LoadSDNode *N, int cbID);
  static bool isGlobalLoad(const LoadSDNode *N);
  static bool isPrivateLoad(const LoadSDNode *N);
  static bool isLocalLoad(const LoadSDNode *N);
  static bool isRegionLoad(const LoadSDNode *N);

  bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
  bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);

  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
} // end anonymous namespace
|
||||
|
||||
// createAMDGPUISelDag - This pass converts a legalized DAG into a AMDGPU-specific
|
||||
// DAG, ready for instruction scheduling.
|
||||
//
|
||||
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
|
||||
) {
|
||||
return new AMDGPUDAGToDAGISel(TM);
|
||||
}
|
||||
|
||||
// Cache the subtarget so the selectors can make per-target decisions.
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
    : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>()) {
}
|
||||
|
||||
// Out-of-line definition; nothing beyond the base class to release.
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
|
||||
|
||||
// Wrap Imm in a 32-bit target-constant node.
SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
  SDValue Const32 = CurDAG->getTargetConstant(Imm, MVT::i32);
  return Const32;
}
|
||||
|
||||
// Split a parameter address into (base, offset). Always succeeds: an add is
// split into its two operands, a frame index becomes a target frame index
// with zero offset, and anything else is used whole with a zero offset.
bool AMDGPUDAGToDAGISel::SelectADDRParam(
    SDValue Addr, SDValue& R1, SDValue& R2) {
  const unsigned Opc = Addr.getOpcode();

  if (Opc == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
    return true;
  }

  if (Opc == ISD::FrameIndex) {
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
      R2 = CurDAG->getTargetConstant(0, MVT::i32);
      return true;
    }
  }

  // Fallback: whole address as the base, zero offset.
  R1 = Addr;
  R2 = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
|
||||
|
||||
// Like SelectADDRParam, but refuses symbolic addresses, which are matched by
// other patterns.
bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
  const unsigned Opc = Addr.getOpcode();
  const bool Symbolic = (Opc == ISD::TargetExternalSymbol ||
                         Opc == ISD::TargetGlobalAddress);
  return Symbolic ? false : SelectADDRParam(Addr, R1, R2);
}
|
||||
|
||||
|
||||
// 64-bit variant of SelectADDR: rejects symbolic addresses, otherwise splits
// Addr into (base, offset) using i64 constants.
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
  const unsigned Opc = Addr.getOpcode();
  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Opc == ISD::ADD) {
    R1 = Addr.getOperand(0);
    R2 = Addr.getOperand(1);
    return true;
  }

  if (Opc == ISD::FrameIndex) {
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Addr)) {
      R1 = CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i64);
      R2 = CurDAG->getTargetConstant(0, MVT::i64);
      return true;
    }
  }

  // Fallback: whole address as the base, zero offset.
  R1 = Addr;
  R2 = CurDAG->getTargetConstant(0, MVT::i64);
  return true;
}
|
||||
|
||||
// Entry point of the instruction selector. Frame indices are selected by
// hand into a COPY of the target frame index; everything else is handed to
// the tablegen-generated matcher.
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
  if (N->isMachineOpcode()) {
    return NULL; // Already selected.
  }

  if (N->getOpcode() == ISD::FrameIndex) {
    if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
      SDValue TFI = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
      return CurDAG->SelectNodeTo(N, AMDGPU::COPY, N->getValueType(0), TFI);
    }
  }

  return SelectCode(N);
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
|
||||
if (!ptr) {
|
||||
return false;
|
||||
}
|
||||
Type *ptrType = ptr->getType();
|
||||
return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
|
||||
}
|
||||
|
||||
// Breadth-first walk over V's use-def chain, looking for the underlying
// allocation: a pointer-typed function argument, a global variable, or an
// alloca. Returns NULL when no such base is found.
//
// Improvements over the old code: isa<> is used where dyn_cast<> was only
// exploited as a boolean predicate, the Constant/ConstantExpr double cast is
// collapsed into one dyn_cast, and the operand count is hoisted out of the
// inner loop. Traversal order and results are unchanged.
const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V)
{
  if (!V) {
    return NULL;
  }
  const Value *ret = NULL;
  // Guards against revisiting nodes in cyclic use chains.
  ValueMap<const Value *, bool> Visited;
  std::queue<const Value *, std::list<const Value *> > Worklist;
  Worklist.push(V);
  while (!Worklist.empty()) {
    V = Worklist.front();
    if (Visited.find(V) == Visited.end()) {
      Visited[V] = true;
      if (isa<Argument>(V) && isa<PointerType>(V->getType())) {
        ret = V;
        break;
      } else if (isa<GlobalVariable>(V)) {
        ret = V;
        break;
      } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
        // Only the first operand is followed, matching the old behavior.
        Worklist.push(CE->getOperand(0));
      } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
        ret = AI;
        break;
      } else if (const Instruction *I = dyn_cast<Instruction>(V)) {
        for (uint32_t x = 0, e = I->getNumOperands(); x < e; ++x) {
          Worklist.push(I->getOperand(x));
        }
      }
      // Other Value kinds (plain constants, etc.) are ignored.
    }
    Worklist.pop();
  }
  return ret;
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
|
||||
return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
|
||||
if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
|
||||
return true;
|
||||
}
|
||||
MachineMemOperand *MMO = N->getMemOperand();
|
||||
const Value *V = MMO->getValue();
|
||||
const Value *BV = getBasePointerValue(V);
|
||||
if (MMO
|
||||
&& MMO->getValue()
|
||||
&& ((V && dyn_cast<GlobalValue>(V))
|
||||
|| (BV && dyn_cast<GlobalValue>(
|
||||
getBasePointerValue(MMO->getValue()))))) {
|
||||
return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
|
||||
return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
|
||||
MachineMemOperand *MMO = N->getMemOperand();
|
||||
if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
|
||||
if (MMO) {
|
||||
const Value *V = MMO->getValue();
|
||||
const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
|
||||
if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
|
||||
if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
|
||||
// Check to make sure we are not a constant pool load or a constant load
|
||||
// that is marked as a private load
|
||||
if (isCPLoad(N) || isConstantLoad(N, -1)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
|
||||
&& !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS))
|
||||
{
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Human-readable pass name reported by the pass infrastructure.
const char *AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
|
||||
|
||||
#ifdef DEBUGTMP
|
||||
#undef INT64_C
|
||||
#endif
|
||||
#undef DEBUGTMP
|
||||
|
||||
///==== AMDGPU Functions ====///
|
||||
|
||||
// Match (base + constant) where the byte offset, converted to dwords, fits
// in 8 bits. Non-add, non-symbolic addresses match with a zero offset.
bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
                                              SDValue& Offset) {
  const unsigned Opc = Addr.getOpcode();
  if (Opc == ISD::TargetExternalSymbol || Opc == ISD::TargetGlobalAddress) {
    return false;
  }

  if (Opc != ISD::ADD) {
    // Default case: the whole address is the base, no offset.
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, MVT::i32);
    return true;
  }

  bool FoundOffset = false;
  for (unsigned Idx = 0, E = Addr.getNumOperands(); Idx != E; ++Idx) {
    SDValue Opnd = Addr.getOperand(Idx);
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Opnd);
    if (!CN) {
      // Non-constant operand: must be the base pointer.
      Base = Opnd;
      continue;
    }
    // The constant is a byte offset; convert to dwords and check that it
    // fits in 8 bits.
    uint64_t DWordOffset = CN->getZExtValue() >> 2;
    if (isUInt<8>(DWordOffset)) {
      FoundOffset = true;
      Offset = CurDAG->getTargetConstant(DWordOffset, MVT::i32);
    }
  }
  return FoundOffset;
}
|
||||
|
||||
// Address selection for vertex-fetch reads: fold a signed-16-bit immediate
// into the offset field whenever possible.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset)
{
  // Case 1: (add base, imm16) -- use the immediate as the offset.
  if (Addr.getOpcode() == ISD::ADD) {
    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (CN && isInt<16>(CN->getZExtValue())) {
      Base = Addr.getOperand(0);
      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
      return true;
    }
  }

  // Case 2: the whole address is a small constant -- read from the zero
  // register and move the constant into the offset field.
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr)) {
    if (isInt<16>(CN->getZExtValue())) {
      Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                    CurDAG->getEntryNode().getDebugLoc(),
                                    AMDGPU::ZERO, MVT::i32);
      Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
      return true;
    }
  }

  // Default case, no offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, MVT::i32);
  return true;
}
|
||||
|
||||
// Match an address made of a register base plus a register offset, i.e. a
// plain add. (Target symbols/globals are never ISD::ADD, so the single
// opcode test subsumes the old explicit checks for them.)
bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
                                       SDValue& Offset) {
  if (Addr.getOpcode() != ISD::ADD) {
    return false;
  }
  Base = Addr.getOperand(0);
  Offset = Addr.getOperand(1);
  return true;
}
|
||||
|
|
@ -1,677 +0,0 @@
|
|||
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains TargetLowering functions borrowed from AMDLI.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUISelLowering.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
#include "AMDILDevices.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "AMDILUtilityFunctions.h"
|
||||
#include "llvm/CallingConv.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
|
||||
using namespace llvm;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Calling Convention Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "AMDGPUGenCallingConv.inc"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TargetLowering Implementation Help Functions End
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TargetLowering Class Implementation Begins
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register the operation actions, scheduling preference, and memcpy limits
// inherited from the AMDIL lowering. Called once while constructing
// AMDGPUTargetLowering.
void AMDGPUTargetLowering::InitAMDILLowering()
{
  // Every value type the backend registers generic actions for.
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are
  // supported

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit SDIV has no custom routine; see LowerSDIV64.
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
  {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);

  }
  // Extra actions for devices with 64-bit integer support.
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant , MVT::i64 , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);


  // Use the default implementation.
  setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
  setOperationAction(ISD::Constant , MVT::i32 , Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

  // NOTE(review): these #undefs are no-ops -- the names above are local
  // variables, not macros.
#undef numTypes
#undef numIntTypes
#undef numVectorTypes
#undef numFloatTypes
}
|
||||
|
||||
// No AMDGPU intrinsic is described as touching memory here, so Info is
// never filled in.
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
|
||||
// The backend supports 32 and 64 bit floating point immediates
|
||||
bool
|
||||
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
|
||||
{
|
||||
if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
|
||||
|| VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
|
||||
{
|
||||
if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
|
||||
|| VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// computeMaskedBitsForTargetNode - Determine which bits of Op are known to
// be zero or one. Op is expected to be a target-specific node. Used by the
// DAG combiner.
void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    default: break;
    case ISD::SELECT_CC:
      DAG.ComputeMaskedBits(
          Op.getOperand(1),
          KnownZero,
          KnownOne,
          Depth + 1
          );
      // NOTE(review): unlike the call above, this query does not pass
      // Depth + 1 -- presumably an oversight; confirm against the
      // ComputeMaskedBits default depth before relying on it.
      DAG.ComputeMaskedBits(
          Op.getOperand(0),
          KnownZero2,
          KnownOne2
          );
      assert((KnownZero & KnownOne) == 0
          && "Bits known to be one AND zero?");
      assert((KnownZero2 & KnownOne2) == 0
          && "Bits known to be one AND zero?");
      // Only known if known in both the LHS and RHS
      KnownOne &= KnownOne2;
      KnownZero &= KnownZero2;
      break;
  };
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Other Lowering Hooks
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Lower a signed division by dispatching on the scalar bit width. Unknown
// types are returned unchanged.
SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT ScalarVT = Op.getValueType().getScalarType();
  if (ScalarVT == MVT::i64) {
    return LowerSDIV64(Op, DAG);
  }
  if (ScalarVT == MVT::i32) {
    return LowerSDIV32(Op, DAG);
  }
  if (ScalarVT == MVT::i16 || ScalarVT == MVT::i8) {
    return LowerSDIV24(Op, DAG);
  }
  return SDValue(Op.getNode(), 0);
}
|
||||
|
||||
// Lower a signed remainder by dispatching on the scalar bit width. Unknown
// types are returned unchanged.
SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT ScalarVT = Op.getValueType().getScalarType();
  if (ScalarVT == MVT::i64) {
    return LowerSREM64(Op, DAG);
  }
  if (ScalarVT == MVT::i32) {
    return LowerSREM32(Op, DAG);
  }
  if (ScalarVT == MVT::i16) {
    return LowerSREM16(Op, DAG);
  }
  if (ScalarVT == MVT::i8) {
    return LowerSREM8(Op, DAG);
  }
  return SDValue(Op.getNode(), 0);
}
|
||||
|
||||
// Lower SIGN_EXTEND_INREG as a shift-left / arithmetic-shift-right pair.
// Values narrower than 32 bits are first zero-extended to 32 bits so the
// shifts operate on a full register, then truncated back at the end.
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
|
||||
// Build an integer EVT of the requested element width (32 or 64 bits) whose
// total size matches size * numEle, with at least one element.
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int totalBits = (size * numEle);
  int elems = totalBits >> ((size == 64) ? 6 : 5);
  if (elems == 0) {
    elems = 1;
  }
  MVT::SimpleValueType ScalarTy = (size == 64) ? MVT::i64 : MVT::i32;
  if (elems == 1) {
    return EVT(ScalarTy);
  }
  return EVT(MVT::getVectorVT(ScalarTy, elems));
}
|
||||
|
||||
// Lower BRCOND into the target BRANCH_COND node. Note the operand order
// changes from (chain, cond, target) to (chain, target, cond).
SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);
  return DAG.getNode(AMDGPUISD::BRANCH_COND,
                     Op.getDebugLoc(),
                     Op.getValueType(),
                     Chain, Jump, Cond);
}
|
||||
|
||||
// Lower a signed division whose operands are at most 24 bits wide (i8/i16
// scalars and their vectors) by dividing in floating point and applying an
// integer correction term for the rounding.
//
// Fixes over the old code: the if/else computing 'cv' had two byte-for-byte
// identical branches and is collapsed into one statement, and INTTY/FLTTY
// are initialized to the scalar types so an unexpected vector width cannot
// leave them undefined.
SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // Integer/float working types matching the operand's vector shape.
  MVT INTTY = MVT::i32;
  MVT FLTTY = MVT::f32;
  if (OVT.isVector() && OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.isVector() && OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib, (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}
|
||||
|
||||
// Lower a 32-bit signed division in terms of unsigned division: strip the
// signs off both operands, divide unsigned, then restore the sign of the
// quotient (negative iff exactly one input was negative).
SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSDIV32 function generates equivalent to the following IL.
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0  (all-ones mask when LHS is negative)
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0  (all-ones mask when RHS is negative)
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10  (add+xor against the mask == absolute value)
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11  (sign of the quotient)
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10  (re-apply the sign)
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
|
||||
|
||||
SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  // 64-bit signed division has no custom lowering yet; hand back
  // result 0 of the original node unchanged.
  SDNode *Node = Op.getNode();
  return SDValue(Node, 0);
}
|
||||
|
||||
SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  // Promote 8-bit (scalar or vector) signed remainder to 32 bits,
  // compute SREM there, and truncate back to the original type.
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();

  // Matching 32-bit type: same element count, wider elements.
  MVT WideTy;
  if (OVT == MVT::v2i8) {
    WideTy = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    WideTy = MVT::v4i32;
  } else {
    WideTy = MVT::i32;
  }

  SDValue Lhs = DAG.getSExtOrTrunc(Op.getOperand(0), DL, WideTy);
  SDValue Rhs = DAG.getSExtOrTrunc(Op.getOperand(1), DL, WideTy);
  SDValue Rem = DAG.getNode(ISD::SREM, DL, WideTy, Lhs, Rhs);
  return DAG.getSExtOrTrunc(Rem, DL, OVT);
}
|
||||
|
||||
SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  // Promote 16-bit (scalar or vector) signed remainder to 32 bits,
  // compute SREM there, and truncate back to the original type.
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();

  // Matching 32-bit type: same element count, wider elements.
  MVT WideTy;
  if (OVT == MVT::v2i16) {
    WideTy = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    WideTy = MVT::v4i32;
  } else {
    WideTy = MVT::i32;
  }

  SDValue Lhs = DAG.getSExtOrTrunc(Op.getOperand(0), DL, WideTy);
  SDValue Rhs = DAG.getSExtOrTrunc(Op.getOperand(1), DL, WideTy);
  SDValue Rem = DAG.getNode(ISD::SREM, DL, WideTy, Lhs, Rhs);
  return DAG.getSExtOrTrunc(Rem, DL, OVT);
}
|
||||
|
||||
SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  // Lower 32-bit signed remainder via unsigned arithmetic:
  //   rem = |lhs| - (|lhs| udiv |rhs|) * |rhs|
  // with the result taking the sign of the dividend (truncated-division
  // semantics). Equivalent IL:
  //   ilt r10, r0, 0      ; r10 = (lhs < 0) ? -1 : 0
  //   ilt r11, r1, 0
  //   iadd r0, r0, r10    ; abs of both operands: (x + mask) ^ mask
  //   iadd r1, r1, r11
  //   ixor r0, r0, r10
  //   ixor r1, r1, r11
  //   udiv r20, r0, r1
  //   umul r20, r20, r1
  //   sub r0, r0, r20     ; unsigned remainder
  //   iadd r0, r0, r10    ; restore dividend's sign
  //   ixor DST, r0, r10
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue r0 = Op.getOperand(0);
  SDValue r1 = Op.getOperand(1);

  // Sign masks: all ones when the operand is negative, zero otherwise.
  // Use select_cc (as LowerSDIV32 does) rather than setcc so the mask is
  // guaranteed to be 0/-1 regardless of the target's boolean contents;
  // the add/xor abs trick below requires an all-ones mask.
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // |x| = (x + mask) ^ mask (two's-complement absolute value).
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  // BUGFIX: must be the unsigned *quotient*. The original emitted
  // ISD::UREM here, making the mul/sub below compute
  // r0 - (r0 % r1) * r1 instead of the remainder (e.g. 7 rem 3 gave 4).
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20 -- this is now the unsigned remainder.
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // Restore the dividend's sign: (r + mask) ^ mask.
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
|
||||
|
||||
SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  // 64-bit signed remainder has no custom lowering yet; hand back
  // result 0 of the original node unchanged.
  SDNode *Node = Op.getNode();
  return SDValue(Node, 0);
}
|
||||
|
|
@ -1,270 +0,0 @@
|
|||
//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file describes the AMDIL instructions in TableGen format.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AMDIL Instruction Predicate Definitions
//
// Each predicate is a C++ expression (pasted together from the string
// fragments below) evaluated against the current Subtarget during
// instruction selection. Generation constants (HD4XXX/HD5XXX/...) come
// from AMDGPUDeviceInfo.

// Predicate that is set to true if the hardware supports double precision
// divide
def HasHWDDiv : Predicate<"Subtarget.device()"
        "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
        "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;

// Predicate that is set to true if the hardware supports double, but not double
// precision divide in hardware
def HasSWDDiv : Predicate<"Subtarget.device()"
        "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
        "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;

// Predicate that is set to true if the hardware support 24bit signed
// math ops. Otherwise a software expansion to 32bit math ops is used instead.
def HasHWSign24Bit : Predicate<"Subtarget.device()"
        "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;

// Predicate that is set to true if 64bit operations are supported or not
def HasHW64Bit : Predicate<"Subtarget.device()"
        "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
def HasSW64Bit : Predicate<"Subtarget.device()"
        "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;

// Predicate that is set to true if the timer register is supported
def HasTmrRegister : Predicate<"Subtarget.device()"
        "->isSupported(AMDGPUDeviceInfo::TmrReg)">;
// Predicate that is true if we are at least evergreen series
def HasDeviceIDInst : Predicate<"Subtarget.device()"
        "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;

// Predicate that is true if we have region address space.
def hasRegionAS : Predicate<"Subtarget.device()"
        "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;

// Predicate that is false if we don't have region address space.
def noRegionAS : Predicate<"!Subtarget.device()"
        "->isSupported(AMDGPUDeviceInfo::RegionMem)">;

// Predicate that is set to true if 64bit Mul is supported in the IL or not
// NOTE(review): also gated on the CAL compiler version (SC 139).
def HasHW64Mul : Predicate<"Subtarget.calVersion()"
        ">= CAL_VERSION_SC_139"
        "&& Subtarget.device()"
        "->getGeneration() >="
        "AMDGPUDeviceInfo::HD5XXX">;
def HasSW64Mul : Predicate<"Subtarget.calVersion()"
        "< CAL_VERSION_SC_139">;
// Predicate that is set to true if 64bit Div/Mod is supported in the IL or not
def HasHW64DivMod : Predicate<"Subtarget.device()"
        "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
def HasSW64DivMod : Predicate<"Subtarget.device()"
        "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;

// Predicate that is set to true if 64bit pointer are used.
def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
|
||||
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
// Branch-target operand used by the pseudo branch instructions below.
def brtarget : Operand<OtherVT>;

//===--------------------------------------------------------------------===//
// Custom Selection DAG Type Profiles
//===--------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Generic Profile Types
//===----------------------------------------------------------------------===//

// One result, two operands, all the same type.
def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
  ]>;
// One result, three operands, all the same type.
def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
  ]>;
// Build a vector from a scalar: the operand is an element of the result.
def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
  SDTCisEltOfVec<1, 0>
  ]>;

//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
// Branch instruction where second and third are basic blocks
def SDTIL_BRCond : SDTypeProfile<0, 2, [
  SDTCisVT<0, OtherVT>
  ]>;

//===--------------------------------------------------------------------===//
// Custom Selection DAG Nodes
//===--------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//
// Conditional branch; chains because it orders control flow.
def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
    [SDNPHasChain, SDNPOptInGlue]>;

//===--------------------------------------------------------------------===//
// Instructions
//===--------------------------------------------------------------------===//
// Floating point math functions
def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;

//===----------------------------------------------------------------------===//
// Integer functions
//===----------------------------------------------------------------------===//
def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
  [SDNPCommutative, SDNPAssociative]>;

//===--------------------------------------------------------------------===//
// Custom Pattern DAG Nodes
//===--------------------------------------------------------------------===//
// Store that targets the global address space (checked in C++).
def global_store : PatFrag<(ops node:$val, node:$ptr),
    (store node:$val, node:$ptr), [{
        return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;

//===----------------------------------------------------------------------===//
// Load pattern fragments
//===----------------------------------------------------------------------===//
// Global address space loads
def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
// Constant address space loads
def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
    return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

//===----------------------------------------------------------------------===//
// Complex addressing mode patterns
//===----------------------------------------------------------------------===//
// Two-operand addressing, resolved by SelectADDR/SelectADDR64 in C++;
// the *F variants additionally root at a frame index.
def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction format classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Base format for every AMDIL pseudo instruction in this file: plain
// pseudo (isPseudo = 1) carrying its operands, selection pattern and a
// newline-terminated asm string.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

     let Namespace = "AMDGPU";
     dag OutOperandList = outs;
     dag InOperandList = ins;
     let Pattern = pattern;
     // Every instruction prints on its own line.
     let AsmString = !strconcat(asmstr, "\n");
     let isPseudo = 1;
     let Itinerary = NullALU;
     // Target-specific flags consumed elsewhere in the backend:
     // IEEE-strict mode and zero-operand behavior.
     bit hasIEEEFlag = 0;
     bit hasZeroOpFlag = 0;
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Multiclass Instruction formats
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Multiclass that handles branch instructions
// Emits one conditional-branch pseudo per scalar register class; the
// pattern ties the IL branch SDNode to a target block and a condition.
multiclass BranchConditional<SDNode Op> {
    def _i32 : ILFormat<(outs),
  (ins brtarget:$target, GPRI32:$src0),
  "; i32 Pseudo branch instruction",
  [(Op bb:$target, GPRI32:$src0)]>;
    def _f32 : ILFormat<(outs),
  (ins brtarget:$target, GPRF32:$src0),
  "; f32 Pseudo branch instruction",
  [(Op bb:$target, GPRF32:$src0)]>;
}

// Only scalar types should generate flow control
// Single-source structured-flow op (IF/BREAK/CONTINUE variants below);
// no selection pattern -- these are emitted by custom code.
multiclass BranchInstr<string name> {
  def _i32 : ILFormat<(outs), (ins GPRI32:$src),
      !strconcat(name, " $src"), []>;
  def _f32 : ILFormat<(outs), (ins GPRF32:$src),
      !strconcat(name, " $src"), []>;
}
// Only scalar types should generate flow control
// Two-source comparison variant (IFC/BREAKC/CONTINUEC).
multiclass BranchInstr2<string name> {
  def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
      !strconcat(name, " $src0, $src1"), []>;
  def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
      !strconcat(name, " $src0, $src1"), []>;
}
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Intrinsics support
|
||||
//===--------------------------------------------------------------------===//
|
||||
include "AMDILIntrinsics.td"
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Instructions support
|
||||
//===--------------------------------------------------------------------===//
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Custom Inserter for Branches and returns, this eventually will be a
|
||||
// seperate pass
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Unconditional and conditional branch pseudos. They are expanded by a
// custom inserter (usesCustomInserter) rather than selected directly.
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
  def BRANCH : ILFormat<(outs), (ins brtarget:$target),
      "; Pseudo unconditional branch instruction",
      [(br bb:$target)]>;
  // i32/f32 conditional branches built on the IL BRANCH_COND node.
  defm BRANCH_COND : BranchConditional<IL_brcond>;
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Flow and Program control Instructions
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Structured flow-control markers for the AMDIL stream (SWITCH/CASE,
// IF/ELSE/ENDIF, WHILE/ENDLOOP, FUNC/ENDFUNC, ...). All are terminators
// with no selection patterns; they are emitted by flow-control lowering.
let isTerminator=1 in {
  def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
      !strconcat("SWITCH", " $src"), []>;
  def CASE : ILFormat< (outs), (ins GPRI32:$src),
      !strconcat("CASE", " $src"), []>;
  def BREAK : ILFormat< (outs), (ins),
      "BREAK", []>;
  def CONTINUE : ILFormat< (outs), (ins),
      "CONTINUE", []>;
  def DEFAULT : ILFormat< (outs), (ins),
      "DEFAULT", []>;
  def ELSE : ILFormat< (outs), (ins),
      "ELSE", []>;
  def ENDSWITCH : ILFormat< (outs), (ins),
      "ENDSWITCH", []>;
  def ENDMAIN : ILFormat< (outs), (ins),
      "ENDMAIN", []>;
  def END : ILFormat< (outs), (ins),
      "END", []>;
  def ENDFUNC : ILFormat< (outs), (ins),
      "ENDFUNC", []>;
  def ENDIF : ILFormat< (outs), (ins),
      "ENDIF", []>;
  def WHILELOOP : ILFormat< (outs), (ins),
      "WHILE", []>;
  def ENDLOOP : ILFormat< (outs), (ins),
      "ENDLOOP", []>;
  def FUNC : ILFormat< (outs), (ins),
      "FUNC", []>;
  def RETDYN : ILFormat< (outs), (ins),
      "RET_DYN", []>;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
  defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
  // Two-source compare-and-branch forms.
  defm IFC : BranchInstr2<"IFC">;
  defm BREAKC : BranchInstr2<"BREAKC">;
  defm CONTINUEC : BranchInstr2<"CONTINUEC">;
}
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the AMDIL Implementation of the IntrinsicInfo class.
|
||||
//
|
||||
//===-----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "AMDIL.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/Module.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
|
||||
#include "AMDGPUGenIntrinsics.inc"
|
||||
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
|
||||
|
||||
/// AMDGPUIntrinsicInfo constructor. The TargetMachine handle is accepted
/// for interface compatibility but not stored: the generated intrinsic
/// tables this class consults are static.
AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
    : TargetIntrinsicInfo() {}
|
||||
|
||||
/// getName - Return the name of the AMDGPU intrinsic \p IntrID, or an
/// empty string for IDs in the core-LLVM intrinsic range. \p Tys and
/// \p numTys are unused (AMDGPU intrinsics are not overloaded).
std::string
AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
    unsigned int numTys) const 
{
  // Name table generated by TableGen; indexed from the first AMDGPU ID.
  static const char* const names[] = {
#define GET_INTRINSIC_NAME_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_NAME_TABLE
  };

  //assert(!isOverloaded(IntrID)
  //&& "AMDGPU Intrinsics are not overloaded");
  if (IntrID < Intrinsic::num_intrinsics) {
    // BUGFIX: was `return 0;`, which constructs std::string from a null
    // const char* -- undefined behavior. Return an empty name instead.
    return std::string();
  }
  assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
      && "Invalid intrinsic ID");

  std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
  return Result;
}
|
||||
|
||||
/// lookupName - Map a function name to an AMDGPU intrinsic ID, or 0
/// (Intrinsic::not_intrinsic) if it is not recognized.
unsigned int
AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const 
{
// Brings getIntrinsicForGCCBuiltin() into scope (generated by TableGen).
#define GET_FUNCTION_RECOGNIZER
#include "AMDGPUGenIntrinsics.inc"
#undef GET_FUNCTION_RECOGNIZER
  AMDGPUIntrinsic::ID IntrinsicID
    = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
  // Recognize names by their GCC-builtin spelling under the "AMDIL" prefix.
  IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);

  if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
    return IntrinsicID;
  }
  // NOTE(review): `Len` is ignored -- presumably Name is NUL-terminated;
  // confirm against callers before relying on embedded lengths.
  return 0;
}
|
||||
|
||||
/// isOverloaded - Return true if the intrinsic \p id takes overloaded
/// (any-type) operands. The entire body, including the return statement,
/// comes from the TableGen-generated overload table.
bool
AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const 
{
  // Overload Table
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
|
||||
|
||||
/// This defines the "getAttributes(ID id)" method.
|
||||
#define GET_INTRINSIC_ATTRIBUTES
|
||||
#include "AMDGPUGenIntrinsics.inc"
|
||||
#undef GET_INTRINSIC_ATTRIBUTES
|
||||
|
||||
/// getDeclaration - Materialize a Function declaration for intrinsic
/// \p IntrID in module \p M. Not implemented for AMDGPU: always asserts.
Function*
AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
    Type **Tys,
    unsigned numTys) const
{
  //Silence a warning
  AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID);
  (void)List;
  assert(!"Not implemented");
  // BUGFIX: the function previously fell off the end of a non-void
  // function; with NDEBUG the assert compiles away and that is undefined
  // behavior. Return a null Function* so release builds stay well-formed.
  return 0;
}
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the AMDIL Implementation of the Intrinsic Info class.
|
||||
//
|
||||
//===-----------------------------------------------------------------------===//
|
||||
#ifndef _AMDIL_INTRINSICS_H_
|
||||
#define _AMDIL_INTRINSICS_H_
|
||||
|
||||
#include "llvm/Intrinsics.h"
|
||||
#include "llvm/Target/TargetIntrinsicInfo.h"
|
||||
|
||||
namespace llvm {
class TargetMachine;

// IDs for the AMDGPU target-specific intrinsics. They are numbered
// immediately after the last core LLVM intrinsic; the enumerator list
// itself is generated into AMDGPUGenIntrinsics.inc by TableGen.
namespace AMDGPUIntrinsic {
enum ID {
  last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
#define GET_INTRINSIC_ENUM_VALUES
#include "AMDGPUGenIntrinsics.inc"
#undef GET_INTRINSIC_ENUM_VALUES
  // One-past-the-last AMDGPU intrinsic ID.
  , num_AMDGPU_intrinsics
};

}

// Target hook that lets the rest of LLVM query AMDGPU intrinsics by
// ID/name; see AMDILIntrinsicInfo.cpp for the implementations.
class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
public:
  AMDGPUIntrinsicInfo(TargetMachine *tm);
  // Name of intrinsic IntrId; Tys/numTys are unused (no overloads).
  std::string getName(unsigned int IntrId, Type **Tys = 0,
      unsigned int numTys = 0) const;
  // Reverse mapping: function name -> intrinsic ID (0 if unknown).
  unsigned int lookupName(const char *Name, unsigned int Len) const;
  bool isOverloaded(unsigned int IID) const;
  // Not implemented; asserts when called.
  Function *getDeclaration(Module *M, unsigned int ID,
      Type **Tys = 0,
      unsigned int numTys = 0) const;
}; // AMDGPUIntrinsicInfo
}
|
||||
|
||||
#endif // _AMDIL_INTRINSICS_H_
|
||||
|
||||
|
|
@ -1,242 +0,0 @@
|
|||
//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines all of the amdil-specific intrinsics
|
||||
//
|
||||
//===---------------------------------------------------------------===//
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Intrinsic classes
|
||||
// Generic versions of the above classes but for Target specific intrinsics
|
||||
// instead of SDNode patterns.
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Shape classes for the AMDIL intrinsics defined below, named by arity
// and element kind. "Int"/"Float" classes are overloaded over any
// int/float type with all operands matching the result; "Atomic"
// classes take a pointer plus i32 operands and touch argument memory.
let TargetPrefix = "AMDIL", isTarget = 1 in {
     class VoidIntLong :
          Intrinsic<[llvm_i64_ty], [], []>;
     class VoidIntInt :
          Intrinsic<[llvm_i32_ty], [], []>;
     class VoidIntBool :
          Intrinsic<[llvm_i32_ty], [], []>;
     class UnaryIntInt :
          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
     class UnaryIntFloat :
          Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
     // Conversions: float -> int and int -> float.
     class ConvertIntFTOI :
          Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
     class ConvertIntITOF :
          Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
     class UnaryIntNoRetInt :
          Intrinsic<[], [llvm_anyint_ty], []>;
     class UnaryIntNoRetFloat :
          Intrinsic<[], [llvm_anyfloat_ty], []>;
     class BinaryIntInt :
          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     class BinaryIntFloat :
          Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     class BinaryIntNoRetInt :
          Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
     class BinaryIntNoRetFloat :
          Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
     class TernaryIntInt :
          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
          LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     class TernaryIntFloat :
          Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
          LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     class QuaternaryIntInt :
          Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
          LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
     // Atomic shapes: (ptr, value[, value[, value]]) with/without result.
     class UnaryAtomicInt :
          Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
     class BinaryAtomicInt :
          Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
     class TernaryAtomicInt :
          Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
     class UnaryAtomicIntNoRet :
          Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
     class BinaryAtomicIntNoRet :
          Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
     class TernaryAtomicIntNoRet :
          Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
}
|
||||
|
||||
// The AMDIL intrinsics proper. Each maps a __amdil_* GCC builtin name to
// an intrinsic of one of the shape classes above.
let TargetPrefix = "AMDIL", isTarget = 1 in {
  def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;

  // Bit-manipulation operations.
  def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
      TernaryIntInt;
  def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
      TernaryIntInt;
  def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
      UnaryIntInt;
  def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
      UnaryIntInt;
  def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
      UnaryIntInt;
  def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
      UnaryIntInt;
  def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
      UnaryIntInt;
  def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
      TernaryIntInt;
  def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
      TernaryIntInt;
  def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
      QuaternaryIntInt;
  def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
      TernaryIntInt;
  def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
      BinaryIntInt;

  // Multiply / multiply-add families (signed, unsigned, 24-bit, high).
  def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
      TernaryIntInt;
  def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
      TernaryIntInt;
  def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
      TernaryIntFloat;
  def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
      BinaryIntInt;
  def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
      BinaryIntInt;
  def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
      BinaryIntInt;
  def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
      BinaryIntInt;
  def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
      BinaryIntInt;
  def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
      BinaryIntInt;
  def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
      TernaryIntInt;
  def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
      TernaryIntInt;
  def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
      BinaryIntInt;
  def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
      BinaryIntInt;

  // Min/max (integer and float).
  def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
      BinaryIntInt;
  def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
      BinaryIntInt;
  def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
      BinaryIntFloat;
  def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
      BinaryIntInt;
  def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
      BinaryIntInt;
  def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
      BinaryIntFloat;

  // Media (packed/SAD/lerp) operations.
  def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
      TernaryIntInt;
  def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
      TernaryIntInt;
  def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
      TernaryIntInt;

  // Rounding and range reduction.
  def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
      UnaryIntFloat;
  def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
      TernaryIntFloat;
  def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
      UnaryIntFloat;
  def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
      UnaryIntFloat;
  def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
      UnaryIntFloat;
  def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
      UnaryIntFloat;

  // Transcendental / floating-point math.
  def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
      UnaryIntFloat;
  def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
      UnaryIntFloat;
  def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
      UnaryIntFloat;
  def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
      UnaryIntFloat;
  def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
      UnaryIntFloat;
  def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
      UnaryIntFloat;
  def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
      UnaryIntFloat;
  def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
      UnaryIntFloat;
  def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
  def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
  def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
  def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
      UnaryIntFloat;
  def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
      UnaryIntFloat;
  def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
      UnaryIntFloat;
  def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
      UnaryIntFloat;
  def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
      UnaryIntFloat;
  def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
      UnaryIntFloat;
  def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
      UnaryIntFloat;
  def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
      UnaryIntFloat;
  def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
      TernaryIntFloat;
  def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
      UnaryIntFloat;
  def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
      UnaryIntFloat;
  def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
      UnaryIntFloat;
  def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
      TernaryIntFloat;
  def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
      Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
      llvm_v4i32_ty, llvm_i32_ty], []>;

  def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
      Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
  def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
      Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
  def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
      Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;

  // Type conversions (half <-> float, float -> int with rounding modes,
  // byte packing/unpacking).
  def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
      ConvertIntITOF;
  def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
      ConvertIntFTOI;
  def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
      ConvertIntFTOI;
  def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
      Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
  def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
      ConvertIntITOF;
  def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
      ConvertIntITOF;
  def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
      ConvertIntITOF;
  def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
      ConvertIntITOF;

  // Dot products.
  def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
      Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
      llvm_v2f32_ty, llvm_float_ty], []>;
  def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
      Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
      llvm_v2f32_ty], []>;
  def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
      Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
      llvm_v4f32_ty], []>;
  def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
      Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
      llvm_v4f32_ty], []>;
}
|
||||
|
|
@ -1,71 +0,0 @@
|
|||
//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILNIDevice.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUEvergreenDevice(ST)
|
||||
{
|
||||
std::string name = ST->getDeviceName();
|
||||
if (name == "caicos") {
|
||||
mDeviceFlag = OCL_DEVICE_CAICOS;
|
||||
} else if (name == "turks") {
|
||||
mDeviceFlag = OCL_DEVICE_TURKS;
|
||||
} else if (name == "cayman") {
|
||||
mDeviceFlag = OCL_DEVICE_CAYMAN;
|
||||
} else {
|
||||
mDeviceFlag = OCL_DEVICE_BARTS;
|
||||
}
|
||||
}
|
||||
AMDGPUNIDevice::~AMDGPUNIDevice()
|
||||
{
|
||||
}
|
||||
|
||||
size_t
|
||||
AMDGPUNIDevice::getMaxLDSSize() const
|
||||
{
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_900;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AMDGPUNIDevice::getGeneration() const
|
||||
{
|
||||
return AMDGPUDeviceInfo::HD6XXX;
|
||||
}
|
||||
|
||||
|
||||
AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUNIDevice(ST)
|
||||
{
|
||||
setCaps();
|
||||
}
|
||||
|
||||
AMDGPUCaymanDevice::~AMDGPUCaymanDevice()
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
AMDGPUCaymanDevice::setCaps()
|
||||
{
|
||||
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
|
||||
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
|
||||
mHWBits.set(AMDGPUDeviceInfo::FMA);
|
||||
}
|
||||
mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
|
||||
mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
|
||||
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
|
||||
}
|
||||
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===---------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===---------------------------------------------------------------------===//
|
||||
#ifndef _AMDILNIDEVICE_H_
|
||||
#define _AMDILNIDEVICE_H_
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUSubtarget;
|
||||
//===---------------------------------------------------------------------===//
|
||||
// NI generation of devices and their respective sub classes
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
// The AMDGPUNIDevice is the base class for all Northern Island series of
|
||||
// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major
|
||||
// exception being differences in wavefront size and hardware capabilities. The
|
||||
// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
|
||||
// integer operations
|
||||
|
||||
class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
AMDGPUNIDevice(AMDGPUSubtarget*);
|
||||
virtual ~AMDGPUNIDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
protected:
|
||||
}; // AMDGPUNIDevice
|
||||
|
||||
// Just as the AMDGPUCypressDevice is the double capable version of the
|
||||
// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version of
|
||||
// the AMDGPUNIDevice. The other major difference that is not as useful from
|
||||
// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the
|
||||
// NI family is a 5 wide.
|
||||
|
||||
class AMDGPUCaymanDevice: public AMDGPUNIDevice {
|
||||
public:
|
||||
AMDGPUCaymanDevice(AMDGPUSubtarget*);
|
||||
virtual ~AMDGPUCaymanDevice();
|
||||
private:
|
||||
virtual void setCaps();
|
||||
}; // AMDGPUCaymanDevice
|
||||
|
||||
static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
|
||||
} // namespace llvm
|
||||
#endif // _AMDILNIDEVICE_H_
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,110 +0,0 @@
|
|||
//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Declarations that describe the AMDIL register file
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class AMDILReg<bits<16> num, string n> : Register<n> {
|
||||
field bits<16> Value;
|
||||
let Value = num;
|
||||
let Namespace = "AMDGPU";
|
||||
}
|
||||
|
||||
// We will start with 8 registers for each class before expanding to more
|
||||
// Since the swizzle is added based on the register class, we can leave it
|
||||
// off here and just specify different registers for different register classes
|
||||
def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
|
||||
def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
|
||||
def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
|
||||
def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
|
||||
def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
|
||||
def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
|
||||
def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
|
||||
def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
|
||||
def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
|
||||
def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
|
||||
def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
|
||||
def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
|
||||
def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
|
||||
def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
|
||||
def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
|
||||
def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
|
||||
def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
|
||||
def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
|
||||
def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
|
||||
def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
|
||||
|
||||
// All registers between 1000 and 1024 are reserved and cannot be used
|
||||
// unless commented in this section
|
||||
// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
|
||||
// r1020 is used to hold the frame index for local arrays
|
||||
// r1019 is used to hold the dynamic stack allocation pointer
|
||||
// r1018 is used as a temporary register for handwritten code
|
||||
// r1017 is used as a temporary register for handwritten code
|
||||
// r1016 is used as a temporary register for load/store code
|
||||
// r1015 is used as a temporary register for data segment offset
|
||||
// r1014 is used as a temporary register for store code
|
||||
// r1013 is used as the section data pointer register
|
||||
// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
|
||||
// r1009 is used as the frame pointer register
|
||||
// r999 is used as the mem register.
|
||||
// r998 is used as the return address register.
|
||||
//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
|
||||
//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
|
||||
//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
|
||||
//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
|
||||
//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
|
||||
//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
|
||||
def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
|
||||
def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
|
||||
def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
|
||||
def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
|
||||
def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
|
||||
def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
|
||||
def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
|
||||
def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
|
||||
def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
|
||||
def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
|
||||
def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
|
||||
def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
|
||||
def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
|
||||
def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
|
||||
def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
|
||||
def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
|
||||
def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
|
||||
def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
|
||||
def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
|
||||
def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
|
||||
def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
|
||||
def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
|
||||
def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
|
||||
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
||||
{
|
||||
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
||||
let AltOrderSelect = [{
|
||||
return 1;
|
||||
}];
|
||||
}
|
||||
def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
|
||||
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
||||
{
|
||||
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
||||
let AltOrderSelect = [{
|
||||
return 1;
|
||||
}];
|
||||
}
|
||||
def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
|
||||
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
||||
{
|
||||
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
||||
let AltOrderSelect = [{
|
||||
return 1;
|
||||
}];
|
||||
}
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
#include "AMDILSIDevice.h"
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDILNIDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
|
||||
: AMDGPUEvergreenDevice(ST)
|
||||
{
|
||||
}
|
||||
AMDGPUSIDevice::~AMDGPUSIDevice()
|
||||
{
|
||||
}
|
||||
|
||||
size_t
|
||||
AMDGPUSIDevice::getMaxLDSSize() const
|
||||
{
|
||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
||||
return MAX_LDS_SIZE_900;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
AMDGPUSIDevice::getGeneration() const
|
||||
{
|
||||
return AMDGPUDeviceInfo::HD7XXX;
|
||||
}
|
||||
|
||||
std::string
|
||||
AMDGPUSIDevice::getDataLayout() const
|
||||
{
|
||||
return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
|
||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
"-n8:16:32:64");
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface for the subtarget data classes.
|
||||
//
|
||||
//===---------------------------------------------------------------------===//
|
||||
// This file will define the interface that each generation needs to
|
||||
// implement in order to correctly answer queries on the capabilities of the
|
||||
// specific hardware.
|
||||
//===---------------------------------------------------------------------===//
|
||||
#ifndef _AMDILSIDEVICE_H_
|
||||
#define _AMDILSIDEVICE_H_
|
||||
#include "AMDILEvergreenDevice.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
|
||||
namespace llvm {
|
||||
class AMDGPUSubtarget;
|
||||
//===---------------------------------------------------------------------===//
|
||||
// SI generation of devices and their respective sub classes
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
// The AMDGPUSIDevice is the base class for all Northern Island series of
|
||||
// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major
|
||||
// exception being differences in wavefront size and hardware capabilities. The
|
||||
// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
|
||||
// integer operations
|
||||
|
||||
class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
|
||||
public:
|
||||
AMDGPUSIDevice(AMDGPUSubtarget*);
|
||||
virtual ~AMDGPUSIDevice();
|
||||
virtual size_t getMaxLDSSize() const;
|
||||
virtual uint32_t getGeneration() const;
|
||||
virtual std::string getDataLayout() const;
|
||||
protected:
|
||||
}; // AMDGPUSIDevice
|
||||
|
||||
} // namespace llvm
|
||||
#endif // _AMDILSIDEVICE_H_
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//==-----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides helper macros for expanding case statements.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#ifndef AMDILUTILITYFUNCTIONS_H_
|
||||
#define AMDILUTILITYFUNCTIONS_H_
|
||||
|
||||
// Macros that are used to help with switch statements for various data types
|
||||
// However, these macro's do not return anything unlike the second set below.
|
||||
#define ExpandCaseTo32bitIntTypes(Instr) \
|
||||
case Instr##_i32:
|
||||
|
||||
#define ExpandCaseTo32bitIntTruncTypes(Instr) \
|
||||
case Instr##_i32i8: \
|
||||
case Instr##_i32i16:
|
||||
|
||||
#define ExpandCaseToIntTypes(Instr) \
|
||||
ExpandCaseTo32bitIntTypes(Instr)
|
||||
|
||||
#define ExpandCaseToIntTruncTypes(Instr) \
|
||||
ExpandCaseTo32bitIntTruncTypes(Instr)
|
||||
|
||||
#define ExpandCaseToFloatTypes(Instr) \
|
||||
case Instr##_f32:
|
||||
|
||||
#define ExpandCaseTo32bitScalarTypes(Instr) \
|
||||
ExpandCaseTo32bitIntTypes(Instr) \
|
||||
case Instr##_f32:
|
||||
|
||||
#define ExpandCaseToAllScalarTypes(Instr) \
|
||||
ExpandCaseToFloatTypes(Instr) \
|
||||
ExpandCaseToIntTypes(Instr)
|
||||
|
||||
#define ExpandCaseToAllScalarTruncTypes(Instr) \
|
||||
ExpandCaseToFloatTruncTypes(Instr) \
|
||||
ExpandCaseToIntTruncTypes(Instr)
|
||||
|
||||
#define ExpandCaseToAllTypes(Instr) \
|
||||
ExpandCaseToAllScalarTypes(Instr)
|
||||
|
||||
#define ExpandCaseToAllTruncTypes(Instr) \
|
||||
ExpandCaseToAllScalarTruncTypes(Instr)
|
||||
|
||||
// Macros that expand into statements with return values
|
||||
#define ExpandCaseTo32bitIntReturn(Instr, Return) \
|
||||
case Instr##_i32: return Return##_i32;
|
||||
|
||||
#define ExpandCaseToIntReturn(Instr, Return) \
|
||||
ExpandCaseTo32bitIntReturn(Instr, Return)
|
||||
|
||||
#define ExpandCaseToFloatReturn(Instr, Return) \
|
||||
case Instr##_f32: return Return##_f32;\
|
||||
|
||||
#define ExpandCaseToAllScalarReturn(Instr, Return) \
|
||||
ExpandCaseToFloatReturn(Instr, Return) \
|
||||
ExpandCaseToIntReturn(Instr, Return)
|
||||
|
||||
// These macros expand to common groupings of RegClass ID's
|
||||
#define ExpandCaseTo1CompRegID \
|
||||
case AMDGPU::GPRI32RegClassID: \
|
||||
case AMDGPU::GPRF32RegClassID:
|
||||
|
||||
#define ExpandCaseTo32BitType(Instr) \
|
||||
case Instr##_i32: \
|
||||
case Instr##_f32:
|
||||
|
||||
#endif // AMDILUTILITYFUNCTIONS_H_
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
|
||||
#include "AMDGPUInstPrinter.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
|
||||
StringRef Annot) {
|
||||
printInstruction(MI, OS);
|
||||
|
||||
printAnnotation(OS, Annot);
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
|
||||
const MCOperand &Op = MI->getOperand(OpNo);
|
||||
if (Op.isReg()) {
|
||||
O << getRegisterName(Op.getReg());
|
||||
} else if (Op.isImm()) {
|
||||
O << Op.getImm();
|
||||
} else if (Op.isFPImm()) {
|
||||
O << Op.getFPImm();
|
||||
} else {
|
||||
assert(!"unknown operand type in printOperand");
|
||||
}
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
printOperand(MI, OpNo, O);
|
||||
}
|
||||
|
||||
#include "AMDGPUGenAsmWriter.inc"
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
|
||||
#ifndef AMDGPUINSTPRINTER_H
|
||||
#define AMDGPUINSTPRINTER_H
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUInstPrinter : public MCInstPrinter {
|
||||
public:
|
||||
AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
|
||||
const MCRegisterInfo &MRI)
|
||||
: MCInstPrinter(MAI, MII, MRI) {}
|
||||
|
||||
//Autogenerated by tblgen
|
||||
void printInstruction(const MCInst *MI, raw_ostream &O);
|
||||
static const char *getRegisterName(unsigned RegNo);
|
||||
|
||||
// virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
|
||||
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
|
||||
|
||||
private:
|
||||
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
// void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
|
||||
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
|
||||
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPUINSTRPRINTER_H
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
==============================================================================
|
||||
LLVM Release License
|
||||
==============================================================================
|
||||
University of Illinois/NCSA
|
||||
Open Source License
|
||||
|
||||
Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
|
||||
All rights reserved.
|
||||
|
||||
Developed by:
|
||||
|
||||
LLVM Team
|
||||
|
||||
University of Illinois at Urbana-Champaign
|
||||
|
||||
http://llvm.org
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal with
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimers.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimers in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the names of the LLVM Team, University of Illinois at
|
||||
Urbana-Champaign, nor the names of its contributors may be used to
|
||||
endorse or promote products derived from this Software without specific
|
||||
prior written permission.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
||||
SOFTWARE.
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
#include "llvm/MC/MCAsmBackend.h"
|
||||
#include "llvm/MC/MCAssembler.h"
|
||||
#include "llvm/MC/MCObjectWriter.h"
|
||||
#include "llvm/MC/MCValue.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class AMDGPUMCObjectWriter : public MCObjectWriter {
|
||||
public:
|
||||
AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
|
||||
virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
|
||||
const MCAsmLayout &Layout) {
|
||||
//XXX: Implement if necessary.
|
||||
}
|
||||
virtual void RecordRelocation(const MCAssembler &Asm,
|
||||
const MCAsmLayout &Layout,
|
||||
const MCFragment *Fragment,
|
||||
const MCFixup &Fixup,
|
||||
MCValue Target, uint64_t &FixedValue) {
|
||||
assert(!"Not implemented");
|
||||
}
|
||||
|
||||
virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
|
||||
|
||||
};
|
||||
|
||||
class AMDGPUAsmBackend : public MCAsmBackend {
|
||||
public:
|
||||
AMDGPUAsmBackend(const Target &T)
|
||||
: MCAsmBackend() {}
|
||||
|
||||
virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
|
||||
virtual unsigned getNumFixupKinds() const { return 0; };
|
||||
virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
|
||||
uint64_t Value) const { assert(!"Not implemented"); }
|
||||
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
|
||||
const MCInstFragment *DF,
|
||||
const MCAsmLayout &Layout) const {
|
||||
return false;
|
||||
}
|
||||
virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
|
||||
assert(!"Not implemented");
|
||||
}
|
||||
virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
|
||||
virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
} //End anonymous namespace
|
||||
|
||||
void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
|
||||
const MCAsmLayout &Layout) {
|
||||
for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
|
||||
Asm.writeSectionData(I, Layout);
|
||||
}
|
||||
}
|
||||
|
||||
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT) {
|
||||
return new AMDGPUAsmBackend(T);
|
||||
}
|
||||
|
||||
AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
|
||||
raw_ostream &OS) const {
|
||||
return new AMDGPUMCObjectWriter(OS);
|
||||
}
|
||||
|
|
@ -1,96 +0,0 @@
|
|||
//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUMCAsmInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
|
||||
{
|
||||
HasSingleParameterDotFile = false;
|
||||
WeakDefDirective = 0;
|
||||
//===------------------------------------------------------------------===//
|
||||
HasSubsectionsViaSymbols = true;
|
||||
HasMachoZeroFillDirective = false;
|
||||
HasMachoTBSSDirective = false;
|
||||
HasStaticCtorDtorReferenceInStaticMode = false;
|
||||
LinkerRequiresNonEmptyDwarfLines = true;
|
||||
MaxInstLength = 16;
|
||||
PCSymbol = "$";
|
||||
SeparatorString = "\n";
|
||||
CommentColumn = 40;
|
||||
CommentString = ";";
|
||||
LabelSuffix = ":";
|
||||
GlobalPrefix = "@";
|
||||
PrivateGlobalPrefix = ";.";
|
||||
LinkerPrivateGlobalPrefix = "!";
|
||||
InlineAsmStart = ";#ASMSTART";
|
||||
InlineAsmEnd = ";#ASMEND";
|
||||
AssemblerDialect = 0;
|
||||
AllowQuotesInName = false;
|
||||
AllowNameToStartWithDigit = false;
|
||||
AllowPeriodsInName = false;
|
||||
|
||||
//===--- Data Emission Directives -------------------------------------===//
|
||||
ZeroDirective = ".zero";
|
||||
AsciiDirective = ".ascii\t";
|
||||
AscizDirective = ".asciz\t";
|
||||
Data8bitsDirective = ".byte\t";
|
||||
Data16bitsDirective = ".short\t";
|
||||
Data32bitsDirective = ".long\t";
|
||||
Data64bitsDirective = ".quad\t";
|
||||
GPRel32Directive = 0;
|
||||
SunStyleELFSectionSwitchSyntax = true;
|
||||
UsesELFSectionDirectiveForBSS = true;
|
||||
HasMicrosoftFastStdCallMangling = false;
|
||||
|
||||
//===--- Alignment Information ----------------------------------------===//
|
||||
AlignDirective = ".align\t";
|
||||
AlignmentIsInBytes = true;
|
||||
TextAlignFillValue = 0;
|
||||
|
||||
//===--- Global Variable Emission Directives --------------------------===//
|
||||
GlobalDirective = ".global";
|
||||
ExternDirective = ".extern";
|
||||
HasSetDirective = false;
|
||||
HasAggressiveSymbolFolding = true;
|
||||
LCOMMDirectiveType = LCOMM::None;
|
||||
COMMDirectiveAlignmentIsInBytes = false;
|
||||
HasDotTypeDotSizeDirective = false;
|
||||
HasNoDeadStrip = true;
|
||||
HasSymbolResolver = false;
|
||||
WeakRefDirective = ".weakref\t";
|
||||
LinkOnceDirective = 0;
|
||||
//===--- Dwarf Emission Directives -----------------------------------===//
|
||||
HasLEB128 = true;
|
||||
SupportsDebugInformation = true;
|
||||
ExceptionsType = ExceptionHandling::None;
|
||||
DwarfUsesInlineInfoSection = false;
|
||||
DwarfSectionOffsetDirective = ".offset";
|
||||
DwarfUsesLabelOffsetForRanges = true;
|
||||
|
||||
//===--- CBE Asm Translation Table -----------------------------------===//
|
||||
AsmTransCBE = 0;
|
||||
}
|
||||
const char*
|
||||
AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
|
||||
{
|
||||
switch (AS) {
|
||||
default:
|
||||
return 0;
|
||||
case 0:
|
||||
return 0;
|
||||
};
|
||||
return 0;
|
||||
}
|
||||
|
||||
const MCSection*
|
||||
AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - TODO: Add brief description -------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// TODO: Add full description
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUMCASMINFO_H_
|
||||
#define AMDGPUMCASMINFO_H_
|
||||
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
namespace llvm {
|
||||
class Target;
|
||||
class StringRef;
|
||||
|
||||
class AMDGPUMCAsmInfo : public MCAsmInfo {
|
||||
public:
|
||||
explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
|
||||
const char*
|
||||
getDataASDirective(unsigned int Size, unsigned int AS) const;
|
||||
const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
|
||||
};
|
||||
} // namespace llvm
|
||||
#endif // AMDGPUMCASMINFO_H_
|
||||
|
|
@ -1,59 +0,0 @@
|
|||
//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// CodeEmitter interface for R600 and SI codegen.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef AMDGPUCODEEMITTER_H
|
||||
#define AMDGPUCODEEMITTER_H
|
||||
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MCInst;
|
||||
class MCOperand;
|
||||
|
||||
class AMDGPUMCCodeEmitter : public MCCodeEmitter {
|
||||
public:
|
||||
|
||||
uint64_t getBinaryCodeForInstr(const MCInst &MI,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
||||
return 0;
|
||||
}
|
||||
virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
||||
return 0;
|
||||
}
|
||||
virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
|
||||
return Value;
|
||||
}
|
||||
virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
||||
return 0;
|
||||
}
|
||||
virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDGPUCODEEMITTER_H
|
||||
|
|
@ -1,111 +0,0 @@
|
|||
//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides AMDGPU specific target descriptions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPUMCTargetDesc.h"
|
||||
#include "AMDGPUMCAsmInfo.h"
|
||||
#include "InstPrinter/AMDGPUInstPrinter.h"
|
||||
#include "llvm/MC/MachineLocation.h"
|
||||
#include "llvm/MC/MCCodeGenInfo.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
#define GET_INSTRINFO_MC_DESC
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
#define GET_SUBTARGETINFO_MC_DESC
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
|
||||
#define GET_REGINFO_MC_DESC
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static MCInstrInfo *createAMDGPUMCInstrInfo() {
|
||||
MCInstrInfo *X = new MCInstrInfo();
|
||||
InitAMDGPUMCInstrInfo(X);
|
||||
return X;
|
||||
}
|
||||
|
||||
static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
|
||||
MCRegisterInfo *X = new MCRegisterInfo();
|
||||
InitAMDGPUMCRegisterInfo(X, 0);
|
||||
return X;
|
||||
}
|
||||
|
||||
static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
|
||||
StringRef FS) {
|
||||
MCSubtargetInfo * X = new MCSubtargetInfo();
|
||||
InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
|
||||
return X;
|
||||
}
|
||||
|
||||
static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
|
||||
CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL) {
|
||||
MCCodeGenInfo *X = new MCCodeGenInfo();
|
||||
X->InitMCCodeGenInfo(RM, CM, OL);
|
||||
return X;
|
||||
}
|
||||
|
||||
static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
|
||||
unsigned SyntaxVariant,
|
||||
const MCAsmInfo &MAI,
|
||||
const MCInstrInfo &MII,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCSubtargetInfo &STI) {
|
||||
return new AMDGPUInstPrinter(MAI, MII, MRI);
|
||||
}
|
||||
|
||||
static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCSubtargetInfo &STI,
|
||||
MCContext &Ctx) {
|
||||
if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
|
||||
return createSIMCCodeEmitter(MCII, STI, Ctx);
|
||||
} else {
|
||||
return createR600MCCodeEmitter(MCII, STI, Ctx);
|
||||
}
|
||||
}
|
||||
|
||||
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
|
||||
MCContext &Ctx, MCAsmBackend &MAB,
|
||||
raw_ostream &_OS,
|
||||
MCCodeEmitter *_Emitter,
|
||||
bool RelaxAll,
|
||||
bool NoExecStack) {
|
||||
return createPureStreamer(Ctx, MAB, _OS, _Emitter);
|
||||
}
|
||||
|
||||
extern "C" void LLVMInitializeAMDGPUTargetMC() {
|
||||
|
||||
RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
|
||||
|
||||
TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
|
||||
|
||||
TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
|
||||
|
||||
TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
|
||||
|
||||
TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
|
||||
|
||||
TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
|
||||
|
||||
TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
|
||||
|
||||
TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
|
||||
|
||||
TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
|
||||
}
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides AMDGPU specific target descriptions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
|
||||
#ifndef AMDGPUMCTARGETDESC_H
|
||||
#define AMDGPUMCTARGETDESC_H
|
||||
|
||||
#include "llvm/ADT/StringRef.h"
|
||||
|
||||
namespace llvm {
|
||||
class MCAsmBackend;
|
||||
class MCCodeEmitter;
|
||||
class MCContext;
|
||||
class MCInstrInfo;
|
||||
class MCRegisterInfo;
|
||||
class MCSubtargetInfo;
|
||||
class Target;
|
||||
|
||||
extern Target TheAMDGPUTarget;
|
||||
|
||||
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCSubtargetInfo &STI,
|
||||
MCContext &Ctx);
|
||||
|
||||
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCSubtargetInfo &STI,
|
||||
MCContext &Ctx);
|
||||
|
||||
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT);
|
||||
} // End llvm namespace
|
||||
|
||||
#define GET_REGINFO_ENUM
|
||||
#include "AMDGPUGenRegisterInfo.inc"
|
||||
|
||||
#define GET_INSTRINFO_ENUM
|
||||
#include "AMDGPUGenInstrInfo.inc"
|
||||
|
||||
#define GET_SUBTARGETINFO_ENUM
|
||||
#include "AMDGPUGenSubtargetInfo.inc"
|
||||
|
||||
#endif // AMDGPUMCTARGETDESC_H
|
||||
|
|
@ -1,727 +0,0 @@
|
|||
//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This code emitters outputs bytecode that is understood by the r600g driver
|
||||
// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
|
||||
// except that the size of the instruction fields are rounded up to the
|
||||
// nearest byte.
|
||||
//
|
||||
// [1] http://www.mesa3d.org/
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600Defines.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define SRC_BYTE_COUNT 11
|
||||
#define DST_BYTE_COUNT 5
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
|
||||
R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
|
||||
void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
|
||||
const MCInstrInfo &MCII;
|
||||
const MCSubtargetInfo &STI;
|
||||
MCContext &Ctx;
|
||||
|
||||
public:
|
||||
|
||||
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
|
||||
MCContext &ctx)
|
||||
: MCII(mcii), STI(sti), Ctx(ctx) { }
|
||||
|
||||
/// EncodeInstruction - Encode the instruction and write it to the OS.
|
||||
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
/// getMachineOpValue - Reutrn the encoding for an MCOperand.
|
||||
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
private:
|
||||
|
||||
void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
|
||||
raw_ostream &OS) const;
|
||||
void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
|
||||
void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
|
||||
raw_ostream &OS) const;
|
||||
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
|
||||
void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
|
||||
raw_ostream &OS) const;
|
||||
void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
|
||||
|
||||
void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
|
||||
|
||||
void EmitByte(unsigned int byte, raw_ostream &OS) const;
|
||||
|
||||
void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
|
||||
|
||||
void Emit(uint32_t value, raw_ostream &OS) const;
|
||||
void Emit(uint64_t value, raw_ostream &OS) const;
|
||||
|
||||
unsigned getHWRegIndex(unsigned reg) const;
|
||||
unsigned getHWRegChan(unsigned reg) const;
|
||||
unsigned getHWReg(unsigned regNo) const;
|
||||
|
||||
bool isFCOp(unsigned opcode) const;
|
||||
bool isTexOp(unsigned opcode) const;
|
||||
bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
|
||||
|
||||
/// getHWRegIndexGen - Get the register's hardware index. Implemented in
|
||||
/// R600HwRegInfo.include.
|
||||
unsigned getHWRegIndexGen(unsigned int Reg) const;
|
||||
|
||||
/// getHWRegChanGen - Get the register's channel. Implemented in
|
||||
/// R600HwRegInfo.include.
|
||||
unsigned getHWRegChanGen(unsigned int Reg) const;
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
enum RegElement {
|
||||
ELEMENT_X = 0,
|
||||
ELEMENT_Y,
|
||||
ELEMENT_Z,
|
||||
ELEMENT_W
|
||||
};
|
||||
|
||||
enum InstrTypes {
|
||||
INSTR_ALU = 0,
|
||||
INSTR_TEX,
|
||||
INSTR_FC,
|
||||
INSTR_NATIVE,
|
||||
INSTR_VTX
|
||||
};
|
||||
|
||||
enum FCInstr {
|
||||
FC_IF = 0,
|
||||
FC_IF_INT,
|
||||
FC_ELSE,
|
||||
FC_ENDIF,
|
||||
FC_BGNLOOP,
|
||||
FC_ENDLOOP,
|
||||
FC_BREAK,
|
||||
FC_BREAK_NZ_INT,
|
||||
FC_CONTINUE,
|
||||
FC_BREAK_Z_INT,
|
||||
FC_BREAK_NZ
|
||||
};
|
||||
|
||||
enum TextureTypes {
|
||||
TEXTURE_1D = 1,
|
||||
TEXTURE_2D,
|
||||
TEXTURE_3D,
|
||||
TEXTURE_CUBE,
|
||||
TEXTURE_RECT,
|
||||
TEXTURE_SHADOW1D,
|
||||
TEXTURE_SHADOW2D,
|
||||
TEXTURE_SHADOWRECT,
|
||||
TEXTURE_1D_ARRAY,
|
||||
TEXTURE_2D_ARRAY,
|
||||
TEXTURE_SHADOW1D_ARRAY,
|
||||
TEXTURE_SHADOW2D_ARRAY
|
||||
};
|
||||
|
||||
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCSubtargetInfo &STI,
|
||||
MCContext &Ctx) {
|
||||
return new R600MCCodeEmitter(MCII, STI, Ctx);
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
||||
if (isTexOp(MI.getOpcode())) {
|
||||
EmitTexInstr(MI, Fixups, OS);
|
||||
} else if (isFCOp(MI.getOpcode())){
|
||||
EmitFCInstr(MI, OS);
|
||||
} else if (MI.getOpcode() == AMDGPU::RETURN ||
|
||||
MI.getOpcode() == AMDGPU::BUNDLE ||
|
||||
MI.getOpcode() == AMDGPU::KILL) {
|
||||
return;
|
||||
} else {
|
||||
switch(MI.getOpcode()) {
|
||||
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
|
||||
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
|
||||
{
|
||||
uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
|
||||
EmitByte(INSTR_NATIVE, OS);
|
||||
Emit(inst, OS);
|
||||
break;
|
||||
}
|
||||
case AMDGPU::CONSTANT_LOAD_eg:
|
||||
case AMDGPU::VTX_READ_PARAM_i32_eg:
|
||||
case AMDGPU::VTX_READ_PARAM_f32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_i8_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_i32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_f32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
|
||||
case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
|
||||
{
|
||||
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
|
||||
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
|
||||
|
||||
EmitByte(INSTR_VTX, OS);
|
||||
Emit(InstWord01, OS);
|
||||
Emit(InstWord2, OS);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
EmitALUInstr(MI, Fixups, OS);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
raw_ostream &OS) const {
|
||||
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
|
||||
unsigned NumOperands = MI.getNumOperands();
|
||||
|
||||
if(MCDesc.findFirstPredOperandIdx() > -1)
|
||||
NumOperands--;
|
||||
|
||||
if (GET_FLAG_OPERAND_IDX(MCDesc.TSFlags) != 0)
|
||||
NumOperands--;
|
||||
|
||||
if(MI.getOpcode() == AMDGPU::PRED_X)
|
||||
NumOperands = 2;
|
||||
|
||||
// XXX Check if instruction writes a result
|
||||
if (NumOperands < 1) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Emit instruction type
|
||||
EmitByte(INSTR_ALU, OS);
|
||||
|
||||
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
|
||||
|
||||
//older alu have different encoding for instructions with one or two src
|
||||
//parameters.
|
||||
if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
|
||||
!(MCDesc.TSFlags & R600_InstFlag::OP3)) {
|
||||
uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
|
||||
InstWord01 &= ~(0x3FFULL << 39);
|
||||
InstWord01 |= ISAOpCode << 1;
|
||||
}
|
||||
|
||||
unsigned int OpIndex;
|
||||
for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) {
|
||||
// Literal constants are always stored as the last operand.
|
||||
if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) {
|
||||
break;
|
||||
}
|
||||
EmitSrcISA(MI, OpIndex, InstWord01, OS);
|
||||
}
|
||||
|
||||
// Emit zeros for unused sources
|
||||
for ( ; OpIndex < 4; OpIndex++) {
|
||||
EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
|
||||
}
|
||||
|
||||
// Emit destination register
|
||||
const MCOperand &dstOp = MI.getOperand(0);
|
||||
if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) {
|
||||
//element of destination register
|
||||
InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61;
|
||||
|
||||
// isClamped
|
||||
if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) {
|
||||
InstWord01 |= 1ULL << 63;
|
||||
}
|
||||
|
||||
// write mask
|
||||
if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) {
|
||||
InstWord01 |= 1ULL << 36;
|
||||
}
|
||||
|
||||
// XXX: Emit relative addressing mode
|
||||
}
|
||||
|
||||
// Emit ALU
|
||||
|
||||
// Emit IsLast (for this instruction group) (1 byte)
|
||||
if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) {
|
||||
InstWord01 |= 1ULL << 31;
|
||||
}
|
||||
|
||||
// XXX: Emit push modifier
|
||||
if(isFlagSet(MI, 1, MO_FLAG_PUSH)) {
|
||||
InstWord01 |= 1ULL << 34;
|
||||
}
|
||||
|
||||
// XXX: Emit predicate (1 byte)
|
||||
int PredIdx = MCDesc.findFirstPredOperandIdx();
|
||||
if (PredIdx != -1) {
|
||||
switch(MI.getOperand(PredIdx).getReg()) {
|
||||
case AMDGPU::PRED_SEL_ZERO:
|
||||
InstWord01 |= 2ULL << 29;
|
||||
break;
|
||||
case AMDGPU::PRED_SEL_ONE:
|
||||
InstWord01 |= 3ULL << 29;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//XXX: predicate
|
||||
//XXX: bank swizzle
|
||||
//XXX: OMOD
|
||||
//XXX: index mode
|
||||
|
||||
Emit(InstWord01, OS);
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
|
||||
raw_ostream &OS) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
union {
|
||||
float f;
|
||||
uint32_t i;
|
||||
} Value;
|
||||
Value.i = 0;
|
||||
// Emit the source select (2 bytes). For GPRs, this is the register index.
|
||||
// For other potential instruction operands, (e.g. constant registers) the
|
||||
// value of the source select is defined in the r600isa docs.
|
||||
if (MO.isReg()) {
|
||||
unsigned reg = MO.getReg();
|
||||
EmitTwoBytes(getHWReg(reg), OS);
|
||||
if (reg == AMDGPU::ALU_LITERAL_X) {
|
||||
unsigned ImmOpIndex = MI.getNumOperands() - 1;
|
||||
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
|
||||
if (ImmOp.isFPImm()) {
|
||||
Value.f = ImmOp.getFPImm();
|
||||
} else {
|
||||
assert(ImmOp.isImm());
|
||||
Value.i = ImmOp.getImm();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// XXX: Handle other operand types.
|
||||
EmitTwoBytes(0, OS);
|
||||
}
|
||||
|
||||
// Emit the source channel (1 byte)
|
||||
if (MO.isReg()) {
|
||||
EmitByte(getHWRegChan(MO.getReg()), OS);
|
||||
} else {
|
||||
EmitByte(0, OS);
|
||||
}
|
||||
|
||||
// XXX: Emit isNegated (1 byte)
|
||||
if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
|
||||
&& (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
|
||||
(MO.isReg() &&
|
||||
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
|
||||
EmitByte(1, OS);
|
||||
} else {
|
||||
EmitByte(0, OS);
|
||||
}
|
||||
|
||||
// Emit isAbsolute (1 byte)
|
||||
if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
|
||||
EmitByte(1, OS);
|
||||
} else {
|
||||
EmitByte(0, OS);
|
||||
}
|
||||
|
||||
// XXX: Emit relative addressing mode (1 byte)
|
||||
EmitByte(0, OS);
|
||||
|
||||
// Emit kc_bank, This will be adjusted later by r600_asm
|
||||
EmitByte(0, OS);
|
||||
|
||||
// Emit the literal value, if applicable (4 bytes).
|
||||
Emit(Value.i, OS);
|
||||
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
|
||||
uint64_t &Value, raw_ostream &OS) const {
|
||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
||||
union {
|
||||
float f;
|
||||
uint32_t i;
|
||||
} InlineConstant;
|
||||
InlineConstant.i = 0;
|
||||
// Emit the source select (2 bytes). For GPRs, this is the register index.
|
||||
// For other potential instruction operands, (e.g. constant registers) the
|
||||
// value of the source select is defined in the r600isa docs.
|
||||
if (MO.isReg()) {
|
||||
unsigned Reg = MO.getReg();
|
||||
if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
|
||||
EmitByte(1, OS);
|
||||
} else {
|
||||
EmitByte(0, OS);
|
||||
}
|
||||
|
||||
if (Reg == AMDGPU::ALU_LITERAL_X) {
|
||||
unsigned ImmOpIndex = MI.getNumOperands() - 1;
|
||||
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
|
||||
if (ImmOp.isFPImm()) {
|
||||
InlineConstant.f = ImmOp.getFPImm();
|
||||
} else {
|
||||
assert(ImmOp.isImm());
|
||||
InlineConstant.i = ImmOp.getImm();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// XXX: Handle other operand types.
|
||||
EmitTwoBytes(0, OS);
|
||||
}
|
||||
|
||||
// source channel
|
||||
uint64_t sourceChannelValue = getHWRegChan(MO.getReg());
|
||||
if (OpIdx == 1)
|
||||
Value |= sourceChannelValue << 10;
|
||||
if (OpIdx == 2)
|
||||
Value |= sourceChannelValue << 23;
|
||||
if (OpIdx == 3)
|
||||
Value |= sourceChannelValue << 42;
|
||||
|
||||
// isNegated
|
||||
if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
|
||||
&& (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
|
||||
(MO.isReg() &&
|
||||
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
|
||||
if (OpIdx == 1)
|
||||
Value |= 1ULL << 12;
|
||||
else if (OpIdx == 2)
|
||||
Value |= 1ULL << 25;
|
||||
else if (OpIdx == 3)
|
||||
Value |= 1ULL << 44;
|
||||
}
|
||||
|
||||
// isAbsolute
|
||||
if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
|
||||
assert(OpIdx < 3);
|
||||
Value |= 1ULL << (32+OpIdx-1);
|
||||
}
|
||||
|
||||
// XXX: relative addressing mode
|
||||
// XXX: kc_bank
|
||||
|
||||
// Emit the literal value, if applicable (4 bytes).
|
||||
Emit(InlineConstant.i, OS);
|
||||
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
|
||||
SmallVectorImpl<MCFixup> &Fixups,
|
||||
raw_ostream &OS) const {
|
||||
|
||||
unsigned opcode = MI.getOpcode();
|
||||
bool hasOffsets = (opcode == AMDGPU::TEX_LD);
|
||||
unsigned op_offset = hasOffsets ? 3 : 0;
|
||||
int64_t sampler = MI.getOperand(op_offset+2).getImm();
|
||||
int64_t textureType = MI.getOperand(op_offset+3).getImm();
|
||||
unsigned srcSelect[4] = {0, 1, 2, 3};
|
||||
|
||||
// Emit instruction type
|
||||
EmitByte(1, OS);
|
||||
|
||||
// Emit instruction
|
||||
EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
|
||||
|
||||
// XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
|
||||
EmitByte(sampler + 1 + 1, OS);
|
||||
|
||||
// Emit source register
|
||||
EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
|
||||
|
||||
// XXX: Emit src isRelativeAddress
|
||||
EmitByte(0, OS);
|
||||
|
||||
// Emit destination register
|
||||
EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
|
||||
|
||||
// XXX: Emit dst isRealtiveAddress
|
||||
EmitByte(0, OS);
|
||||
|
||||
// XXX: Emit dst select
|
||||
EmitByte(0, OS); // X
|
||||
EmitByte(1, OS); // Y
|
||||
EmitByte(2, OS); // Z
|
||||
EmitByte(3, OS); // W
|
||||
|
||||
// XXX: Emit lod bias
|
||||
EmitByte(0, OS);
|
||||
|
||||
// XXX: Emit coord types
|
||||
unsigned coordType[4] = {1, 1, 1, 1};
|
||||
|
||||
if (textureType == TEXTURE_RECT
|
||||
|| textureType == TEXTURE_SHADOWRECT) {
|
||||
coordType[ELEMENT_X] = 0;
|
||||
coordType[ELEMENT_Y] = 0;
|
||||
}
|
||||
|
||||
if (textureType == TEXTURE_1D_ARRAY
|
||||
|| textureType == TEXTURE_SHADOW1D_ARRAY) {
|
||||
if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
|
||||
coordType[ELEMENT_Y] = 0;
|
||||
} else {
|
||||
coordType[ELEMENT_Z] = 0;
|
||||
srcSelect[ELEMENT_Z] = ELEMENT_Y;
|
||||
}
|
||||
} else if (textureType == TEXTURE_2D_ARRAY
|
||||
|| textureType == TEXTURE_SHADOW2D_ARRAY) {
|
||||
coordType[ELEMENT_Z] = 0;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
EmitByte(coordType[i], OS);
|
||||
}
|
||||
|
||||
// XXX: Emit offsets
|
||||
if (hasOffsets)
|
||||
for (unsigned i = 2; i < 5; i++)
|
||||
EmitByte(MI.getOperand(i).getImm()<<1, OS);
|
||||
else
|
||||
EmitNullBytes(3, OS);
|
||||
|
||||
// Emit sampler id
|
||||
EmitByte(sampler, OS);
|
||||
|
||||
// XXX:Emit source select
|
||||
if ((textureType == TEXTURE_SHADOW1D
|
||||
|| textureType == TEXTURE_SHADOW2D
|
||||
|| textureType == TEXTURE_SHADOWRECT
|
||||
|| textureType == TEXTURE_SHADOW1D_ARRAY)
|
||||
&& opcode != AMDGPU::TEX_SAMPLE_C_L
|
||||
&& opcode != AMDGPU::TEX_SAMPLE_C_LB) {
|
||||
srcSelect[ELEMENT_W] = ELEMENT_Z;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
EmitByte(srcSelect[i], OS);
|
||||
}
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
|
||||
|
||||
// Emit instruction type
|
||||
EmitByte(INSTR_FC, OS);
|
||||
|
||||
// Emit SRC
|
||||
unsigned NumOperands = MI.getNumOperands();
|
||||
if (NumOperands > 0) {
|
||||
assert(NumOperands == 1);
|
||||
EmitSrc(MI, 0, OS);
|
||||
} else {
|
||||
EmitNullBytes(SRC_BYTE_COUNT, OS);
|
||||
}
|
||||
|
||||
// Emit FC Instruction
|
||||
enum FCInstr instr;
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::BREAK_LOGICALZ_f32:
|
||||
instr = FC_BREAK;
|
||||
break;
|
||||
case AMDGPU::BREAK_LOGICALNZ_f32:
|
||||
instr = FC_BREAK_NZ;
|
||||
break;
|
||||
case AMDGPU::BREAK_LOGICALNZ_i32:
|
||||
instr = FC_BREAK_NZ_INT;
|
||||
break;
|
||||
case AMDGPU::BREAK_LOGICALZ_i32:
|
||||
instr = FC_BREAK_Z_INT;
|
||||
break;
|
||||
case AMDGPU::CONTINUE_LOGICALNZ_f32:
|
||||
case AMDGPU::CONTINUE_LOGICALNZ_i32:
|
||||
instr = FC_CONTINUE;
|
||||
break;
|
||||
case AMDGPU::IF_LOGICALNZ_f32:
|
||||
instr = FC_IF;
|
||||
case AMDGPU::IF_LOGICALNZ_i32:
|
||||
instr = FC_IF_INT;
|
||||
break;
|
||||
case AMDGPU::IF_LOGICALZ_f32:
|
||||
abort();
|
||||
break;
|
||||
case AMDGPU::ELSE:
|
||||
instr = FC_ELSE;
|
||||
break;
|
||||
case AMDGPU::ENDIF:
|
||||
instr = FC_ENDIF;
|
||||
break;
|
||||
case AMDGPU::ENDLOOP:
|
||||
instr = FC_ENDLOOP;
|
||||
break;
|
||||
case AMDGPU::WHILELOOP:
|
||||
instr = FC_BGNLOOP;
|
||||
break;
|
||||
default:
|
||||
abort();
|
||||
break;
|
||||
}
|
||||
EmitByte(instr, OS);
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
|
||||
raw_ostream &OS) const {
|
||||
|
||||
for (unsigned int i = 0; i < ByteCount; i++) {
|
||||
EmitByte(0, OS);
|
||||
}
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
|
||||
OS.write((uint8_t) Byte & 0xff);
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
|
||||
raw_ostream &OS) const {
|
||||
OS.write((uint8_t) (Bytes & 0xff));
|
||||
OS.write((uint8_t) ((Bytes >> 8) & 0xff));
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
|
||||
}
|
||||
}
|
||||
|
||||
void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
|
||||
for (unsigned i = 0; i < 8; i++) {
|
||||
EmitByte((Value >> (8 * i)) & 0xff, OS);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned R600MCCodeEmitter::getHWRegIndex(unsigned reg) const {
|
||||
switch(reg) {
|
||||
case AMDGPU::ZERO: return 248;
|
||||
case AMDGPU::ONE:
|
||||
case AMDGPU::NEG_ONE: return 249;
|
||||
case AMDGPU::ONE_INT: return 250;
|
||||
case AMDGPU::HALF:
|
||||
case AMDGPU::NEG_HALF: return 252;
|
||||
case AMDGPU::ALU_LITERAL_X: return 253;
|
||||
case AMDGPU::PREDICATE_BIT:
|
||||
case AMDGPU::PRED_SEL_OFF:
|
||||
case AMDGPU::PRED_SEL_ZERO:
|
||||
case AMDGPU::PRED_SEL_ONE:
|
||||
return 0;
|
||||
default: return getHWRegIndexGen(reg);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
|
||||
switch(reg) {
|
||||
case AMDGPU::ZERO:
|
||||
case AMDGPU::ONE:
|
||||
case AMDGPU::ONE_INT:
|
||||
case AMDGPU::NEG_ONE:
|
||||
case AMDGPU::HALF:
|
||||
case AMDGPU::NEG_HALF:
|
||||
case AMDGPU::ALU_LITERAL_X:
|
||||
case AMDGPU::PREDICATE_BIT:
|
||||
case AMDGPU::PRED_SEL_OFF:
|
||||
case AMDGPU::PRED_SEL_ZERO:
|
||||
case AMDGPU::PRED_SEL_ONE:
|
||||
return 0;
|
||||
default: return getHWRegChanGen(reg);
|
||||
}
|
||||
}
|
||||
unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
|
||||
unsigned HWReg;
|
||||
|
||||
HWReg = getHWRegIndex(RegNo);
|
||||
if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(RegNo)) {
|
||||
HWReg += 512;
|
||||
}
|
||||
return HWReg;
|
||||
}
|
||||
|
||||
uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
|
||||
const MCOperand &MO,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const {
|
||||
if (MO.isReg()) {
|
||||
return getHWRegIndex(MO.getReg());
|
||||
} else if (MO.isImm()) {
|
||||
return MO.getImm();
|
||||
} else {
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Encoding helper functions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
|
||||
switch(opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::BREAK_LOGICALZ_f32:
|
||||
case AMDGPU::BREAK_LOGICALNZ_i32:
|
||||
case AMDGPU::BREAK_LOGICALZ_i32:
|
||||
case AMDGPU::BREAK_LOGICALNZ_f32:
|
||||
case AMDGPU::CONTINUE_LOGICALNZ_f32:
|
||||
case AMDGPU::IF_LOGICALNZ_i32:
|
||||
case AMDGPU::IF_LOGICALZ_f32:
|
||||
case AMDGPU::ELSE:
|
||||
case AMDGPU::ENDIF:
|
||||
case AMDGPU::ENDLOOP:
|
||||
case AMDGPU::IF_LOGICALNZ_f32:
|
||||
case AMDGPU::WHILELOOP:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
|
||||
switch(opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::TEX_LD:
|
||||
case AMDGPU::TEX_GET_TEXTURE_RESINFO:
|
||||
case AMDGPU::TEX_SAMPLE:
|
||||
case AMDGPU::TEX_SAMPLE_C:
|
||||
case AMDGPU::TEX_SAMPLE_L:
|
||||
case AMDGPU::TEX_SAMPLE_C_L:
|
||||
case AMDGPU::TEX_SAMPLE_LB:
|
||||
case AMDGPU::TEX_SAMPLE_C_LB:
|
||||
case AMDGPU::TEX_SAMPLE_G:
|
||||
case AMDGPU::TEX_SAMPLE_C_G:
|
||||
case AMDGPU::TEX_GET_GRADIENTS_H:
|
||||
case AMDGPU::TEX_GET_GRADIENTS_V:
|
||||
case AMDGPU::TEX_SET_GRADIENTS_H:
|
||||
case AMDGPU::TEX_SET_GRADIENTS_V:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
|
||||
unsigned Flag) const {
|
||||
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
|
||||
unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
|
||||
if (FlagIndex == 0) {
|
||||
return false;
|
||||
}
|
||||
assert(MI.getOperand(FlagIndex).isImm());
|
||||
return !!((MI.getOperand(FlagIndex).getImm() >>
|
||||
(NUM_MO_FLAGS * Operand)) & Flag);
|
||||
}
|
||||
#define R600RegisterInfo R600MCCodeEmitter
|
||||
#include "R600HwRegInfo.include"
|
||||
#undef R600RegisterInfo
|
||||
|
||||
#include "AMDGPUGenMCCodeEmitter.inc"
|
||||
|
|
@ -1,296 +0,0 @@
|
|||
//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The SI code emitter produces machine code that can be executed directly on
|
||||
// the GPU device.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
|
||||
#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
|
||||
|
||||
// These must be kept in sync with SIInstructions.td and also the
|
||||
// InstrEncodingInfo array in SIInstrInfo.cpp.
|
||||
//
|
||||
// NOTE: This enum is only used to identify the encoding type within LLVM,
|
||||
// the actual encoding type that is part of the instruction format is different
|
||||
namespace SIInstrEncodingType {
|
||||
enum Encoding {
|
||||
EXP = 0,
|
||||
LDS = 1,
|
||||
MIMG = 2,
|
||||
MTBUF = 3,
|
||||
MUBUF = 4,
|
||||
SMRD = 5,
|
||||
SOP1 = 6,
|
||||
SOP2 = 7,
|
||||
SOPC = 8,
|
||||
SOPK = 9,
|
||||
SOPP = 10,
|
||||
VINTRP = 11,
|
||||
VOP1 = 12,
|
||||
VOP2 = 13,
|
||||
VOP3 = 14,
|
||||
VOPC = 15
|
||||
};
|
||||
}
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
|
||||
SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
|
||||
void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
|
||||
const MCInstrInfo &MCII;
|
||||
const MCSubtargetInfo &STI;
|
||||
MCContext &Ctx;
|
||||
|
||||
public:
|
||||
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
|
||||
MCContext &ctx)
|
||||
: MCII(mcii), STI(sti), Ctx(ctx) { }
|
||||
|
||||
~SIMCCodeEmitter() { }
|
||||
|
||||
/// EncodeInstruction - Encode the instruction and write it to the OS.
|
||||
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
/// getMachineOpValue - Reutrn the encoding for an MCOperand.
|
||||
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
||||
|
||||
public:
|
||||
|
||||
/// GPRAlign - Encode a sequence of registers with the correct alignment.
|
||||
unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
|
||||
|
||||
/// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
|
||||
virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const;
|
||||
|
||||
/// GPR4AlignEncode - Encoding for when 4 consectuive registers are used
|
||||
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const;
|
||||
|
||||
/// SMRDmemriEncode - Encoding for SMRD indexed loads
|
||||
virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const;
|
||||
|
||||
/// VOPPostEncode - Post-Encoder method for VOP instructions
|
||||
virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
|
||||
|
||||
private:
|
||||
|
||||
///getEncodingType = Return this SIInstrEncodingType for this instruction.
|
||||
unsigned getEncodingType(const MCInst &MI) const;
|
||||
|
||||
///getEncodingBytes - Get then size in bytes of this instructions encoding.
|
||||
unsigned getEncodingBytes(const MCInst &MI) const;
|
||||
|
||||
/// getRegBinaryCode - Returns the hardware encoding for a register
|
||||
unsigned getRegBinaryCode(unsigned reg) const;
|
||||
|
||||
/// getHWRegNum - Generated function that returns the hardware encoding for
|
||||
/// a register
|
||||
unsigned getHWRegNum(unsigned reg) const;
|
||||
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
|
||||
const MCSubtargetInfo &STI,
|
||||
MCContext &Ctx) {
|
||||
return new SIMCCodeEmitter(MCII, STI, Ctx);
|
||||
}
|
||||
|
||||
void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
||||
uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
|
||||
unsigned bytes = getEncodingBytes(MI);
|
||||
for (unsigned i = 0; i < bytes; i++) {
|
||||
OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
|
||||
const MCOperand &MO,
|
||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
||||
if (MO.isReg()) {
|
||||
return getRegBinaryCode(MO.getReg());
|
||||
} else if (MO.isImm()) {
|
||||
return MO.getImm();
|
||||
} else if (MO.isFPImm()) {
|
||||
// XXX: Not all instructions can use inline literals
|
||||
// XXX: We should make sure this is a 32-bit constant
|
||||
union {
|
||||
float F;
|
||||
uint32_t I;
|
||||
} Imm;
|
||||
Imm.F = MO.getFPImm();
|
||||
return Imm.I;
|
||||
} else{
|
||||
llvm_unreachable("Encoding of this operand type is not supported yet.");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom Operand Encodings
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
|
||||
unsigned shift) const {
|
||||
unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
|
||||
return regCode >> shift;
|
||||
return 0;
|
||||
}
|
||||
unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
|
||||
unsigned OpNo ,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const {
|
||||
return GPRAlign(MI, OpNo, 1);
|
||||
}
|
||||
|
||||
unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
|
||||
unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const {
|
||||
return GPRAlign(MI, OpNo, 2);
|
||||
}
|
||||
|
||||
#define SMRD_OFFSET_MASK 0xff
|
||||
#define SMRD_IMM_SHIFT 8
|
||||
#define SMRD_SBASE_MASK 0x3f
|
||||
#define SMRD_SBASE_SHIFT 9
|
||||
/// SMRDmemriEncode - This function is responsibe for encoding the offset
|
||||
/// and the base ptr for SMRD instructions it should return a bit string in
|
||||
/// this format:
|
||||
///
|
||||
/// OFFSET = bits{7-0}
|
||||
/// IMM = bits{8}
|
||||
/// SBASE = bits{14-9}
|
||||
///
|
||||
uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MCFixup> &Fixup) const {
|
||||
uint32_t Encoding;
|
||||
|
||||
const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
|
||||
|
||||
//XXX: Use this function for SMRD loads with register offsets
|
||||
assert(OffsetOp.isImm());
|
||||
|
||||
Encoding =
|
||||
(getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
|
||||
| (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
|
||||
| ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
|
||||
;
|
||||
|
||||
return Encoding;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Post Encoder Callbacks
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{
|
||||
unsigned encodingType = getEncodingType(MI);
|
||||
unsigned numSrcOps;
|
||||
unsigned vgprBitOffset;
|
||||
|
||||
if (encodingType == SIInstrEncodingType::VOP3) {
|
||||
numSrcOps = 3;
|
||||
vgprBitOffset = 32;
|
||||
} else {
|
||||
numSrcOps = 1;
|
||||
vgprBitOffset = 0;
|
||||
}
|
||||
|
||||
// Add one to skip over the destination reg operand.
|
||||
for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
|
||||
const MCOperand &MO = MI.getOperand(opIdx);
|
||||
if (MO.isReg()) {
|
||||
unsigned reg = MI.getOperand(opIdx).getReg();
|
||||
if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
|
||||
AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
|
||||
Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
|
||||
}
|
||||
} else if (MO.isFPImm()) {
|
||||
union {
|
||||
float f;
|
||||
uint32_t i;
|
||||
} Imm;
|
||||
// XXX: Not all instructions can use inline literals
|
||||
// XXX: We should make sure this is a 32-bit constant
|
||||
Imm.f = MO.getFPImm();
|
||||
Value |= ((uint64_t)Imm.i) << 32;
|
||||
}
|
||||
}
|
||||
return Value;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Encoding helper functions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
|
||||
return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
|
||||
}
|
||||
|
||||
unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
|
||||
|
||||
// These instructions aren't real instructions with an encoding type, so
|
||||
// we need to manually specify their size.
|
||||
switch (MI.getOpcode()) {
|
||||
default: break;
|
||||
case AMDGPU::SI_LOAD_LITERAL_I32:
|
||||
case AMDGPU::SI_LOAD_LITERAL_F32:
|
||||
return 4;
|
||||
}
|
||||
|
||||
unsigned encoding_type = getEncodingType(MI);
|
||||
switch (encoding_type) {
|
||||
case SIInstrEncodingType::EXP:
|
||||
case SIInstrEncodingType::LDS:
|
||||
case SIInstrEncodingType::MUBUF:
|
||||
case SIInstrEncodingType::MTBUF:
|
||||
case SIInstrEncodingType::MIMG:
|
||||
case SIInstrEncodingType::VOP3:
|
||||
return 8;
|
||||
default:
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
|
||||
switch (reg) {
|
||||
case AMDGPU::VCC: return 106;
|
||||
case AMDGPU::M0: return 124;
|
||||
case AMDGPU::EXEC: return 126;
|
||||
case AMDGPU::EXEC_LO: return 126;
|
||||
case AMDGPU::EXEC_HI: return 127;
|
||||
case AMDGPU::SREG_LIT_0: return 128;
|
||||
case AMDGPU::SI_LITERAL_CONSTANT: return 255;
|
||||
default: return getHWRegNum(reg);
|
||||
}
|
||||
}
|
||||
|
||||
#define SIRegisterInfo SIMCCodeEmitter
|
||||
#include "SIRegisterGetHWRegNum.inc"
|
||||
#undef SIRegisterInfo
|
||||
|
|
@ -8,74 +8,8 @@ LIBNAME = radeon
|
|||
|
||||
LIBRARY_INCLUDES = -I$(TOP)/include
|
||||
|
||||
TBLGEN = $(LLVM_BINDIR)/llvm-tblgen
|
||||
|
||||
CXXFLAGS+= $(LLVM_CXXFLAGS)
|
||||
|
||||
ifeq ($(LLVM_VERSION),3.1)
|
||||
CPP_SOURCES += $(LLVM_CPP_SOURCES)
|
||||
GENERATED_SOURCES = $(LLVM_GENERATED_SOURCES)
|
||||
else
|
||||
CXXFLAGS+= -DEXTERNAL_LLVM
|
||||
endif
|
||||
|
||||
include ../../Makefile.template
|
||||
|
||||
CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS))
|
||||
|
||||
tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3
|
||||
|
||||
HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td)
|
||||
|
||||
SIRegisterInfo.td: SIGenRegisterInfo.pl
|
||||
$(PERL) $^ > $@
|
||||
|
||||
SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl
|
||||
$(PERL) $^ $@ > /dev/null
|
||||
|
||||
R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td
|
||||
ifeq ($(HAVE_LLVM_INTRINSICS),)
|
||||
cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td
|
||||
else
|
||||
cp R600IntrinsicsOpenCL.td R600Intrinsics.td
|
||||
endif
|
||||
|
||||
R600RegisterInfo.td: R600GenRegisterInfo.pl
|
||||
$(PERL) $^ > $@
|
||||
|
||||
AMDGPUGenRegisterInfo.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-register-info, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenInstrInfo.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-instr-info, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenAsmWriter.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-asm-writer, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenDAGISel.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-dag-isel, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenCallingConv.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-callingconv, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenSubtargetInfo.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-subtarget, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenEDInfo.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-enhanced-disassembly-info, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenIntrinsics.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-tgt-intrinsic, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenCodeEmitter.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-emitter, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenMCCodeEmitter.inc: $(TD_FILES)
|
||||
$(call tablegen, -mc-emitter -gen-emitter, AMDGPU.td, $@)
|
||||
|
||||
AMDGPUGenDFAPacketizer.inc: $(TD_FILES)
|
||||
$(call tablegen, -gen-dfa-packetizer, AMDGPU.td, $@)
|
||||
|
||||
LOADER_LIBS=$(shell llvm-config --libs bitreader asmparser)
|
||||
loader: loader.o libradeon.a
|
||||
gcc -o loader $(LLVM_LDFLAGS) -L/usr/local/lib $(LDFLAGS) loader.o libradeon.a $(LLVM_LIBS) $(LOADER_LIBS) -lpthread -ldl -lstdc++ -lm
|
||||
|
|
|
|||
|
|
@ -1,86 +1,3 @@
|
|||
|
||||
TD_FILES := \
|
||||
AMDGPU.td \
|
||||
AMDGPUInstrInfo.td \
|
||||
AMDGPUInstructions.td \
|
||||
AMDGPUIntrinsics.td \
|
||||
AMDGPURegisterInfo.td \
|
||||
AMDILBase.td \
|
||||
AMDILInstrInfo.td \
|
||||
AMDILIntrinsics.td \
|
||||
AMDILRegisterInfo.td \
|
||||
Processors.td \
|
||||
R600Instructions.td \
|
||||
R600Intrinsics.td \
|
||||
R600IntrinsicsNoOpenCL.td \
|
||||
R600IntrinsicsOpenCL.td \
|
||||
R600RegisterInfo.td \
|
||||
R600Schedule.td \
|
||||
SIInstrFormats.td \
|
||||
SIInstrInfo.td \
|
||||
SIInstructions.td \
|
||||
SIIntrinsics.td \
|
||||
SIRegisterInfo.td \
|
||||
SISchedule.td
|
||||
|
||||
LLVM_GENERATED_SOURCES := \
|
||||
R600Intrinsics.td \
|
||||
R600RegisterInfo.td \
|
||||
SIRegisterInfo.td \
|
||||
SIRegisterGetHWRegNum.inc \
|
||||
AMDGPUGenRegisterInfo.inc \
|
||||
AMDGPUGenInstrInfo.inc \
|
||||
AMDGPUGenAsmWriter.inc \
|
||||
AMDGPUGenDAGISel.inc \
|
||||
AMDGPUGenCallingConv.inc \
|
||||
AMDGPUGenSubtargetInfo.inc \
|
||||
AMDGPUGenEDInfo.inc \
|
||||
AMDGPUGenIntrinsics.inc \
|
||||
AMDGPUGenCodeEmitter.inc \
|
||||
AMDGPUGenMCCodeEmitter.inc \
|
||||
AMDGPUGenDFAPacketizer.inc
|
||||
|
||||
LLVM_CPP_SOURCES := \
|
||||
AMDIL7XXDevice.cpp \
|
||||
AMDILCFGStructurizer.cpp \
|
||||
AMDILDevice.cpp \
|
||||
AMDILDeviceInfo.cpp \
|
||||
AMDILEvergreenDevice.cpp \
|
||||
AMDILFrameLowering.cpp \
|
||||
AMDILIntrinsicInfo.cpp \
|
||||
AMDILISelDAGToDAG.cpp \
|
||||
AMDILISelLowering.cpp \
|
||||
AMDILNIDevice.cpp \
|
||||
AMDILPeepholeOptimizer.cpp \
|
||||
AMDILSIDevice.cpp \
|
||||
AMDGPUAsmPrinter.cpp \
|
||||
AMDGPUMCInstLower.cpp \
|
||||
AMDGPUSubtarget.cpp \
|
||||
AMDGPUTargetMachine.cpp \
|
||||
AMDGPUISelLowering.cpp \
|
||||
AMDGPUConvertToISA.cpp \
|
||||
AMDGPUInstrInfo.cpp \
|
||||
AMDGPURegisterInfo.cpp \
|
||||
R600ExpandSpecialInstrs.cpp \
|
||||
R600ISelLowering.cpp \
|
||||
R600InstrInfo.cpp \
|
||||
R600MachineFunctionInfo.cpp \
|
||||
R600RegisterInfo.cpp \
|
||||
SIAssignInterpRegs.cpp \
|
||||
SIInstrInfo.cpp \
|
||||
SIISelLowering.cpp \
|
||||
SILowerLiteralConstants.cpp \
|
||||
SILowerFlowControl.cpp \
|
||||
SIMachineFunctionInfo.cpp \
|
||||
SIRegisterInfo.cpp \
|
||||
InstPrinter/AMDGPUInstPrinter.cpp \
|
||||
MCTargetDesc/AMDGPUMCAsmInfo.cpp \
|
||||
MCTargetDesc/AMDGPUAsmBackend.cpp \
|
||||
MCTargetDesc/AMDGPUMCTargetDesc.cpp \
|
||||
MCTargetDesc/SIMCCodeEmitter.cpp \
|
||||
MCTargetDesc/R600MCCodeEmitter.cpp \
|
||||
TargetInfo/AMDGPUTargetInfo.cpp \
|
||||
|
||||
CPP_SOURCES := \
|
||||
radeon_llvm_emit.cpp
|
||||
|
||||
|
|
|
|||
|
|
@ -1,29 +0,0 @@
|
|||
//===-- Processors.td - TODO: Add brief description -------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// AMDIL processors supported.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
|
||||
: Processor<Name, itin, Features>;
|
||||
def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
|
||||
def : Proc<"rv710", R600_EG_Itin, []>;
|
||||
def : Proc<"rv730", R600_EG_Itin, []>;
|
||||
def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
|
||||
def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
||||
def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
||||
def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
||||
def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
|
||||
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
// Operand Flags
|
||||
#define MO_FLAG_CLAMP (1 << 0)
|
||||
#define MO_FLAG_NEG (1 << 1)
|
||||
#define MO_FLAG_ABS (1 << 2)
|
||||
#define MO_FLAG_MASK (1 << 3)
|
||||
#define MO_FLAG_PUSH (1 << 4)
|
||||
#define MO_FLAG_NOT_LAST (1 << 5)
|
||||
#define NUM_MO_FLAGS 6
|
||||
|
||||
// Helper for finding getting the operand index for the instruction flags
|
||||
// operand.
|
||||
#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
|
||||
|
||||
namespace R600_InstFlag {
|
||||
enum TIF {
|
||||
TRANS_ONLY = (1 << 0),
|
||||
TEX = (1 << 1),
|
||||
REDUCTION = (1 << 2),
|
||||
FC = (1 << 3),
|
||||
TRIG = (1 << 4),
|
||||
OP3 = (1 << 5),
|
||||
VECTOR = (1 << 6)
|
||||
//FlagOperand bits 7, 8
|
||||
};
|
||||
}
|
||||
|
|
@ -1,292 +0,0 @@
|
|||
//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Vector, Reduction, and Cube instructions need to fill the entire instruction
|
||||
// group to work correctly. This pass expands these individual instructions
|
||||
// into several instructions that will completely fill the instruction group.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "R600Defines.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
const R600InstrInfo *TII;
|
||||
|
||||
bool ExpandInputPerspective(MachineInstr& MI);
|
||||
bool ExpandInputConstant(MachineInstr& MI);
|
||||
|
||||
public:
|
||||
R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
|
||||
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
const char *getPassName() const {
|
||||
return "R600 Expand special instructions pass";
|
||||
}
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char R600ExpandSpecialInstrsPass::ID = 0;
|
||||
|
||||
FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
|
||||
return new R600ExpandSpecialInstrsPass(TM);
|
||||
}
|
||||
|
||||
bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
|
||||
{
|
||||
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
||||
if (MI.getOpcode() != AMDGPU::input_perspective)
|
||||
return false;
|
||||
|
||||
MachineBasicBlock::iterator I = &MI;
|
||||
unsigned DstReg = MI.getOperand(0).getReg();
|
||||
R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
|
||||
->getInfo<R600MachineFunctionInfo>();
|
||||
unsigned IJIndexBase;
|
||||
|
||||
// In Evergreen ISA doc section 8.3.2 :
|
||||
// We need to interpolate XY and ZW in two different instruction groups.
|
||||
// An INTERP_* must occupy all 4 slots of an instruction group.
|
||||
// Output of INTERP_XY is written in X,Y slots
|
||||
// Output of INTERP_ZW is written in Z,W slots
|
||||
//
|
||||
// Thus interpolation requires the following sequences :
|
||||
//
|
||||
// AnyGPR.x = INTERP_ZW; (Write Masked Out)
|
||||
// AnyGPR.y = INTERP_ZW; (Write Masked Out)
|
||||
// DstGPR.z = INTERP_ZW;
|
||||
// DstGPR.w = INTERP_ZW; (End of first IG)
|
||||
// DstGPR.x = INTERP_XY;
|
||||
// DstGPR.y = INTERP_XY;
|
||||
// AnyGPR.z = INTERP_XY; (Write Masked Out)
|
||||
// AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
|
||||
//
|
||||
switch (MI.getOperand(1).getImm()) {
|
||||
case 0:
|
||||
IJIndexBase = MFI->GetIJPerspectiveIndex();
|
||||
break;
|
||||
case 1:
|
||||
IJIndexBase = MFI->GetIJLinearIndex();
|
||||
break;
|
||||
default:
|
||||
assert(0 && "Unknow ij index");
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < 8; i++) {
|
||||
unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
|
||||
2 * IJIndexBase + ((i + 1) % 2));
|
||||
unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
|
||||
4 * MI.getOperand(2).getImm());
|
||||
|
||||
unsigned Sel;
|
||||
switch (i % 4) {
|
||||
case 0:Sel = AMDGPU::sel_x;break;
|
||||
case 1:Sel = AMDGPU::sel_y;break;
|
||||
case 2:Sel = AMDGPU::sel_z;break;
|
||||
case 3:Sel = AMDGPU::sel_w;break;
|
||||
default:break;
|
||||
}
|
||||
|
||||
unsigned Res = TRI.getSubReg(DstReg, Sel);
|
||||
|
||||
const MCInstrDesc &Opcode = (i < 4)?
|
||||
TII->get(AMDGPU::INTERP_ZW):
|
||||
TII->get(AMDGPU::INTERP_XY);
|
||||
|
||||
MachineInstr *NewMI = BuildMI(*(MI.getParent()),
|
||||
I, MI.getParent()->findDebugLoc(I),
|
||||
Opcode, Res)
|
||||
.addReg(IJIndex)
|
||||
.addReg(ReadReg)
|
||||
.addImm(0);
|
||||
|
||||
if (!(i> 1 && i < 6)) {
|
||||
TII->addFlag(NewMI, 0, MO_FLAG_MASK);
|
||||
}
|
||||
|
||||
if (i % 4 != 3)
|
||||
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
|
||||
}
|
||||
|
||||
MI.eraseFromParent();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
|
||||
{
|
||||
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
||||
if (MI.getOpcode() != AMDGPU::input_constant)
|
||||
return false;
|
||||
|
||||
MachineBasicBlock::iterator I = &MI;
|
||||
unsigned DstReg = MI.getOperand(0).getReg();
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
|
||||
4 * MI.getOperand(1).getImm() + i);
|
||||
|
||||
unsigned Sel;
|
||||
switch (i % 4) {
|
||||
case 0:Sel = AMDGPU::sel_x;break;
|
||||
case 1:Sel = AMDGPU::sel_y;break;
|
||||
case 2:Sel = AMDGPU::sel_z;break;
|
||||
case 3:Sel = AMDGPU::sel_w;break;
|
||||
default:break;
|
||||
}
|
||||
|
||||
unsigned Res = TRI.getSubReg(DstReg, Sel);
|
||||
|
||||
MachineInstr *NewMI = BuildMI(*(MI.getParent()),
|
||||
I, MI.getParent()->findDebugLoc(I),
|
||||
TII->get(AMDGPU::INTERP_LOAD_P0), Res)
|
||||
.addReg(ReadReg)
|
||||
.addImm(0);
|
||||
|
||||
if (i % 4 != 3)
|
||||
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
|
||||
}
|
||||
|
||||
MI.eraseFromParent();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
||||
|
||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
||||
BB != BB_E; ++BB) {
|
||||
MachineBasicBlock &MBB = *BB;
|
||||
MachineBasicBlock::iterator I = MBB.begin();
|
||||
while (I != MBB.end()) {
|
||||
MachineInstr &MI = *I;
|
||||
I = llvm::next(I);
|
||||
|
||||
if (ExpandInputPerspective(MI))
|
||||
continue;
|
||||
if (ExpandInputConstant(MI))
|
||||
continue;
|
||||
|
||||
bool IsReduction = TII->isReductionOp(MI.getOpcode());
|
||||
bool IsVector = TII->isVector(MI);
|
||||
bool IsCube = TII->isCubeOp(MI.getOpcode());
|
||||
if (!IsReduction && !IsVector && !IsCube) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Expand the instruction
|
||||
//
|
||||
// Reduction instructions:
|
||||
// T0_X = DP4 T1_XYZW, T2_XYZW
|
||||
// becomes:
|
||||
// TO_X = DP4 T1_X, T2_X
|
||||
// TO_Y (write masked) = DP4 T1_Y, T2_Y
|
||||
// TO_Z (write masked) = DP4 T1_Z, T2_Z
|
||||
// TO_W (write masked) = DP4 T1_W, T2_W
|
||||
//
|
||||
// Vector instructions:
|
||||
// T0_X = MULLO_INT T1_X, T2_X
|
||||
// becomes:
|
||||
// T0_X = MULLO_INT T1_X, T2_X
|
||||
// T0_Y (write masked) = MULLO_INT T1_X, T2_X
|
||||
// T0_Z (write masked) = MULLO_INT T1_X, T2_X
|
||||
// T0_W (write masked) = MULLO_INT T1_X, T2_X
|
||||
//
|
||||
// Cube instructions:
|
||||
// T0_XYZW = CUBE T1_XYZW
|
||||
// becomes:
|
||||
// TO_X = CUBE T1_Z, T1_Y
|
||||
// T0_Y = CUBE T1_Z, T1_X
|
||||
// T0_Z = CUBE T1_X, T1_Z
|
||||
// T0_W = CUBE T1_Y, T1_Z
|
||||
for (unsigned Chan = 0; Chan < 4; Chan++) {
|
||||
unsigned DstReg = MI.getOperand(0).getReg();
|
||||
unsigned Src0 = MI.getOperand(1).getReg();
|
||||
unsigned Src1 = 0;
|
||||
|
||||
// Determine the correct source registers
|
||||
if (!IsCube) {
|
||||
Src1 = MI.getOperand(2).getReg();
|
||||
}
|
||||
if (IsReduction) {
|
||||
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
|
||||
Src0 = TRI.getSubReg(Src0, SubRegIndex);
|
||||
Src1 = TRI.getSubReg(Src1, SubRegIndex);
|
||||
} else if (IsCube) {
|
||||
static const int CubeSrcSwz[] = {2, 2, 0, 1};
|
||||
unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
|
||||
unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
|
||||
Src1 = TRI.getSubReg(Src0, SubRegIndex1);
|
||||
Src0 = TRI.getSubReg(Src0, SubRegIndex0);
|
||||
}
|
||||
|
||||
// Determine the correct destination registers;
|
||||
unsigned Flags = 0;
|
||||
if (IsCube) {
|
||||
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
|
||||
DstReg = TRI.getSubReg(DstReg, SubRegIndex);
|
||||
} else {
|
||||
// Mask the write if the original instruction does not write to
|
||||
// the current Channel.
|
||||
Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
|
||||
unsigned DstBase = TRI.getHWRegIndex(DstReg);
|
||||
DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
|
||||
}
|
||||
|
||||
// Set the IsLast bit
|
||||
Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
|
||||
|
||||
// Add the new instruction
|
||||
unsigned Opcode;
|
||||
if (IsCube) {
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::CUBE_r600_pseudo:
|
||||
Opcode = AMDGPU::CUBE_r600_real;
|
||||
break;
|
||||
case AMDGPU::CUBE_eg_pseudo:
|
||||
Opcode = AMDGPU::CUBE_eg_real;
|
||||
break;
|
||||
default:
|
||||
assert(!"Unknown CUBE instruction");
|
||||
Opcode = 0;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
Opcode = MI.getOpcode();
|
||||
}
|
||||
MachineInstr *NewMI =
|
||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
|
||||
.addReg(Src0)
|
||||
.addReg(Src1)
|
||||
.addImm(0); // Flag
|
||||
|
||||
NewMI->setIsInsideBundle(Chan != 0);
|
||||
TII->addFlag(NewMI, 0, Flags);
|
||||
}
|
||||
MI.eraseFromParent();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
@ -1,206 +0,0 @@
|
|||
#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
#
|
||||
# This perl script prints to stdout .td code to be used as R600RegisterInfo.td
|
||||
# it also generates a file called R600HwRegInfo.include, which contains helper
|
||||
# functions for determining the hw encoding of registers.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use constant CONST_REG_COUNT => 512;
|
||||
use constant TEMP_REG_COUNT => 128;
|
||||
|
||||
my $CREG_MAX = CONST_REG_COUNT - 1;
|
||||
my $TREG_MAX = TEMP_REG_COUNT - 1;
|
||||
|
||||
print <<STRING;
|
||||
|
||||
class R600Reg <string name> : Register<name> {
|
||||
let Namespace = "AMDGPU";
|
||||
}
|
||||
|
||||
class R600Reg_128<string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
|
||||
}
|
||||
|
||||
STRING
|
||||
|
||||
my $i;
|
||||
|
||||
### REG DEFS ###
|
||||
|
||||
my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C");
|
||||
my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T");
|
||||
|
||||
my @t128reg;
|
||||
my @treg_x;
|
||||
for (my $i = 0; $i < TEMP_REG_COUNT; $i++) {
|
||||
my $name = "T$i\_XYZW";
|
||||
print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W] >;\n};
|
||||
$t128reg[$i] = $name;
|
||||
$treg_x[$i] = "T$i\_X";
|
||||
}
|
||||
|
||||
my $treg_string = join(",", @treg_list);
|
||||
my $creg_list = join(",", @creg_list);
|
||||
my $t128_string = join(",", @t128reg);
|
||||
my $treg_x_string = join(",", @treg_x);
|
||||
print <<STRING;
|
||||
|
||||
class RegSet <dag s> {
|
||||
dag set = s;
|
||||
}
|
||||
|
||||
def ZERO : R600Reg<"0.0">;
|
||||
def HALF : R600Reg<"0.5">;
|
||||
def ONE : R600Reg<"1.0">;
|
||||
def ONE_INT : R600Reg<"1">;
|
||||
def NEG_HALF : R600Reg<"-0.5">;
|
||||
def NEG_ONE : R600Reg<"-1.0">;
|
||||
def PV_X : R600Reg<"pv.x">;
|
||||
def ALU_LITERAL_X : R600Reg<"literal.x">;
|
||||
def PREDICATE_BIT : R600Reg<"PredicateBit">;
|
||||
def PRED_SEL_OFF: R600Reg<"Pred_sel_off">;
|
||||
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero">;
|
||||
def PRED_SEL_ONE : R600Reg<"Pred_sel_one">;
|
||||
|
||||
def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
$creg_list)>;
|
||||
|
||||
def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
$treg_string)>;
|
||||
|
||||
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
$treg_x_string)>;
|
||||
|
||||
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
||||
R600_TReg32,
|
||||
R600_CReg32,
|
||||
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
|
||||
|
||||
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
|
||||
PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
|
||||
|
||||
def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
|
||||
PREDICATE_BIT)>;
|
||||
|
||||
def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
|
||||
$t128_string)>
|
||||
{
|
||||
let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)];
|
||||
let CopyCost = -1;
|
||||
}
|
||||
|
||||
STRING
|
||||
|
||||
my %index_map;
|
||||
my %chan_map;
|
||||
|
||||
for ($i = 0; $i <= $#creg_list; $i++) {
|
||||
push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]);
|
||||
push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]);
|
||||
}
|
||||
|
||||
for ($i = 0; $i <= $#treg_list; $i++) {
|
||||
push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]);
|
||||
push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]);
|
||||
}
|
||||
|
||||
for ($i = 0; $i <= $#t128reg; $i++) {
|
||||
push(@{$index_map{$i}}, $t128reg[$i]);
|
||||
push(@{$chan_map{'X'}}, $t128reg[$i]);
|
||||
}
|
||||
|
||||
open(OUTFILE, ">", "R600HwRegInfo.include");
|
||||
|
||||
print OUTFILE <<STRING;
|
||||
|
||||
unsigned R600RegisterInfo::getHWRegIndexGen(unsigned reg) const
|
||||
{
|
||||
switch(reg) {
|
||||
default: assert(!"Unknown register"); return 0;
|
||||
STRING
|
||||
foreach my $key (keys(%index_map)) {
|
||||
foreach my $reg (@{$index_map{$key}}) {
|
||||
print OUTFILE " case AMDGPU::$reg:\n";
|
||||
}
|
||||
print OUTFILE " return $key;\n\n";
|
||||
}
|
||||
|
||||
print OUTFILE " }\n}\n\n";
|
||||
|
||||
print OUTFILE <<STRING;
|
||||
|
||||
unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
|
||||
{
|
||||
switch(reg) {
|
||||
default: assert(!"Unknown register"); return 0;
|
||||
STRING
|
||||
|
||||
foreach my $key (keys(%chan_map)) {
|
||||
foreach my $reg (@{$chan_map{$key}}) {
|
||||
print OUTFILE " case AMDGPU::$reg:\n";
|
||||
}
|
||||
my $val;
|
||||
if ($key eq 'X') {
|
||||
$val = 0;
|
||||
} elsif ($key eq 'Y') {
|
||||
$val = 1;
|
||||
} elsif ($key eq 'Z') {
|
||||
$val = 2;
|
||||
} elsif ($key eq 'W') {
|
||||
$val = 3;
|
||||
} else {
|
||||
die("Unknown chan value; $key");
|
||||
}
|
||||
print OUTFILE " return $val;\n\n";
|
||||
}
|
||||
|
||||
print OUTFILE " }\n}\n\n";
|
||||
|
||||
sub print_reg_defs {
|
||||
my ($count, $prefix) = @_;
|
||||
|
||||
my @reg_list;
|
||||
|
||||
for ($i = 0; $i < $count; $i++) {
|
||||
my $hw_index = get_hw_index($i);
|
||||
my $chan= get_chan_str($i);
|
||||
my $name = "$prefix$hw_index\_$chan";
|
||||
print qq{def $name : R600Reg <"$prefix$hw_index.$chan">;\n};
|
||||
$reg_list[$i] = $name;
|
||||
}
|
||||
return @reg_list;
|
||||
}
|
||||
|
||||
#Helper functions
|
||||
# get_hw_index - Map a flat channel-major register index to its hardware
# register index (four channels per hardware register).
sub get_hw_index {
  my $flat_index = shift;
  return int($flat_index / 4);
}
|
||||
|
||||
# get_chan_str - Map a flat register index to its channel letter:
# 0 -> X, 1 -> Y, 2 -> Z, 3 -> W (index mod 4).
sub get_chan_str {
  my ($flat_index) = @_;
  my @chan_names = ('X', 'Y', 'Z', 'W');
  my $chan = $flat_index % 4;
  # Defensive check kept from the original; unreachable for integer input.
  die("Unknown chan value: $chan") if $chan < 0 || $chan > 3;
  return $chan_names[$chan];
}
|
||||
|
|
@ -1,740 +0,0 @@
|
|||
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
|
||||
// is mostly EmitInstrWithCustomInserter().
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600ISelLowering.h"
|
||||
#include "R600Defines.h"
|
||||
#include "R600InstrInfo.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
#include "llvm/Argument.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// R600TargetLowering - Register the R600 register classes and declare which
// generic DAG operations need Custom or Expand handling on this target.
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
    AMDGPUTargetLowering(TM),
    TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
{
  // i64 multiply gets no native selection; legalization expands it.
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  // Register classes must be added before computeRegisterProperties().
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  computeRegisterProperties();

  // Vector float add/mul are expanded rather than selected as vector ops.
  setOperationAction(ISD::FADD, MVT::v4f32, Expand);
  setOperationAction(ISD::FMUL, MVT::v4f32, Expand);

  // Custom-lowered below in LowerBR_CC.
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);

  // ROTL is turned into BITALIGN (see LowerROTL).
  setOperationAction(ISD::ROTL, MVT::i32, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::f32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);

  // See PerformDAGCombine: fp_round(uint_to_fp f64) -> uint_to_fp f32.
  setTargetDAGCombine(ISD::FP_ROUND);

  setSchedulingPreference(Sched::VLIW);
}
|
||||
|
||||
// EmitInstrWithCustomInserter - Expand R600 pseudo instructions after
// instruction selection.  Most cases build the real instruction(s) in front
// of MI and fall through to erase MI at the bottom; cases that must keep MI
// (MASK_WRITE, input_perspective) return early instead.
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const
{
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;

  switch (MI->getOpcode()) {
  default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  // Marker pseudo with no expansion; just drop it.
  case AMDGPU::SHADER_TYPE: break;
  case AMDGPU::CLAMP_R600:
    {
      // Expand to a MOV with the CLAMP modifier set on operand 0 (dest).
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
                .addOperand(MI->getOperand(0))
                .addOperand(MI->getOperand(1))
                .addImm(0) // Flags
                .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
      break;
    }
  case AMDGPU::FABS_R600:
    {
      // Expand to a MOV with the ABS modifier set on operand 1 (source).
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
                .addOperand(MI->getOperand(0))
                .addOperand(MI->getOperand(1))
                .addImm(0) // Flags
                .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_ABS);
      break;
    }

  case AMDGPU::FNEG_R600:
    {
      // Expand to a MOV with the NEG modifier set on operand 1 (source).
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
                .addOperand(MI->getOperand(0))
                .addOperand(MI->getOperand(1))
                .addImm(0) // Flags
                .addReg(AMDGPU::PRED_SEL_OFF);
      TII->addFlag(NewMI, 1, MO_FLAG_NEG);
      break;
    }

  case AMDGPU::R600_LOAD_CONST:
    {
      // Replace the pseudo with a COPY from the indexed constant register.
      int64_t RegIndex = MI->getOperand(1).getImm();
      unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
              .addOperand(MI->getOperand(0))
              .addReg(ConstantReg);
      break;
    }

  case AMDGPU::MASK_WRITE:
    {
      // No code emitted: tag the instruction that defines the masked
      // register with MO_FLAG_MASK.
      unsigned maskedRegister = MI->getOperand(0).getReg();
      assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
      MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
      TII->addFlag(defInstr, 0, MO_FLAG_MASK);
      // Return early so the instruction is not erased
      return BB;
    }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
    {
      // Convert to DWORD address
      unsigned NewAddr = MRI.createVirtualRegister(
          &AMDGPU::R600_TReg32_XRegClass);
      unsigned ShiftValue = MRI.createVirtualRegister(
          &AMDGPU::R600_TReg32RegClass);
      // Set the end-of-program bit when the write is followed by RETURN.
      unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

      // XXX In theory, we should be able to pass ShiftValue directly to
      // the LSHR_eg instruction as an inline literal, but I tried doing it
      // this way and it didn't produce the correct results.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
              ShiftValue)
              .addReg(AMDGPU::ALU_LITERAL_X)
              .addReg(AMDGPU::PRED_SEL_OFF)
              .addImm(2);
      // NewAddr = byte address >> 2.
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
              .addOperand(MI->getOperand(1))
              .addReg(ShiftValue)
              .addReg(AMDGPU::PRED_SEL_OFF);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
              .addOperand(MI->getOperand(0))
              .addReg(NewAddr)
              .addImm(EOP); // Set End of program bit
      break;
    }

  case AMDGPU::RESERVE_REG:
    {
      // Book-keeping only: remember that this T register is reserved.
      R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
      int64_t ReservedIndex = MI->getOperand(0).getImm();
      unsigned ReservedReg =
          AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
      MFI->ReservedRegs.push_back(ReservedReg);
      break;
    }

  case AMDGPU::TXD:
    {
      // Sample with derivatives: set the H and V gradients, then issue the
      // gradient-consuming sample with implicit uses keeping t0/t1 alive.
      unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
      unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::TXD_SHADOW:
    {
      // Shadow-compare variant of TXD; samples with TEX_SAMPLE_C_G.
      unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
      unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);

      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
              .addOperand(MI->getOperand(3))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
              .addOperand(MI->getOperand(2))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5));
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
              .addOperand(MI->getOperand(0))
              .addOperand(MI->getOperand(1))
              .addOperand(MI->getOperand(4))
              .addOperand(MI->getOperand(5))
              .addReg(t0, RegState::Implicit)
              .addReg(t1, RegState::Implicit);
      break;
    }
  case AMDGPU::BRANCH:
    // Unconditional branch: JUMP with a null predicate register.
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
            .addOperand(MI->getOperand(0))
            .addReg(0);
    break;
  case AMDGPU::BRANCH_COND_f32:
    {
      // PRED_X sets the predicate bit when the f32 condition is non-zero,
      // then a predicated JUMP consumes (and kills) it.
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
                .addReg(AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO)
                .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::BRANCH_COND_i32:
    {
      // Same expansion as above with the integer non-zero test.
      MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
                .addReg(AMDGPU::PREDICATE_BIT)
                .addOperand(MI->getOperand(1))
                .addImm(OPCODE_IS_NOT_ZERO_INT)
                .addImm(0); // Flags
      TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
      BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
              .addOperand(MI->getOperand(0))
              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      break;
    }
  case AMDGPU::input_perspective:
    {
      R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();

      // XXX Be more fine about register reservation
      for (unsigned i = 0; i < 4; i ++) {
        unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
        MFI->ReservedRegs.push_back(ReservedReg);
      }

      // Operand 1 selects the interpolation mode.
      switch (MI->getOperand(1).getImm()) {
      case 0:// Perspective
        MFI->HasPerspectiveInterpolation = true;
        break;
      case 1:// Linear
        MFI->HasLinearInterpolation = true;
        break;
      default:
        assert(0 && "Unknow ij index");
      }

      // Return early: only function info was updated; MI is kept.
      return BB;
    }
  }

  MI->eraseFromParent();
  return BB;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom DAG Lowering Operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
using namespace llvm::Intrinsic;
|
||||
using namespace llvm::AMDGPUIntrinsic;
|
||||
|
||||
// LowerOperation - Custom-lower the operations registered in the
// constructor; anything unhandled defers to AMDGPUTargetLowering.
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::BR_CC: return LowerBR_CC(Op, DAG);
  case ISD::ROTL: return LowerROTL(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC: return LowerSETCC(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      // Store to an output T register: mark it live-out of the function
      // and copy the value into it.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      if (!MRI.isLiveOut(Reg)) {
        MRI.addLiveOut(Reg);
      }
      return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
    }
    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    DebugLoc DL = Op.getDebugLoc();
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      // Read a shader input that is pre-loaded into a T register.
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
    }
    case AMDGPUIntrinsic::R600_load_input_perspective: {
      // Interpolate the containing vec4 (slot / 4) with mode 0
      // (perspective) and extract the requested channel (slot % 4).
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_linear: {
      // Same as above with interpolation mode 1 (linear).
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP,
          DL, MVT::v4f32,
          DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_constant: {
      // Flat input: INTERP_P0 takes only the vec4 slot, no ij operand.
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      SDValue FullVector = DAG.getNode(
          AMDGPUISD::INTERP_P0,
          DL, MVT::v4f32,
          DAG.getConstant(slot / 4 , MVT::i32));
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
    }
    case AMDGPUIntrinsic::R600_load_input_position: {
      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
      SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
          RegIndex, MVT::f32);
      if ((slot % 4) == 3) {
        // Fourth channel is returned as its reciprocal — presumably the
        // register holds 1/W and callers want W; confirm against users.
        return DAG.getNode(ISD::FDIV,
            DL, VT,
            DAG.getConstantFP(1.0f, MVT::f32),
            Reg);
      } else {
        return Reg;
      }
    }

    // Implicit OpenCL parameters, loaded by dword offset; see
    // LowerImplicitParameter.
    case r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    // Work-group ids live in T1, work-item ids in T0 (one channel each).
    case r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
          AMDGPU::T1_X, VT);
    case r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
          AMDGPU::T1_Y, VT);
    case r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
          AMDGPU::T1_Z, VT);
    case r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
          AMDGPU::T0_X, VT);
    case r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
          AMDGPU::T0_Y, VT);
    case r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
          AMDGPU::T0_Z, VT);
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}
|
||||
|
||||
// ReplaceNodeResults - Custom legalization of illegal result types:
// i1 FP_TO_UINT and the R600_load_input_face intrinsic.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const
{
  switch (N->getOpcode()) {
  default: return;
  case ISD::FP_TO_UINT:
    Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
    // Return here: the original fell through into INTRINSIC_WO_CHAIN,
    // which would misinterpret operand 0 of an FP_TO_UINT node (a float
    // value, not a ConstantSDNode intrinsic id).
    return;
  case ISD::INTRINSIC_WO_CHAIN:
    {
      unsigned IntrinsicID =
          cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
      // Only the load_input_face intrinsic has an illegal (i1) result here.
      if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) {
        Results.push_back(LowerInputFace(N, DAG));
      } else {
        return;
      }
    }
  }
}
|
||||
|
||||
// LowerInputFace - Lower the R600_load_input_face intrinsic.  The face
// input arrives as an f32 in a T register; the result is the i1 value of
// (face > 0.0f), built as a SETCC with unordered-greater-than.
SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
{
  const unsigned Slot =
      cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
  const unsigned HWReg = AMDGPU::R600_TReg32RegClass.getRegister(Slot);
  SDValue FaceValue = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                           HWReg, MVT::f32);
  SDValue Zero = DAG.getConstantFP(0.0f, MVT::f32);
  return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
                     FaceValue, Zero, DAG.getCondCode(ISD::SETUGT));
}
|
||||
|
||||
// LowerFPTOUINT - Lower an i1 fp-to-uint conversion: the result is simply
// whether the float is not equal to 0.0f.
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue FPZero = DAG.getConstantFP(0.0f, MVT::f32);
  return DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1, Op, FPZero,
                     DAG.getCondCode(ISD::SETNE));
}
|
||||
|
||||
// LowerBR_CC - Lower BR_CC by materializing the comparison as a SELECT_CC
// producing -1/0 (i32) or 1.0f/0.0f (f32), then branching on that value
// with AMDGPUISD::BRANCH_COND.
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;

  if (LHS.getValueType() == MVT::i32) {
    // Integer compare: -1 (all bits set) when true, 0 when false.
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    // Float compare: 1.0f when true, 0.0f when false.
    CmpValue = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
  } else {
    // Only i32/f32 BR_CC is registered as Custom; anything else is a bug.
    assert(0 && "Not valid type for br_cc");
  }
  Result = DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}
|
||||
|
||||
// LowerImplicitParameter - Load one of the nine implicit OpenCL kernel
// parameters (ngroups/global_size/local_size in x/y/z), which occupy the
// first nine dwords of the implicit parameter buffer.  DwordOffset selects
// which parameter; the address is the byte offset in PARAM_I_ADDRESS space.
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
    DebugLoc DL,
    unsigned DwordOffset) const
{
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
      AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
      DAG.getConstant(ByteOffset, MVT::i32), // PTR
      MachinePointerInfo(ConstantPointerNull::get(PtrType)),
      false, false, false, 0);
}
|
||||
|
||||
// LowerROTL - Lower a 32-bit rotate-left to the BITALIGN node:
// rotl(x, n) == bitalign(x, x, 32 - n).
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  SDValue RotAmt = Op.getOperand(1);
  // BITALIGN shifts right, so convert the left-rotate amount to 32 - n.
  SDValue InvAmt = DAG.getNode(ISD::SUB, DL, VT,
                               DAG.getConstant(32, MVT::i32), RotAmt);
  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT, Src, Src, InvAmt);
}
|
||||
|
||||
// isZero - True if Op is a constant integer or floating-point zero node.
bool R600TargetLowering::isZero(SDValue Op) const
{
  if (ConstantSDNode *IntConst = dyn_cast<ConstantSDNode>(Op))
    return IntConst->isNullValue();
  if (ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Op))
    return FPConst->isZero();
  // Non-constant operands are never known to be zero here.
  return false;
}
|
||||
|
||||
// LowerSELECT_CC - Lower SELECT_CC either to a form the CND* instructions
// can match (uniform operand types, one comparison operand zero) or, when
// that is impossible, to a pair of supported SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // We need all the operands of SELECT_CC to have the same value type, so if
  // necessary we need to convert LHS and RHS to be the same type True and
  // False. True and False are guaranteed to have the same type as this
  // SELECT_CC node.

  if (isHWTrueValue(True) && isHWFalseValue(False)) {
    if (CompareVT != VT) {
      if (VT == MVT::f32 && CompareVT == MVT::i32) {
        // Produce an i32 -1/0 select, then convert it to float.
        SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstant(-1, MVT::i32),
            DAG.getConstant(0, MVT::i32),
            CC);
        return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean);
      } else if (VT == MVT::i32 && CompareVT == MVT::f32) {
        // Produce an f32 1.0/0.0 select, then convert it to integer.
        SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
            LHS, RHS,
            DAG.getConstantFP(1.0f, MVT::f32),
            DAG.getConstantFP(0.0f, MVT::f32),
            CC);
        return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt);
      } else {
        // I don't think there will be any other type pairings.
        assert(!"Unhandled operand type parings in SELECT_CC");
      }
    } else {
      // Types already uniform and the True/False values are the hardware
      // true/false constants: the node is directly selectable.
      return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
    }
  }

  // XXX If True is a hardware TRUE value and False is a hardware FALSE value,
  // we can handle this with a native instruction, but we need to swap true
  // and false and change the conditional.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    // NOTE(review): intentionally empty — the swapped case is unimplemented.
  }

  // Check if we can lower this to a native operation.
  // CND* instructions requires all operands to have the same type,
  // and RHS to be zero.

  if (isZero(LHS) || isZero(RHS)) {
    SDValue Cond = (isZero(LHS) ? RHS : LHS);
    SDValue Zero = (isZero(LHS) ? LHS : RHS);
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True/False so every SELECT_CC operand shares CompareVT.
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }
    if (isZero(LHS)) {
      // Zero was on the left; swap the operand sense of the condition.
      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
    }

    switch (CCOpcode) {
    // Invert "not-equal"/"less-than" style conditions and swap the select
    // arms so only >=, >, == against zero remain.
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
    case ISD::SETULE:
    case ISD::SETULT:
    case ISD::SETOLE:
    case ISD::SETOLT:
    case ISD::SETLE:
    case ISD::SETLT:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, CompareVT);
    HWFalse = DAG.getConstant(0, CompareVT);
  }
  else {
    assert(!"Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}
|
||||
|
||||
// LowerSETCC - Lower SETCC (i32 result) via SELECT_CC: build a -1/0 (i32)
// or 1.0f/0.0f (f32, then converted to integer) value and mask it down to
// a canonical 0/1 boolean.
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  assert(Op.getValueType() == MVT::i32);
  if (LHS.getValueType() == MVT::i32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::i32,
        LHS, RHS,
        DAG.getConstant(-1, MVT::i32),
        DAG.getConstant(0, MVT::i32),
        CC);
  } else if (LHS.getValueType() == MVT::f32) {
    Cond = DAG.getNode(
        ISD::SELECT_CC,
        Op.getDebugLoc(),
        MVT::f32,
        LHS, RHS,
        DAG.getConstantFP(1.0f, MVT::f32),
        DAG.getConstantFP(0.0f, MVT::f32),
        CC);
    // Convert the 1.0f/0.0f result to an integer 1/0.
    Cond = DAG.getNode(
        ISD::FP_TO_SINT,
        DL,
        MVT::i32,
        Cond);
  } else {
    assert(0 && "Not valid type for set_cc");
  }
  // Mask to the low bit so -1 becomes the canonical boolean 1.
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      MVT::i32,
      DAG.getConstant(1, MVT::i32),
      Cond);
  return Cond;
}
|
||||
|
||||
// XXX Only kernel functions are supported, so we can assume for now that
// every function is a kernel function, but in the future we should use
// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc DL, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Explicit kernel arguments start at byte 36 of the parameter buffer
  // (after the nine implicit dwords; see LowerImplicitParameter).
  unsigned ParamOffsetBytes = 36;
  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    EVT VT = Ins[i].VT;
    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
        AMDGPUAS::PARAM_I_ADDRESS);
    // NOTE(review): the `new Argument(PtrTy)` allocated for the
    // MachinePointerInfo is never freed — apparent leak; confirm whether
    // anything takes ownership before changing it.
    SDValue Arg = DAG.getLoad(VT, DL, DAG.getRoot(),
        DAG.getConstant(ParamOffsetBytes, MVT::i32),
        MachinePointerInfo(new Argument(PtrTy)),
        false, false, false, 4);
    InVals.push_back(Arg);
    // Arguments are packed back to back by their store size.
    ParamOffsetBytes += (VT.getStoreSize());
  }
  return Chain;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom DAG Optimizations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// PerformDAGCombine - R600-specific DAG combines, registered via
// setTargetDAGCombine in the constructor.
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
    DAGCombinerInfo &DCI) const
{
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
          Arg.getOperand(0));
    }
    break;
  }
  }
  // No combine applied.
  return SDValue();
}
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600 DAG Lowering interface definition
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef R600ISELLOWERING_H
|
||||
#define R600ISELLOWERING_H
|
||||
|
||||
#include "AMDGPUISelLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class R600InstrInfo;
|
||||
|
||||
// R600TargetLowering - R600-specific DAG lowering: custom pseudo-instruction
// expansion, operation lowering, result-type legalization, and the kernel
// argument-loading calling convention.
class R600TargetLowering : public AMDGPUTargetLowering
{
public:
  R600TargetLowering(TargetMachine &TM);
  // Expand usesCustomInserter pseudos (CLAMP_R600, TXD, BRANCH_COND_*, ...)
  // into real machine instructions after instruction selection.
  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
      MachineBasicBlock * BB) const;
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  // Legalize illegal result types (i1 FP_TO_UINT, load_input_face).
  void ReplaceNodeResults(SDNode * N,
      SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const;
  virtual SDValue LowerFormalArguments(
      SDValue Chain,
      CallingConv::ID CallConv,
      bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins,
      DebugLoc DL, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals) const;
private:
  // Non-owning; obtained from the TargetMachine in the constructor.
  const R600InstrInfo * TII;

  /// LowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
  /// that are stored in the first nine dwords of a Vertex Buffer. These
  /// implicit parameters are lowered to load instructions which retrieve the
  /// values from the Vertex Buffer.
  SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
      DebugLoc DL, unsigned DwordOffset) const;

  // NOTE(review): no definition of this lowercase overload is visible in the
  // implementation file — looks like a stale declaration; confirm before
  // removing.
  void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
      MachineRegisterInfo & MRI, unsigned dword_offset) const;

  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;

  /// LowerROTL - Lower ROTL opcode to BITALIGN
  SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
  SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;

  // isZero - True if Op is a constant integer or FP zero node.
  bool isZero(SDValue Op) const;
};
|
||||
|
||||
} // End namespace llvm;
|
||||
|
||||
#endif // R600ISELLOWERING_H
|
||||
|
|
@ -1,512 +0,0 @@
|
|||
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600 Implementation of TargetInstrInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600InstrInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "R600Defines.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "AMDILUtilityFunctions.h"
|
||||
|
||||
#define GET_INSTRINFO_CTOR
|
||||
#include "AMDGPUGenDFAPacketizer.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// R600InstrInfo - The register info object needs a back-reference to this
// instruction info, so RI is initialized with *this.
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm, *this),
    TM(tm)
  { }
|
||||
|
||||
// getRegisterInfo - Accessor for the target's register info object.
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
{
  return RI;
}
|
||||
|
||||
// isTrig - True if the instruction's TableGen TSFlags carry the TRIG bit.
bool R600InstrInfo::isTrig(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}
|
||||
|
||||
// isVector - True if the instruction's TableGen TSFlags carry the VECTOR bit.
bool R600InstrInfo::isVector(const MachineInstr &MI) const
{
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}
|
||||
|
||||
// copyPhysReg - Emit register-to-register copies.  vec4 (Reg128) copies are
// decomposed into four per-channel MOVs; scalar copies are a single MOV.
void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const
{
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
      && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    // Copy channel by channel; the implicit def of DestReg on each MOV keeps
    // the full vec4 register marked as defined.
    // NOTE(review): KillSrc is not applied on this path — confirm intent.
    for (unsigned I = 0; I < 4; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
              .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
              .addReg(RI.getSubReg(SrcReg, SubRegIndex))
              .addImm(0) // Flag
              .addReg(0) // PREDICATE_BIT
              .addReg(DestReg, RegState::Define | RegState::Implicit);
    }
  } else {

    /* We can't copy vec4 registers */
    assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
           && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));

    BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
            .addReg(SrcReg, getKillRegState(KillSrc))
            .addImm(0) // Flag
            .addReg(0); // PREDICATE_BIT
  }
}
|
||||
|
||||
MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
|
||||
unsigned DstReg, int64_t Imm) const
|
||||
{
|
||||
MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
|
||||
MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
|
||||
MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
|
||||
MachineInstrBuilder(MI).addImm(Imm);
|
||||
MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
|
||||
|
||||
return MI;
|
||||
}
|
||||
|
||||
unsigned R600InstrInfo::getIEQOpcode() const
|
||||
{
|
||||
return AMDGPU::SETE_INT;
|
||||
}
|
||||
|
||||
bool R600InstrInfo::isMov(unsigned Opcode) const
|
||||
{
|
||||
|
||||
|
||||
switch(Opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::MOV:
|
||||
case AMDGPU::MOV_IMM_F32:
|
||||
case AMDGPU::MOV_IMM_I32:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Some instructions act as place holders to emulate operations that the GPU
|
||||
// hardware does automatically. This function can be used to check if
|
||||
// an opcode falls into this category.
|
||||
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
|
||||
{
|
||||
switch (Opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::RETURN:
|
||||
case AMDGPU::MASK_WRITE:
|
||||
case AMDGPU::RESERVE_REG:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool R600InstrInfo::isReductionOp(unsigned Opcode) const
|
||||
{
|
||||
switch(Opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::DOT4_r600:
|
||||
case AMDGPU::DOT4_eg:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool R600InstrInfo::isCubeOp(unsigned Opcode) const
|
||||
{
|
||||
switch(Opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::CUBE_r600_pseudo:
|
||||
case AMDGPU::CUBE_r600_real:
|
||||
case AMDGPU::CUBE_eg_pseudo:
|
||||
case AMDGPU::CUBE_eg_real:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
|
||||
const ScheduleDAG *DAG) const
|
||||
{
|
||||
const InstrItineraryData *II = TM->getInstrItineraryData();
|
||||
return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
|
||||
}
|
||||
|
||||
static bool
|
||||
isPredicateSetter(unsigned Opcode)
|
||||
{
|
||||
switch (Opcode) {
|
||||
case AMDGPU::PRED_X:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static MachineInstr *
|
||||
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I)
|
||||
{
|
||||
while (I != MBB.begin()) {
|
||||
--I;
|
||||
MachineInstr *MI = I;
|
||||
if (isPredicateSetter(MI->getOpcode()))
|
||||
return MI;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
bool AllowModify) const
|
||||
{
|
||||
// Most of the following comes from the ARM implementation of AnalyzeBranch
|
||||
|
||||
// If the block has no terminators, it just falls into the block after it.
|
||||
MachineBasicBlock::iterator I = MBB.end();
|
||||
if (I == MBB.begin())
|
||||
return false;
|
||||
--I;
|
||||
while (I->isDebugValue()) {
|
||||
if (I == MBB.begin())
|
||||
return false;
|
||||
--I;
|
||||
}
|
||||
if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the last instruction in the block.
|
||||
MachineInstr *LastInst = I;
|
||||
|
||||
// If there is only one terminator instruction, process it.
|
||||
unsigned LastOpc = LastInst->getOpcode();
|
||||
if (I == MBB.begin() ||
|
||||
static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
|
||||
if (LastOpc == AMDGPU::JUMP) {
|
||||
if(!isPredicated(LastInst)) {
|
||||
TBB = LastInst->getOperand(0).getMBB();
|
||||
return false;
|
||||
} else {
|
||||
MachineInstr *predSet = I;
|
||||
while (!isPredicateSetter(predSet->getOpcode())) {
|
||||
predSet = --I;
|
||||
}
|
||||
TBB = LastInst->getOperand(0).getMBB();
|
||||
Cond.push_back(predSet->getOperand(1));
|
||||
Cond.push_back(predSet->getOperand(2));
|
||||
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true; // Can't handle indirect branch.
|
||||
}
|
||||
|
||||
// Get the instruction before it if it is a terminator.
|
||||
MachineInstr *SecondLastInst = I;
|
||||
unsigned SecondLastOpc = SecondLastInst->getOpcode();
|
||||
|
||||
// If the block ends with a B and a Bcc, handle it.
|
||||
if (SecondLastOpc == AMDGPU::JUMP &&
|
||||
isPredicated(SecondLastInst) &&
|
||||
LastOpc == AMDGPU::JUMP &&
|
||||
!isPredicated(LastInst)) {
|
||||
MachineInstr *predSet = --I;
|
||||
while (!isPredicateSetter(predSet->getOpcode())) {
|
||||
predSet = --I;
|
||||
}
|
||||
TBB = SecondLastInst->getOperand(0).getMBB();
|
||||
FBB = LastInst->getOperand(0).getMBB();
|
||||
Cond.push_back(predSet->getOperand(1));
|
||||
Cond.push_back(predSet->getOperand(2));
|
||||
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Otherwise, can't handle this.
|
||||
return true;
|
||||
}
|
||||
|
||||
int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
|
||||
const MachineInstr *MI = op.getParent();
|
||||
|
||||
switch (MI->getDesc().OpInfo->RegClass) {
|
||||
default: // FIXME: fallthrough??
|
||||
case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
|
||||
case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
|
||||
};
|
||||
}
|
||||
|
||||
unsigned
|
||||
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const
|
||||
{
|
||||
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
|
||||
|
||||
if (FBB == 0) {
|
||||
if (Cond.empty()) {
|
||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
|
||||
return 1;
|
||||
} else {
|
||||
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
|
||||
assert(PredSet && "No previous predicate !");
|
||||
addFlag(PredSet, 1, MO_FLAG_PUSH);
|
||||
PredSet->getOperand(2).setImm(Cond[1].getImm());
|
||||
|
||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
|
||||
.addMBB(TBB)
|
||||
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
|
||||
return 1;
|
||||
}
|
||||
} else {
|
||||
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
|
||||
assert(PredSet && "No previous predicate !");
|
||||
addFlag(PredSet, 1, MO_FLAG_PUSH);
|
||||
PredSet->getOperand(2).setImm(Cond[1].getImm());
|
||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
|
||||
.addMBB(TBB)
|
||||
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
|
||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
|
||||
{
|
||||
|
||||
// Note : we leave PRED* instructions there.
|
||||
// They may be needed when predicating instructions.
|
||||
|
||||
MachineBasicBlock::iterator I = MBB.end();
|
||||
|
||||
if (I == MBB.begin()) {
|
||||
return 0;
|
||||
}
|
||||
--I;
|
||||
switch (I->getOpcode()) {
|
||||
default:
|
||||
return 0;
|
||||
case AMDGPU::JUMP:
|
||||
if (isPredicated(I)) {
|
||||
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
|
||||
clearFlag(predSet, 1, MO_FLAG_PUSH);
|
||||
}
|
||||
I->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
I = MBB.end();
|
||||
|
||||
if (I == MBB.begin()) {
|
||||
return 1;
|
||||
}
|
||||
--I;
|
||||
switch (I->getOpcode()) {
|
||||
// FIXME: only one case??
|
||||
default:
|
||||
return 1;
|
||||
case AMDGPU::JUMP:
|
||||
if (isPredicated(I)) {
|
||||
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
|
||||
clearFlag(predSet, 1, MO_FLAG_PUSH);
|
||||
}
|
||||
I->eraseFromParent();
|
||||
break;
|
||||
}
|
||||
return 2;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::isPredicated(const MachineInstr *MI) const
|
||||
{
|
||||
int idx = MI->findFirstPredOperandIdx();
|
||||
if (idx < 0)
|
||||
return false;
|
||||
|
||||
unsigned Reg = MI->getOperand(idx).getReg();
|
||||
switch (Reg) {
|
||||
default: return false;
|
||||
case AMDGPU::PRED_SEL_ONE:
|
||||
case AMDGPU::PRED_SEL_ZERO:
|
||||
case AMDGPU::PREDICATE_BIT:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::isPredicable(MachineInstr *MI) const
|
||||
{
|
||||
return AMDGPUInstrInfo::isPredicable(MI);
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
|
||||
unsigned NumCyles,
|
||||
unsigned ExtraPredCycles,
|
||||
const BranchProbability &Probability) const{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
||||
unsigned NumTCycles,
|
||||
unsigned ExtraTCycles,
|
||||
MachineBasicBlock &FMBB,
|
||||
unsigned NumFCycles,
|
||||
unsigned ExtraFCycles,
|
||||
const BranchProbability &Probability) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
|
||||
unsigned NumCyles,
|
||||
const BranchProbability &Probability)
|
||||
const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
|
||||
MachineBasicBlock &FMBB) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
|
||||
{
|
||||
MachineOperand &MO = Cond[1];
|
||||
switch (MO.getImm()) {
|
||||
case OPCODE_IS_ZERO_INT:
|
||||
MO.setImm(OPCODE_IS_NOT_ZERO_INT);
|
||||
break;
|
||||
case OPCODE_IS_NOT_ZERO_INT:
|
||||
MO.setImm(OPCODE_IS_ZERO_INT);
|
||||
break;
|
||||
case OPCODE_IS_ZERO:
|
||||
MO.setImm(OPCODE_IS_NOT_ZERO);
|
||||
break;
|
||||
case OPCODE_IS_NOT_ZERO:
|
||||
MO.setImm(OPCODE_IS_ZERO);
|
||||
break;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
||||
MachineOperand &MO2 = Cond[2];
|
||||
switch (MO2.getReg()) {
|
||||
case AMDGPU::PRED_SEL_ZERO:
|
||||
MO2.setReg(AMDGPU::PRED_SEL_ONE);
|
||||
break;
|
||||
case AMDGPU::PRED_SEL_ONE:
|
||||
MO2.setReg(AMDGPU::PRED_SEL_ZERO);
|
||||
break;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
|
||||
std::vector<MachineOperand> &Pred) const
|
||||
{
|
||||
return isPredicateSetter(MI->getOpcode());
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||
const SmallVectorImpl<MachineOperand> &Pred2) const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
|
||||
const SmallVectorImpl<MachineOperand> &Pred) const
|
||||
{
|
||||
int PIdx = MI->findFirstPredOperandIdx();
|
||||
|
||||
if (PIdx != -1) {
|
||||
MachineOperand &PMO = MI->getOperand(PIdx);
|
||||
PMO.setReg(Pred[2].getReg());
|
||||
MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
||||
const MachineInstr *MI,
|
||||
unsigned *PredCost) const
|
||||
{
|
||||
if (PredCost)
|
||||
*PredCost = 2;
|
||||
return 2;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction flag getters/setters
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
|
||||
{
|
||||
return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
|
||||
}
|
||||
|
||||
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
|
||||
{
|
||||
unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
|
||||
assert(FlagIndex != 0 &&
|
||||
"Instruction flags not supported for this instruction");
|
||||
MachineOperand &FlagOp = MI->getOperand(FlagIndex);
|
||||
assert(FlagOp.isImm());
|
||||
return FlagOp;
|
||||
}
|
||||
|
||||
void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
|
||||
unsigned Flag) const
|
||||
{
|
||||
MachineOperand &FlagOp = getFlagOp(MI);
|
||||
FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
|
||||
}
|
||||
|
||||
void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
|
||||
unsigned Flag) const
|
||||
{
|
||||
MachineOperand &FlagOp = getFlagOp(MI);
|
||||
unsigned InstFlags = FlagOp.getImm();
|
||||
InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
|
||||
FlagOp.setImm(InstFlags);
|
||||
}
|
||||
|
|
@ -1,132 +0,0 @@
|
|||
//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface definition for R600InstrInfo
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef R600INSTRUCTIONINFO_H_
|
||||
#define R600INSTRUCTIONINFO_H_
|
||||
|
||||
#include "AMDIL.h"
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "R600RegisterInfo.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetMachine;
|
||||
class DFAPacketizer;
|
||||
class ScheduleDAG;
|
||||
class MachineFunction;
|
||||
class MachineInstr;
|
||||
class MachineInstrBuilder;
|
||||
|
||||
class R600InstrInfo : public AMDGPUInstrInfo {
|
||||
private:
|
||||
const R600RegisterInfo RI;
|
||||
AMDGPUTargetMachine &TM;
|
||||
|
||||
int getBranchInstr(const MachineOperand &op) const;
|
||||
|
||||
public:
|
||||
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
|
||||
|
||||
const R600RegisterInfo &getRegisterInfo() const;
|
||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const;
|
||||
|
||||
bool isTrig(const MachineInstr &MI) const;
|
||||
bool isPlaceHolderOpcode(unsigned opcode) const;
|
||||
bool isReductionOp(unsigned opcode) const;
|
||||
bool isCubeOp(unsigned opcode) const;
|
||||
|
||||
/// isVector - Vector instructions are instructions that must fill all
|
||||
/// instruction slots within an instruction group.
|
||||
bool isVector(const MachineInstr &MI) const;
|
||||
|
||||
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||
int64_t Imm) const;
|
||||
|
||||
virtual unsigned getIEQOpcode() const;
|
||||
virtual bool isMov(unsigned Opcode) const;
|
||||
|
||||
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
|
||||
const ScheduleDAG *DAG) const;
|
||||
|
||||
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
|
||||
|
||||
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
|
||||
|
||||
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
|
||||
|
||||
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
|
||||
|
||||
bool isPredicated(const MachineInstr *MI) const;
|
||||
|
||||
bool isPredicable(MachineInstr *MI) const;
|
||||
|
||||
bool
|
||||
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
|
||||
const BranchProbability &Probability) const;
|
||||
|
||||
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
|
||||
unsigned ExtraPredCycles,
|
||||
const BranchProbability &Probability) const ;
|
||||
|
||||
bool
|
||||
isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
||||
unsigned NumTCycles, unsigned ExtraTCycles,
|
||||
MachineBasicBlock &FMBB,
|
||||
unsigned NumFCycles, unsigned ExtraFCycles,
|
||||
const BranchProbability &Probability) const;
|
||||
|
||||
bool DefinesPredicate(MachineInstr *MI,
|
||||
std::vector<MachineOperand> &Pred) const;
|
||||
|
||||
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||
const SmallVectorImpl<MachineOperand> &Pred2) const;
|
||||
|
||||
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
|
||||
MachineBasicBlock &FMBB) const;
|
||||
|
||||
bool PredicateInstruction(MachineInstr *MI,
|
||||
const SmallVectorImpl<MachineOperand> &Pred) const;
|
||||
|
||||
int getInstrLatency(const InstrItineraryData *ItinData,
|
||||
const MachineInstr *MI,
|
||||
unsigned *PredCost = 0) const;
|
||||
|
||||
virtual int getInstrLatency(const InstrItineraryData *ItinData,
|
||||
SDNode *Node) const { return 1;}
|
||||
|
||||
///hasFlagOperand - Returns true if this instruction has an operand for
|
||||
/// storing target flags.
|
||||
bool hasFlagOperand(const MachineInstr &MI) const;
|
||||
|
||||
///addFlag - Add one of the MO_FLAG* flags to the specified Operand.
|
||||
void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
|
||||
|
||||
///isFlagSet - Determine if the specified flag is set on this Operand.
|
||||
bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
|
||||
|
||||
///getFlagOp - Return the operand containing the flags for this instruction.
|
||||
MachineOperand &getFlagOp(MachineInstr *MI) const;
|
||||
|
||||
///clearFlag - Clear the specified flag on the instruction.
|
||||
void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif // R600INSTRINFO_H_
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,50 +0,0 @@
|
|||
//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600 Intrinsic Definitions
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let TargetPrefix = "R600", isTarget = 1 in {
|
||||
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_R600_load_input_perspective :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_constant :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_linear :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_position :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_face :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
||||
let TargetPrefix = "r600", isTarget = 1 in {
|
||||
|
||||
class R600ReadPreloadRegisterIntrinsic<string name>
|
||||
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<name>;
|
||||
|
||||
multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
|
||||
def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
|
||||
def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
|
||||
def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
|
||||
}
|
||||
|
||||
defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
|
||||
"__builtin_r600_read_global_size">;
|
||||
defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
|
||||
"__builtin_r600_read_local_size">;
|
||||
defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
|
||||
"__builtin_r600_read_ngroups">;
|
||||
defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
|
||||
"__builtin_r600_read_tgid">;
|
||||
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
|
||||
"__builtin_r600_read_tidig">;
|
||||
} // End TargetPrefix = "r600"
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
//===-- R600Intrinsics.td - TODO: Add brief description -------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// TODO: Add full description
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let TargetPrefix = "R600", isTarget = 1 in {
|
||||
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_R600_load_input_perspective :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_constant :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_linear :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_position :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
def int_R600_load_input_face :
|
||||
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
|
||||
}
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
|
||||
: MachineFunctionInfo(),
|
||||
HasLinearInterpolation(false),
|
||||
HasPerspectiveInterpolation(false)
|
||||
{ }
|
||||
|
||||
unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
|
||||
{
|
||||
assert(HasPerspectiveInterpolation);
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned R600MachineFunctionInfo::GetIJLinearIndex() const
|
||||
{
|
||||
assert(HasLinearInterpolation);
|
||||
if (HasPerspectiveInterpolation)
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600MachineFunctionInfo is used for keeping track of which registers have
|
||||
// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef R600MACHINEFUNCTIONINFO_H
|
||||
#define R600MACHINEFUNCTIONINFO_H
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class R600MachineFunctionInfo : public MachineFunctionInfo {
|
||||
|
||||
public:
|
||||
R600MachineFunctionInfo(const MachineFunction &MF);
|
||||
std::vector<unsigned> ReservedRegs;
|
||||
bool HasLinearInterpolation;
|
||||
bool HasPerspectiveInterpolation;
|
||||
|
||||
unsigned GetIJLinearIndex() const;
|
||||
unsigned GetIJPerspectiveIndex() const;
|
||||
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif //R600MACHINEFUNCTIONINFO_H
|
||||
|
|
@ -1,128 +0,0 @@
|
|||
//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// The file contains the R600 implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "R600RegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "R600MachineFunctionInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
|
||||
const TargetInstrInfo &tii)
|
||||
: AMDGPURegisterInfo(tm, tii),
|
||||
TM(tm),
|
||||
TII(tii)
|
||||
{ }
|
||||
|
||||
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
|
||||
{
|
||||
BitVector Reserved(getNumRegs());
|
||||
const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
|
||||
|
||||
Reserved.set(AMDGPU::ZERO);
|
||||
Reserved.set(AMDGPU::HALF);
|
||||
Reserved.set(AMDGPU::ONE);
|
||||
Reserved.set(AMDGPU::ONE_INT);
|
||||
Reserved.set(AMDGPU::NEG_HALF);
|
||||
Reserved.set(AMDGPU::NEG_ONE);
|
||||
Reserved.set(AMDGPU::PV_X);
|
||||
Reserved.set(AMDGPU::ALU_LITERAL_X);
|
||||
Reserved.set(AMDGPU::PREDICATE_BIT);
|
||||
Reserved.set(AMDGPU::PRED_SEL_OFF);
|
||||
Reserved.set(AMDGPU::PRED_SEL_ZERO);
|
||||
Reserved.set(AMDGPU::PRED_SEL_ONE);
|
||||
|
||||
for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
|
||||
E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
|
||||
Reserved.set(*I);
|
||||
}
|
||||
|
||||
for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
|
||||
E = MFI->ReservedRegs.end(); I != E; ++I) {
|
||||
Reserved.set(*I);
|
||||
Reserved.set(*(getSuperRegisters(*I)));
|
||||
}
|
||||
|
||||
return Reserved;
|
||||
}
|
||||
|
||||
const TargetRegisterClass *
|
||||
R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
|
||||
{
|
||||
switch (rc->getID()) {
|
||||
case AMDGPU::GPRF32RegClassID:
|
||||
case AMDGPU::GPRI32RegClassID:
|
||||
return &AMDGPU::R600_Reg32RegClass;
|
||||
default: return rc;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const
|
||||
{
|
||||
switch(reg) {
|
||||
case AMDGPU::ZERO: return 248;
|
||||
case AMDGPU::ONE:
|
||||
case AMDGPU::NEG_ONE: return 249;
|
||||
case AMDGPU::ONE_INT: return 250;
|
||||
case AMDGPU::HALF:
|
||||
case AMDGPU::NEG_HALF: return 252;
|
||||
case AMDGPU::ALU_LITERAL_X: return 253;
|
||||
case AMDGPU::PREDICATE_BIT:
|
||||
case AMDGPU::PRED_SEL_OFF:
|
||||
case AMDGPU::PRED_SEL_ZERO:
|
||||
case AMDGPU::PRED_SEL_ONE:
|
||||
return 0;
|
||||
default: return getHWRegIndexGen(reg);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
|
||||
{
|
||||
switch(reg) {
|
||||
case AMDGPU::ZERO:
|
||||
case AMDGPU::ONE:
|
||||
case AMDGPU::ONE_INT:
|
||||
case AMDGPU::NEG_ONE:
|
||||
case AMDGPU::HALF:
|
||||
case AMDGPU::NEG_HALF:
|
||||
case AMDGPU::ALU_LITERAL_X:
|
||||
case AMDGPU::PREDICATE_BIT:
|
||||
case AMDGPU::PRED_SEL_OFF:
|
||||
case AMDGPU::PRED_SEL_ZERO:
|
||||
case AMDGPU::PRED_SEL_ONE:
|
||||
return 0;
|
||||
default: return getHWRegChanGen(reg);
|
||||
}
|
||||
}
|
||||
|
||||
const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
|
||||
MVT VT) const
|
||||
{
|
||||
switch(VT.SimpleTy) {
|
||||
default:
|
||||
case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const
|
||||
{
|
||||
switch (Channel) {
|
||||
default: assert(!"Invalid channel index"); return 0;
|
||||
case 0: return AMDGPU::sel_x;
|
||||
case 1: return AMDGPU::sel_y;
|
||||
case 2: return AMDGPU::sel_z;
|
||||
case 3: return AMDGPU::sel_w;
|
||||
}
|
||||
}
|
||||
|
||||
#include "R600HwRegInfo.include"
|
||||
|
|
@ -1,63 +0,0 @@
|
|||
//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface definition for R600RegisterInfo
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef R600REGISTERINFO_H_
|
||||
#define R600REGISTERINFO_H_
|
||||
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class R600TargetMachine;
|
||||
class TargetInstrInfo;
|
||||
|
||||
struct R600RegisterInfo : public AMDGPURegisterInfo
|
||||
{
|
||||
AMDGPUTargetMachine &TM;
|
||||
const TargetInstrInfo &TII;
|
||||
|
||||
R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
|
||||
|
||||
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
|
||||
|
||||
/// getISARegClass - rc is an AMDIL reg class. This function returns the
|
||||
/// R600 reg class that is equivalent to the given AMDIL reg class.
|
||||
virtual const TargetRegisterClass * getISARegClass(
|
||||
const TargetRegisterClass * rc) const;
|
||||
|
||||
/// getHWRegIndex - get the HW encoding for a register.
|
||||
unsigned getHWRegIndex(unsigned reg) const;
|
||||
|
||||
/// getHWRegChan - get the HW encoding for a register's channel.
|
||||
unsigned getHWRegChan(unsigned reg) const;
|
||||
|
||||
/// getCFGStructurizerRegClass - get the register class of the specified
|
||||
/// type to use in the CFGStructurizer
|
||||
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
|
||||
|
||||
/// getSubRegFromChannel - Return the sub reg enum value for the given
|
||||
/// Channel (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
|
||||
unsigned getSubRegFromChannel(unsigned Channel) const;
|
||||
|
||||
private:
|
||||
/// getHWRegIndexGen - Generated function returns a register's encoding
|
||||
unsigned getHWRegIndexGen(unsigned reg) const;
|
||||
/// getHWRegChanGen - Generated function returns a register's channel
|
||||
/// encoding.
|
||||
unsigned getHWRegChanGen(unsigned reg) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // AMDIDSAREGISTERINFO_H_
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction
|
||||
// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS
|
||||
// slot has been removed.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
def ALU_X : FuncUnit;
|
||||
def ALU_Y : FuncUnit;
|
||||
def ALU_Z : FuncUnit;
|
||||
def ALU_W : FuncUnit;
|
||||
def TRANS : FuncUnit;
|
||||
|
||||
def AnyALU : InstrItinClass;
|
||||
def VecALU : InstrItinClass;
|
||||
def TransALU : InstrItinClass;
|
||||
|
||||
def R600_EG_Itin : ProcessorItineraries <
|
||||
[ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
|
||||
[],
|
||||
[
|
||||
InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
|
||||
InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>,
|
||||
InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
|
||||
InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
|
||||
]
|
||||
>;
|
||||
|
|
@ -1,151 +0,0 @@
|
|||
//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass maps the pseudo interpolation registers to the correct physical
|
||||
// registers. Prior to executing a fragment shader, the GPU loads interpolation
|
||||
// parameters into physical registers. The specific physical register that each
|
||||
// interpolation parameter ends up in depends on the type of the interpolation
|
||||
// parameter as well as how many interpolation parameters are used by the
|
||||
// shader.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "AMDIL.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class SIAssignInterpRegsPass : public MachineFunctionPass {
|
||||
|
||||
private:
|
||||
static char ID;
|
||||
TargetMachine &TM;
|
||||
|
||||
void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
|
||||
unsigned physReg, unsigned virtReg);
|
||||
|
||||
public:
|
||||
SIAssignInterpRegsPass(TargetMachine &tm) :
|
||||
MachineFunctionPass(ID), TM(tm) { }
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
const char *getPassName() const { return "SI Assign intrpolation registers"; }
|
||||
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char SIAssignInterpRegsPass::ID = 0;
|
||||
|
||||
#define INTERP_VALUES 16
|
||||
#define REQUIRED_VALUE_MAX_INDEX 7
|
||||
|
||||
struct InterpInfo {
|
||||
bool Enabled;
|
||||
unsigned Regs[3];
|
||||
unsigned RegCount;
|
||||
};
|
||||
|
||||
|
||||
FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
|
||||
return new SIAssignInterpRegsPass(tm);
|
||||
}
|
||||
|
||||
bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
|
||||
{
|
||||
|
||||
struct InterpInfo InterpUse[INTERP_VALUES] = {
|
||||
{false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
|
||||
{false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
|
||||
{false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
|
||||
{false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
|
||||
{false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
|
||||
{false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
|
||||
{false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
|
||||
{false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
|
||||
{false, {AMDGPU::POS_X_FLOAT}, 1},
|
||||
{false, {AMDGPU::POS_Y_FLOAT}, 1},
|
||||
{false, {AMDGPU::POS_Z_FLOAT}, 1},
|
||||
{false, {AMDGPU::POS_W_FLOAT}, 1},
|
||||
{false, {AMDGPU::FRONT_FACE}, 1},
|
||||
{false, {AMDGPU::ANCILLARY}, 1},
|
||||
{false, {AMDGPU::SAMPLE_COVERAGE}, 1},
|
||||
{false, {AMDGPU::POS_FIXED_PT}, 1}
|
||||
};
|
||||
|
||||
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
// This pass is only needed for pixel shaders.
|
||||
if (MFI->ShaderType != ShaderType::PIXEL) {
|
||||
return false;
|
||||
}
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
bool ForceEnable = true;
|
||||
|
||||
// First pass, mark the interpolation values that are used.
|
||||
for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
|
||||
for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
|
||||
RegIdx++) {
|
||||
InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
|
||||
!MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
|
||||
if (InterpUse[InterpIdx].Enabled &&
|
||||
InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
|
||||
ForceEnable = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// At least one interpolation mode must be enabled or else the GPU will hang.
|
||||
if (ForceEnable) {
|
||||
InterpUse[0].Enabled = true;
|
||||
}
|
||||
|
||||
unsigned UsedVgprs = 0;
|
||||
|
||||
// Second pass, replace with VGPRs.
|
||||
for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
|
||||
if (!InterpUse[InterpIdx].Enabled) {
|
||||
continue;
|
||||
}
|
||||
MFI->SPIPSInputAddr |= (1 << InterpIdx);
|
||||
|
||||
for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
|
||||
RegIdx++, UsedVgprs++) {
|
||||
unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
|
||||
unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
|
||||
MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
|
||||
addLiveIn(&MF, MRI, NewReg, VirtReg);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
|
||||
MachineRegisterInfo & MRI,
|
||||
unsigned physReg, unsigned virtReg)
|
||||
{
|
||||
const TargetInstrInfo * TII = TM.getInstrInfo();
|
||||
if (!MRI.isLiveIn(physReg)) {
|
||||
MRI.addLiveIn(physReg, virtReg);
|
||||
MF->front().addLiveIn(physReg);
|
||||
BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), virtReg)
|
||||
.addReg(physReg);
|
||||
} else {
|
||||
MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,291 +0,0 @@
|
|||
#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
#
|
||||
# This perl script prints to stdout .td code to be used as SIRegisterInfo.td
|
||||
# it also generates a file called SIHwRegInfo.include, which contains helper
|
||||
# functions for determining the hw encoding of registers.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
my $SGPR_COUNT = 104;
|
||||
my $VGPR_COUNT = 256;
|
||||
|
||||
my $SGPR_MAX_IDX = $SGPR_COUNT - 1;
|
||||
my $VGPR_MAX_IDX = $VGPR_COUNT - 1;
|
||||
|
||||
my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : '';
|
||||
|
||||
print <<STRING;
|
||||
|
||||
let Namespace = "AMDGPU" in {
|
||||
def low : SubRegIndex;
|
||||
def high : SubRegIndex;
|
||||
|
||||
def sub0 : SubRegIndex;
|
||||
def sub1 : SubRegIndex;
|
||||
def sub2 : SubRegIndex;
|
||||
def sub3 : SubRegIndex;
|
||||
def sub4 : SubRegIndex;
|
||||
def sub5 : SubRegIndex;
|
||||
def sub6 : SubRegIndex;
|
||||
def sub7 : SubRegIndex;
|
||||
}
|
||||
|
||||
class SIReg <string n> : Register<n> {
|
||||
let Namespace = "AMDGPU";
|
||||
}
|
||||
|
||||
class SI_64 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [low, high];
|
||||
}
|
||||
|
||||
class SI_128 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
|
||||
}
|
||||
|
||||
class SI_256 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
|
||||
let Namespace = "AMDGPU";
|
||||
let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
|
||||
}
|
||||
|
||||
class SGPR_32 <bits<8> num, string name> : SIReg<name> {
|
||||
field bits<8> Num;
|
||||
|
||||
let Num = num;
|
||||
}
|
||||
|
||||
|
||||
class VGPR_32 <bits<9> num, string name> : SIReg<name> {
|
||||
field bits<9> Num;
|
||||
|
||||
let Num = num;
|
||||
}
|
||||
|
||||
class SGPR_64 <bits<8> num, string name, list<Register> subregs> :
|
||||
SI_64 <name, subregs>;
|
||||
|
||||
class VGPR_64 <bits<9> num, string name, list<Register> subregs> :
|
||||
SI_64 <name, subregs>;
|
||||
|
||||
class SGPR_128 <bits<8> num, string name, list<Register> subregs> :
|
||||
SI_128 <name, subregs>;
|
||||
|
||||
class VGPR_128 <bits<9> num, string name, list<Register> subregs> :
|
||||
SI_128 <name, subregs>;
|
||||
|
||||
class SGPR_256 <bits<8> num, string name, list<Register> subregs> :
|
||||
SI_256 <name, subregs>;
|
||||
|
||||
def VCC : SIReg<"VCC">;
|
||||
def EXEC_LO : SIReg<"EXEC LO">;
|
||||
def EXEC_HI : SIReg<"EXEC HI">;
|
||||
def EXEC : SI_64<"EXEC", [EXEC_LO,EXEC_HI]>;
|
||||
def SCC : SIReg<"SCC">;
|
||||
def SREG_LIT_0 : SIReg <"S LIT 0">;
|
||||
def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">;
|
||||
|
||||
def M0 : SIReg <"M0">;
|
||||
|
||||
//Interpolation registers
|
||||
|
||||
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
|
||||
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
|
||||
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
|
||||
def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
|
||||
def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
|
||||
def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">;
|
||||
def PERSP_I_W : SIReg <"PERSP_I_W">;
|
||||
def PERSP_J_W : SIReg <"PERSP_J_W">;
|
||||
def PERSP_1_W : SIReg <"PERSP_1_W">;
|
||||
def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
|
||||
def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
|
||||
def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
|
||||
def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
|
||||
def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
|
||||
def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
|
||||
def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
|
||||
def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
|
||||
def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
|
||||
def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
|
||||
def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
|
||||
def FRONT_FACE : SIReg <"FRONT_FACE">;
|
||||
def ANCILLARY : SIReg <"ANCILLARY">;
|
||||
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
|
||||
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
|
||||
|
||||
STRING
|
||||
|
||||
#32 bit register
|
||||
|
||||
my @SGPR;
|
||||
for (my $i = 0; $i < $SGPR_COUNT; $i++) {
|
||||
print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n";
|
||||
$SGPR[$i] = "SGPR$i";
|
||||
}
|
||||
|
||||
my @VGPR;
|
||||
for (my $i = 0; $i < $VGPR_COUNT; $i++) {
|
||||
print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n";
|
||||
$VGPR[$i] = "VGPR$i";
|
||||
}
|
||||
|
||||
print <<STRING;
|
||||
|
||||
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
|
||||
>;
|
||||
|
||||
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add (sequence "VGPR%u", 0, $VGPR_MAX_IDX),
|
||||
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
|
||||
PERSP_CENTER_I, PERSP_CENTER_J,
|
||||
PERSP_CENTROID_I, PERSP_CENTROID_J,
|
||||
PERSP_I_W, PERSP_J_W, PERSP_1_W,
|
||||
LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
|
||||
LINEAR_CENTER_I, LINEAR_CENTER_J,
|
||||
LINEAR_CENTROID_I, LINEAR_CENTROID_J,
|
||||
LINE_STIPPLE_TEX_COORD,
|
||||
POS_X_FLOAT,
|
||||
POS_Y_FLOAT,
|
||||
POS_Z_FLOAT,
|
||||
POS_W_FLOAT,
|
||||
FRONT_FACE,
|
||||
ANCILLARY,
|
||||
SAMPLE_COVERAGE,
|
||||
POS_FIXED_PT
|
||||
)
|
||||
>;
|
||||
|
||||
def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
||||
(add VReg_32, SReg_32)
|
||||
>;
|
||||
|
||||
def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
|
||||
def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
|
||||
def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
|
||||
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
|
||||
|
||||
|
||||
STRING
|
||||
|
||||
my @subregs_64 = ('low', 'high');
|
||||
my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w');
|
||||
my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7');
|
||||
|
||||
my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64'));
|
||||
my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32'));
|
||||
my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32'));
|
||||
|
||||
my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64'));
|
||||
my @VGPR128 = print_vgpr_class(128, \@subregs_128, ('v4f32'));
|
||||
|
||||
|
||||
my $sgpr64_list = join(',', @SGPR64);
|
||||
my $vgpr64_list = join(',', @VGPR64);
|
||||
print <<STRING;
|
||||
|
||||
def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
|
||||
(add $sgpr64_list, $vgpr64_list)
|
||||
>;
|
||||
|
||||
STRING
|
||||
|
||||
if ($INDEX_FILE ne '') {
|
||||
open(my $fh, ">", $INDEX_FILE);
|
||||
my %hw_values;
|
||||
|
||||
for (my $i = 0; $i <= $#SGPR; $i++) {
|
||||
push (@{$hw_values{$i}}, $SGPR[$i]);
|
||||
}
|
||||
|
||||
for (my $i = 0; $i <= $#SGPR64; $i++) {
|
||||
push (@{$hw_values{$i * 2}}, $SGPR64[$i])
|
||||
}
|
||||
|
||||
for (my $i = 0; $i <= $#SGPR128; $i++) {
|
||||
push (@{$hw_values{$i * 4}}, $SGPR128[$i]);
|
||||
}
|
||||
|
||||
for (my $i = 0; $i <= $#SGPR256; $i++) {
|
||||
push (@{$hw_values{$i * 8}}, $SGPR256[$i]);
|
||||
}
|
||||
|
||||
for (my $i = 0; $i <= $#VGPR; $i++) {
|
||||
push (@{$hw_values{$i}}, $VGPR[$i]);
|
||||
}
|
||||
for (my $i = 0; $i <= $#VGPR64; $i++) {
|
||||
push (@{$hw_values{$i * 2}}, $VGPR64[$i]);
|
||||
}
|
||||
|
||||
for (my $i = 0; $i <= $#VGPR128; $i++) {
|
||||
push (@{$hw_values{$i * 4}}, $VGPR128[$i]);
|
||||
}
|
||||
|
||||
|
||||
print $fh "unsigned SIRegisterInfo::getHWRegNum(unsigned reg) const\n{\n switch(reg) {\n";
|
||||
for my $key (keys(%hw_values)) {
|
||||
my @names = @{$hw_values{$key}};
|
||||
for my $regname (@names) {
|
||||
print $fh " case AMDGPU::$regname:\n"
|
||||
}
|
||||
print $fh " return $key;\n";
|
||||
}
|
||||
print $fh " default: assert(!\"Unknown Register\"); return 0;\n }\n}\n"
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
sub print_sgpr_class {
|
||||
my ($reg_width, $sub_reg_ref, @types) = @_;
|
||||
return print_reg_class('SReg', 'SGPR', $reg_width, $SGPR_COUNT, $sub_reg_ref, @types);
|
||||
}
|
||||
|
||||
sub print_vgpr_class {
|
||||
my ($reg_width, $sub_reg_ref, @types) = @_;
|
||||
return print_reg_class('VReg', 'VGPR', $reg_width, $VGPR_COUNT, $sub_reg_ref, @types);
|
||||
}
|
||||
|
||||
sub print_reg_class {
|
||||
my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_;
|
||||
my @registers;
|
||||
my $component_count = $reg_width / 32;
|
||||
|
||||
for (my $i = 0; $i < $reg_count; $i += $component_count) {
|
||||
my $reg_name = $reg_prefix . $i . '_' . $reg_width;
|
||||
my @sub_regs;
|
||||
for (my $idx = 0; $idx < $component_count; $idx++) {
|
||||
my $sub_idx = $i + $idx;
|
||||
push(@sub_regs, $reg_prefix . $sub_idx);
|
||||
}
|
||||
print "def $reg_name : $reg_prefix\_$reg_width <$i, \"$reg_name\", [ ", join(',', @sub_regs) , "]>;\n";
|
||||
push (@registers, $reg_name);
|
||||
}
|
||||
|
||||
#Add VCC to SReg_64
|
||||
if ($class_prefix eq 'SReg' and $reg_width == 64) {
|
||||
push (@registers, 'VCC')
|
||||
}
|
||||
|
||||
#Add EXEC to SReg_64
|
||||
if ($class_prefix eq 'SReg' and $reg_width == 64) {
|
||||
push (@registers, 'EXEC')
|
||||
}
|
||||
|
||||
my $reg_list = join(', ', @registers);
|
||||
|
||||
print "def $class_prefix\_$reg_width : RegisterClass<\"AMDGPU\", [" . join (', ', @types) . "], $reg_width,\n (add $reg_list)\n>{\n";
|
||||
print " let SubRegClasses = [($class_prefix\_", ($reg_width / $component_count) , ' ', join(', ', @{$sub_reg_ref}), ")];\n}\n";
|
||||
return @registers;
|
||||
}
|
||||
|
|
@ -1,466 +0,0 @@
|
|||
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
|
||||
// mostly EmitInstrWithCustomInserter().
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "SIISelLowering.h"
|
||||
#include "AMDIL.h"
|
||||
#include "AMDILIntrinsicInfo.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
||||
AMDGPUTargetLowering(TM),
|
||||
TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
|
||||
{
|
||||
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
|
||||
addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
|
||||
addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
|
||||
addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);
|
||||
addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
|
||||
addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
|
||||
|
||||
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
|
||||
addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
|
||||
|
||||
computeRegisterProperties();
|
||||
|
||||
setOperationAction(ISD::AND, MVT::i1, Custom);
|
||||
|
||||
setOperationAction(ISD::ADD, MVT::i64, Legal);
|
||||
setOperationAction(ISD::ADD, MVT::i32, Legal);
|
||||
|
||||
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
|
||||
|
||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
||||
|
||||
// We need to custom lower loads from the USER_SGPR address space, so we can
|
||||
// add the SGPRs as livein registers.
|
||||
setOperationAction(ISD::LOAD, MVT::i32, Custom);
|
||||
setOperationAction(ISD::LOAD, MVT::i64, Custom);
|
||||
|
||||
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
||||
|
||||
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
|
||||
setTargetDAGCombine(ISD::SELECT_CC);
|
||||
|
||||
setTargetDAGCombine(ISD::SETCC);
|
||||
}
|
||||
|
||||
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
||||
MachineInstr * MI, MachineBasicBlock * BB) const
|
||||
{
|
||||
const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
|
||||
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
|
||||
MachineBasicBlock::iterator I = MI;
|
||||
|
||||
if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
|
||||
AppendS_WAITCNT(MI, *BB, llvm::next(I));
|
||||
return BB;
|
||||
}
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
||||
case AMDGPU::BRANCH: return BB;
|
||||
case AMDGPU::CLAMP_SI:
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
// VSRC1-2 are unused, but we still need to fill all the
|
||||
// operand slots, so we just reuse the VSRC0 operand
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addImm(0) // ABS
|
||||
.addImm(1) // CLAMP
|
||||
.addImm(0) // OMOD
|
||||
.addImm(0); // NEG
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
|
||||
case AMDGPU::FABS_SI:
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
// VSRC1-2 are unused, but we still need to fill all the
|
||||
// operand slots, so we just reuse the VSRC0 operand
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addImm(1) // ABS
|
||||
.addImm(0) // CLAMP
|
||||
.addImm(0) // OMOD
|
||||
.addImm(0); // NEG
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
|
||||
case AMDGPU::FNEG_SI:
|
||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(1))
|
||||
// VSRC1-2 are unused, but we still need to fill all the
|
||||
// operand slots, so we just reuse the VSRC0 operand
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addOperand(MI->getOperand(1))
|
||||
.addImm(0) // ABS
|
||||
.addImm(0) // CLAMP
|
||||
.addImm(0) // OMOD
|
||||
.addImm(1); // NEG
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
case AMDGPU::SHADER_TYPE:
|
||||
BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
|
||||
MI->getOperand(0).getImm();
|
||||
MI->eraseFromParent();
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_INTERP:
|
||||
LowerSI_INTERP(MI, *BB, I, MRI);
|
||||
break;
|
||||
case AMDGPU::SI_INTERP_CONST:
|
||||
LowerSI_INTERP_CONST(MI, *BB, I, MRI);
|
||||
break;
|
||||
case AMDGPU::SI_KIL:
|
||||
LowerSI_KIL(MI, *BB, I, MRI);
|
||||
break;
|
||||
case AMDGPU::SI_WQM:
|
||||
LowerSI_WQM(MI, *BB, I, MRI);
|
||||
break;
|
||||
case AMDGPU::SI_V_CNDLT:
|
||||
LowerSI_V_CNDLT(MI, *BB, I, MRI);
|
||||
break;
|
||||
}
|
||||
return BB;
|
||||
}
|
||||
|
||||
void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I) const
|
||||
{
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
|
||||
.addImm(0);
|
||||
}
|
||||
|
||||
|
||||
void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
|
||||
{
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
|
||||
{
|
||||
unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
|
||||
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
|
||||
MachineOperand dst = MI->getOperand(0);
|
||||
MachineOperand iReg = MI->getOperand(1);
|
||||
MachineOperand jReg = MI->getOperand(2);
|
||||
MachineOperand attr_chan = MI->getOperand(3);
|
||||
MachineOperand attr = MI->getOperand(4);
|
||||
MachineOperand params = MI->getOperand(5);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
|
||||
.addOperand(params);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
|
||||
.addOperand(iReg)
|
||||
.addOperand(attr_chan)
|
||||
.addOperand(attr)
|
||||
.addReg(M0);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
|
||||
.addOperand(dst)
|
||||
.addReg(tmp)
|
||||
.addOperand(jReg)
|
||||
.addOperand(attr_chan)
|
||||
.addOperand(attr)
|
||||
.addReg(M0);
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
|
||||
MachineBasicBlock &BB, MachineBasicBlock::iterator I,
|
||||
MachineRegisterInfo &MRI) const
|
||||
{
|
||||
MachineOperand dst = MI->getOperand(0);
|
||||
MachineOperand attr_chan = MI->getOperand(1);
|
||||
MachineOperand attr = MI->getOperand(2);
|
||||
MachineOperand params = MI->getOperand(3);
|
||||
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
|
||||
.addOperand(params);
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_MOV_F32))
|
||||
.addOperand(dst)
|
||||
.addOperand(attr_chan)
|
||||
.addOperand(attr)
|
||||
.addReg(M0);
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
|
||||
{
|
||||
// Clear this pixel from the exec mask if the operand is negative
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMPX_LE_F32_e32),
|
||||
AMDGPU::VCC)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addOperand(MI->getOperand(0));
|
||||
|
||||
// If the exec mask is non-zero, skip the next two instructions
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_CBRANCH_EXECNZ))
|
||||
.addImm(3)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
|
||||
// Exec mask is zero: Export to NULL target...
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::EXP))
|
||||
.addImm(0)
|
||||
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
|
||||
.addImm(0)
|
||||
.addImm(1)
|
||||
.addImm(1)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addReg(AMDGPU::SREG_LIT_0);
|
||||
|
||||
// ... and terminate wavefront
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_ENDPGM));
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
|
||||
{
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CMP_GT_F32_e32),
|
||||
AMDGPU::VCC)
|
||||
.addReg(AMDGPU::SREG_LIT_0)
|
||||
.addOperand(MI->getOperand(1));
|
||||
|
||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_CNDMASK_B32))
|
||||
.addOperand(MI->getOperand(0))
|
||||
.addOperand(MI->getOperand(3))
|
||||
.addOperand(MI->getOperand(2))
|
||||
.addReg(AMDGPU::VCC);
|
||||
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
|
||||
EVT SITargetLowering::getSetCCResultType(EVT VT) const
|
||||
{
|
||||
return MVT::i1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom DAG Lowering Operations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
switch (Op.getOpcode()) {
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
|
||||
case ISD::LOAD: return LowerLOAD(Op, DAG);
|
||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
||||
case ISD::AND: return Loweri1ContextSwitch(Op, DAG, ISD::AND);
|
||||
case ISD::INTRINSIC_WO_CHAIN: {
|
||||
unsigned IntrinsicID =
|
||||
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
||||
EVT VT = Op.getValueType();
|
||||
switch (IntrinsicID) {
|
||||
case AMDGPUIntrinsic::SI_vs_load_buffer_index:
|
||||
return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
|
||||
AMDGPU::VGPR0, VT);
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Loweri1ContextSwitch - The function is for lowering i1 operations on the
|
||||
/// VCC register. In the VALU context, VCC is a one bit register, but in the
|
||||
/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
|
||||
/// the SALU can perform operations on the VCC register, we need to promote
|
||||
/// the operand types from i1 to i64 in order for tablegen to be able to match
|
||||
/// this operation to the correct SALU instruction. We do this promotion by
|
||||
/// wrapping the operands in a CopyToReg node.
|
||||
///
|
||||
SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
|
||||
SelectionDAG &DAG,
|
||||
unsigned VCCNode) const
|
||||
{
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
|
||||
SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
|
||||
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
|
||||
Op.getOperand(0)),
|
||||
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
|
||||
Op.getOperand(1)));
|
||||
|
||||
return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
SDValue CC = Op.getOperand(1);
|
||||
SDValue LHS = Op.getOperand(2);
|
||||
SDValue RHS = Op.getOperand(3);
|
||||
SDValue JumpT = Op.getOperand(4);
|
||||
SDValue CmpValue;
|
||||
SDValue Result;
|
||||
CmpValue = DAG.getNode(
|
||||
ISD::SETCC,
|
||||
Op.getDebugLoc(),
|
||||
MVT::i1,
|
||||
LHS, RHS,
|
||||
CC);
|
||||
|
||||
Result = DAG.getNode(
|
||||
AMDGPUISD::BRANCH_COND,
|
||||
CmpValue.getDebugLoc(),
|
||||
MVT::Other, Chain,
|
||||
JumpT, CmpValue);
|
||||
return Result;
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
EVT VT = Op.getValueType();
|
||||
LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
|
||||
|
||||
assert(Ptr);
|
||||
|
||||
unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
|
||||
|
||||
// We only need to lower USER_SGPR address space loads
|
||||
if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// Loads from the USER_SGPR address space can only have constant value
|
||||
// pointers.
|
||||
ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
|
||||
assert(BasePtr);
|
||||
|
||||
unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
|
||||
const TargetRegisterClass * dstClass;
|
||||
switch (TypeDwordWidth) {
|
||||
default:
|
||||
assert(!"USER_SGPR value size not implemented");
|
||||
return SDValue();
|
||||
case 1:
|
||||
dstClass = &AMDGPU::SReg_32RegClass;
|
||||
break;
|
||||
case 2:
|
||||
dstClass = &AMDGPU::SReg_64RegClass;
|
||||
break;
|
||||
}
|
||||
uint64_t Index = BasePtr->getZExtValue();
|
||||
assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
|
||||
unsigned SGPRIndex = Index / TypeDwordWidth;
|
||||
unsigned Reg = dstClass->getRegister(SGPRIndex);
|
||||
|
||||
DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
|
||||
VT));
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
|
||||
{
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
SDValue True = Op.getOperand(2);
|
||||
SDValue False = Op.getOperand(3);
|
||||
SDValue CC = Op.getOperand(4);
|
||||
EVT VT = Op.getValueType();
|
||||
DebugLoc DL = Op.getDebugLoc();
|
||||
|
||||
SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
|
||||
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom DAG optimizations
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
switch (N->getOpcode()) {
|
||||
default: break;
|
||||
case ISD::SELECT_CC: {
|
||||
N->dump();
|
||||
ConstantSDNode *True, *False;
|
||||
// i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
|
||||
if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
|
||||
&& (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
|
||||
&& True->isAllOnesValue()
|
||||
&& False->isNullValue()
|
||||
&& VT == MVT::i1) {
|
||||
return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
|
||||
N->getOperand(1), N->getOperand(4));
|
||||
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::SETCC: {
|
||||
SDValue Arg0 = N->getOperand(0);
|
||||
SDValue Arg1 = N->getOperand(1);
|
||||
SDValue CC = N->getOperand(2);
|
||||
ConstantSDNode * C = NULL;
|
||||
ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
|
||||
|
||||
// i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
|
||||
if (VT == MVT::i1
|
||||
&& Arg0.getOpcode() == ISD::SIGN_EXTEND
|
||||
&& Arg0.getOperand(0).getValueType() == MVT::i1
|
||||
&& (C = dyn_cast<ConstantSDNode>(Arg1))
|
||||
&& C->isNullValue()
|
||||
&& CCOp == ISD::SETNE) {
|
||||
return SimplifySetCC(VT, Arg0.getOperand(0),
|
||||
DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
#define NODE_NAME_CASE(node) case SIISD::node: return #node;
|
||||
|
||||
const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const
|
||||
{
|
||||
switch (Opcode) {
|
||||
default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
|
||||
NODE_NAME_CASE(VCC_AND)
|
||||
NODE_NAME_CASE(VCC_BITCAST)
|
||||
}
|
||||
}
|
||||
|
|
@ -1,63 +0,0 @@
|
|||
//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SI DAG Lowering interface definition
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef SIISELLOWERING_H
|
||||
#define SIISELLOWERING_H
|
||||
|
||||
#include "AMDGPUISelLowering.h"
|
||||
#include "SIInstrInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class SITargetLowering : public AMDGPUTargetLowering
|
||||
{
|
||||
const SIInstrInfo * TII;
|
||||
|
||||
/// AppendS_WAITCNT - Memory reads and writes are syncronized using the
|
||||
/// S_WAITCNT instruction. This function takes the most conservative
|
||||
/// approach and inserts an S_WAITCNT instruction after every read and
|
||||
/// write.
|
||||
void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I) const;
|
||||
void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, unsigned Opocde) const;
|
||||
void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
|
||||
void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
|
||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
|
||||
|
||||
SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
|
||||
unsigned VCCNode) const;
|
||||
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
public:
|
||||
SITargetLowering(TargetMachine &tm);
|
||||
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
|
||||
MachineBasicBlock * BB) const;
|
||||
virtual EVT getSetCCResultType(EVT VT) const;
|
||||
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
virtual const char* getTargetNodeName(unsigned Opcode) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif //SIISELLOWERING_H
|
||||
|
|
@ -1,131 +0,0 @@
|
|||
//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SI Instruction format definitions.
|
||||
//
|
||||
// Instructions with _32 take 32-bit operands.
|
||||
// Instructions with _64 take 64-bit operands.
|
||||
//
|
||||
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
|
||||
// encoding is the standard encoding, but instruction that make use of
|
||||
// any of the instruction modifiers must use the 64-bit encoding.
|
||||
//
|
||||
// Instructions with _e32 use the 32-bit encoding.
|
||||
// Instructions with _e64 use the 64-bit encoding.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
class VOP3_32 <bits<9> op, string opName, list<dag> pattern>
|
||||
: VOP3 <op, (outs VReg_32:$dst), (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
|
||||
|
||||
class VOP3_64 <bits<9> op, string opName, list<dag> pattern>
|
||||
: VOP3 <op, (outs VReg_64:$dst), (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2, i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6), opName, pattern>;
|
||||
|
||||
|
||||
class SOP1_32 <bits<8> op, string opName, list<dag> pattern>
|
||||
: SOP1 <op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern>;
|
||||
|
||||
class SOP1_64 <bits<8> op, string opName, list<dag> pattern>
|
||||
: SOP1 <op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern>;
|
||||
|
||||
class SOP2_32 <bits<7> op, string opName, list<dag> pattern>
|
||||
: SOP2 <op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
|
||||
|
||||
class SOP2_64 <bits<7> op, string opName, list<dag> pattern>
|
||||
: SOP2 <op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
|
||||
|
||||
class SOP2_VCC <bits<7> op, string opName, list<dag> pattern>
|
||||
: SOP2 <op, (outs VCCReg:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
|
||||
|
||||
class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern> :
|
||||
VOP1 <
|
||||
op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
|
||||
>;
|
||||
|
||||
multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
|
||||
def _e32: VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
|
||||
def _e64 : VOP3_32 <{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
|
||||
|
||||
def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_64 <
|
||||
{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern> :
|
||||
VOP2 <
|
||||
op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
|
||||
>;
|
||||
|
||||
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
|
||||
|
||||
def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_32 <{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
|
||||
def _e32: VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_64 <
|
||||
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
class SOPK_32 <bits<5> op, string opName, list<dag> pattern>
|
||||
: SOPK <op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern>;
|
||||
|
||||
class SOPK_64 <bits<5> op, string opName, list<dag> pattern>
|
||||
: SOPK <op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern>;
|
||||
|
||||
class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
|
||||
string opName, list<dag> pattern> :
|
||||
VOPC <
|
||||
op, (ins arc:$src0, vrc:$src1), opName, pattern
|
||||
>;
|
||||
|
||||
multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {
|
||||
|
||||
def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_32 <
|
||||
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
|
||||
|
||||
def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
|
||||
|
||||
def _e64 : VOP3_64 <
|
||||
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
|
||||
opName, []
|
||||
>;
|
||||
}
|
||||
|
||||
class SOPC_32 <bits<7> op, string opName, list<dag> pattern>
|
||||
: SOPC <op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern>;
|
||||
|
||||
class SOPC_64 <bits<7> op, string opName, list<dag> pattern>
|
||||
: SOPC <op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern>;
|
||||
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SI Implementation of TargetInstrInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "SIInstrInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/MC/MCInstrDesc.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
|
||||
: AMDGPUInstrInfo(tm),
|
||||
RI(tm, *this),
|
||||
TM(tm)
|
||||
{ }
|
||||
|
||||
const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
|
||||
{
|
||||
return RI;
|
||||
}
|
||||
|
||||
void
|
||||
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const
|
||||
{
|
||||
|
||||
// If we are trying to copy to or from SCC, there is a bug somewhere else in
|
||||
// the backend. While it may be theoretically possible to do this, it should
|
||||
// never be necessary.
|
||||
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
|
||||
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
}
|
||||
|
||||
MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||
int64_t Imm) const
|
||||
{
|
||||
MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
|
||||
MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
|
||||
MachineInstrBuilder(MI).addImm(Imm);
|
||||
|
||||
return MI;
|
||||
|
||||
}
|
||||
|
||||
bool SIInstrInfo::isMov(unsigned Opcode) const
|
||||
{
|
||||
switch(Opcode) {
|
||||
default: return false;
|
||||
case AMDGPU::S_MOV_B32:
|
||||
case AMDGPU::S_MOV_B64:
|
||||
case AMDGPU::V_MOV_B32_e32:
|
||||
case AMDGPU::V_MOV_B32_e64:
|
||||
case AMDGPU::V_MOV_IMM_F32:
|
||||
case AMDGPU::V_MOV_IMM_I32:
|
||||
case AMDGPU::S_MOV_IMM_I32:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface definition for SIInstrInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#ifndef SIINSTRINFO_H
|
||||
#define SIINSTRINFO_H
|
||||
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class SIInstrInfo : public AMDGPUInstrInfo {
|
||||
private:
|
||||
const SIRegisterInfo RI;
|
||||
AMDGPUTargetMachine &TM;
|
||||
|
||||
public:
|
||||
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
|
||||
|
||||
const SIRegisterInfo &getRegisterInfo() const;
|
||||
|
||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const;
|
||||
|
||||
/// getEncodingType - Returns the encoding type of this instruction.
|
||||
unsigned getEncodingType(const MachineInstr &MI) const;
|
||||
|
||||
/// getEncodingBytes - Returns the size of this instructions encoding in
|
||||
/// number of bytes.
|
||||
unsigned getEncodingBytes(const MachineInstr &MI) const;
|
||||
|
||||
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
||||
int64_t Imm) const;
|
||||
|
||||
virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
|
||||
virtual bool isMov(unsigned Opcode) const;
|
||||
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
namespace SIInstrFlags {
|
||||
enum Flags {
|
||||
// First 4 bits are the instruction encoding
|
||||
NEED_WAIT = 1 << 4
|
||||
};
|
||||
}
|
||||
|
||||
#endif //SIINSTRINFO_H
|
||||
|
|
@ -1,506 +0,0 @@
|
|||
//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SI DAG Profiles
|
||||
//===----------------------------------------------------------------------===//
|
||||
def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
|
||||
SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
|
||||
]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// SI DAG Nodes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// and operation on 64-bit wide vcc
|
||||
def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
|
||||
[SDNPCommutative, SDNPAssociative]
|
||||
>;
|
||||
|
||||
// Special bitcast node for sharing VCC register between VALU and SALU
|
||||
def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
|
||||
SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
|
||||
>;
|
||||
|
||||
class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
AMDGPUInst<outs, ins, asm, pattern> {
|
||||
|
||||
field bits<4> EncodingType = 0;
|
||||
field bits<1> NeedWait = 0;
|
||||
|
||||
let TSFlags{3-0} = EncodingType;
|
||||
let TSFlags{4} = NeedWait;
|
||||
|
||||
}
|
||||
|
||||
class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
InstSI <outs, ins, asm, pattern> {
|
||||
|
||||
field bits<32> Inst;
|
||||
}
|
||||
|
||||
class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
InstSI <outs, ins, asm, pattern> {
|
||||
|
||||
field bits<64> Inst;
|
||||
}
|
||||
|
||||
class SIOperand <ValueType vt, dag opInfo>: Operand <vt> {
|
||||
let EncoderMethod = "encodeOperand";
|
||||
let MIOperandInfo = opInfo;
|
||||
}
|
||||
|
||||
def IMM16bit : ImmLeaf <
|
||||
i16,
|
||||
[{return isInt<16>(Imm);}]
|
||||
>;
|
||||
|
||||
def IMM8bit : ImmLeaf <
|
||||
i32,
|
||||
[{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
|
||||
>;
|
||||
|
||||
def IMM12bit : ImmLeaf <
|
||||
i16,
|
||||
[{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
|
||||
>;
|
||||
|
||||
def IMM32bitIn64bit : ImmLeaf <
|
||||
i64,
|
||||
[{return isInt<32>(Imm);}]
|
||||
>;
|
||||
|
||||
class GPR4Align <RegisterClass rc> : Operand <vAny> {
|
||||
let EncoderMethod = "GPR4AlignEncode";
|
||||
let MIOperandInfo = (ops rc:$reg);
|
||||
}
|
||||
|
||||
class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
|
||||
let EncoderMethod = "GPR2AlignEncode";
|
||||
let MIOperandInfo = (ops rc:$reg);
|
||||
}
|
||||
|
||||
def SMRDmemrr : Operand<iPTR> {
|
||||
let MIOperandInfo = (ops SReg_64, SReg_32);
|
||||
let EncoderMethod = "GPR2AlignEncode";
|
||||
}
|
||||
|
||||
def SMRDmemri : Operand<iPTR> {
|
||||
let MIOperandInfo = (ops SReg_64, i32imm);
|
||||
let EncoderMethod = "SMRDmemriEncode";
|
||||
}
|
||||
|
||||
def ADDR_Reg : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
|
||||
def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
|
||||
|
||||
let Uses = [EXEC] in {
|
||||
def EXP : Enc64<
|
||||
(outs),
|
||||
(ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
|
||||
VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
|
||||
"EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
|
||||
[] > {
|
||||
|
||||
bits<4> EN;
|
||||
bits<6> TGT;
|
||||
bits<1> COMPR;
|
||||
bits<1> DONE;
|
||||
bits<1> VM;
|
||||
bits<8> VSRC0;
|
||||
bits<8> VSRC1;
|
||||
bits<8> VSRC2;
|
||||
bits<8> VSRC3;
|
||||
|
||||
let Inst{3-0} = EN;
|
||||
let Inst{9-4} = TGT;
|
||||
let Inst{10} = COMPR;
|
||||
let Inst{11} = DONE;
|
||||
let Inst{12} = VM;
|
||||
let Inst{31-26} = 0x3e;
|
||||
let Inst{39-32} = VSRC0;
|
||||
let Inst{47-40} = VSRC1;
|
||||
let Inst{55-48} = VSRC2;
|
||||
let Inst{63-56} = VSRC3;
|
||||
let EncodingType = 0; //SIInstrEncodingType::EXP
|
||||
|
||||
let NeedWait = 1;
|
||||
let usesCustomInserter = 1;
|
||||
}
|
||||
|
||||
class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc64 <outs, ins, asm, pattern> {
|
||||
|
||||
bits<8> VDATA;
|
||||
bits<4> DMASK;
|
||||
bits<1> UNORM;
|
||||
bits<1> GLC;
|
||||
bits<1> DA;
|
||||
bits<1> R128;
|
||||
bits<1> TFE;
|
||||
bits<1> LWE;
|
||||
bits<1> SLC;
|
||||
bits<8> VADDR;
|
||||
bits<5> SRSRC;
|
||||
bits<5> SSAMP;
|
||||
|
||||
let Inst{11-8} = DMASK;
|
||||
let Inst{12} = UNORM;
|
||||
let Inst{13} = GLC;
|
||||
let Inst{14} = DA;
|
||||
let Inst{15} = R128;
|
||||
let Inst{16} = TFE;
|
||||
let Inst{17} = LWE;
|
||||
let Inst{24-18} = op;
|
||||
let Inst{25} = SLC;
|
||||
let Inst{31-26} = 0x3c;
|
||||
let Inst{39-32} = VADDR;
|
||||
let Inst{47-40} = VDATA;
|
||||
let Inst{52-48} = SRSRC;
|
||||
let Inst{57-53} = SSAMP;
|
||||
|
||||
let EncodingType = 2; //SIInstrEncodingType::MIMG
|
||||
|
||||
let NeedWait = 1;
|
||||
let usesCustomInserter = 1;
|
||||
}
|
||||
|
||||
class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc64<outs, ins, asm, pattern> {
|
||||
|
||||
bits<8> VDATA;
|
||||
bits<12> OFFSET;
|
||||
bits<1> OFFEN;
|
||||
bits<1> IDXEN;
|
||||
bits<1> GLC;
|
||||
bits<1> ADDR64;
|
||||
bits<4> DFMT;
|
||||
bits<3> NFMT;
|
||||
bits<8> VADDR;
|
||||
bits<5> SRSRC;
|
||||
bits<1> SLC;
|
||||
bits<1> TFE;
|
||||
bits<8> SOFFSET;
|
||||
|
||||
let Inst{11-0} = OFFSET;
|
||||
let Inst{12} = OFFEN;
|
||||
let Inst{13} = IDXEN;
|
||||
let Inst{14} = GLC;
|
||||
let Inst{15} = ADDR64;
|
||||
let Inst{18-16} = op;
|
||||
let Inst{22-19} = DFMT;
|
||||
let Inst{25-23} = NFMT;
|
||||
let Inst{31-26} = 0x3a; //encoding
|
||||
let Inst{39-32} = VADDR;
|
||||
let Inst{47-40} = VDATA;
|
||||
let Inst{52-48} = SRSRC;
|
||||
let Inst{54} = SLC;
|
||||
let Inst{55} = TFE;
|
||||
let Inst{63-56} = SOFFSET;
|
||||
let EncodingType = 3; //SIInstrEncodingType::MTBUF
|
||||
|
||||
let NeedWait = 1;
|
||||
let usesCustomInserter = 1;
|
||||
let neverHasSideEffects = 1;
|
||||
}
|
||||
|
||||
class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc64<outs, ins, asm, pattern> {
|
||||
|
||||
bits<8> VDATA;
|
||||
bits<12> OFFSET;
|
||||
bits<1> OFFEN;
|
||||
bits<1> IDXEN;
|
||||
bits<1> GLC;
|
||||
bits<1> ADDR64;
|
||||
bits<1> LDS;
|
||||
bits<8> VADDR;
|
||||
bits<5> SRSRC;
|
||||
bits<1> SLC;
|
||||
bits<1> TFE;
|
||||
bits<8> SOFFSET;
|
||||
|
||||
let Inst{11-0} = OFFSET;
|
||||
let Inst{12} = OFFEN;
|
||||
let Inst{13} = IDXEN;
|
||||
let Inst{14} = GLC;
|
||||
let Inst{15} = ADDR64;
|
||||
let Inst{16} = LDS;
|
||||
let Inst{24-18} = op;
|
||||
let Inst{31-26} = 0x38; //encoding
|
||||
let Inst{39-32} = VADDR;
|
||||
let Inst{47-40} = VDATA;
|
||||
let Inst{52-48} = SRSRC;
|
||||
let Inst{54} = SLC;
|
||||
let Inst{55} = TFE;
|
||||
let Inst{63-56} = SOFFSET;
|
||||
let EncodingType = 4; //SIInstrEncodingType::MUBUF
|
||||
|
||||
let NeedWait = 1;
|
||||
let usesCustomInserter = 1;
|
||||
let neverHasSideEffects = 1;
|
||||
}
|
||||
} // End Uses = [EXEC]
|
||||
|
||||
class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc32<outs, ins, asm, pattern> {
|
||||
|
||||
bits<7> SDST;
|
||||
bits<15> PTR;
|
||||
bits<8> OFFSET = PTR{7-0};
|
||||
bits<1> IMM = PTR{8};
|
||||
bits<6> SBASE = PTR{14-9};
|
||||
|
||||
let Inst{7-0} = OFFSET;
|
||||
let Inst{8} = IMM;
|
||||
let Inst{14-9} = SBASE;
|
||||
let Inst{21-15} = SDST;
|
||||
let Inst{26-22} = op;
|
||||
let Inst{31-27} = 0x18; //encoding
|
||||
let EncodingType = 5; //SIInstrEncodingType::SMRD
|
||||
|
||||
let NeedWait = 1;
|
||||
let usesCustomInserter = 1;
|
||||
}
|
||||
|
||||
class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc32<outs, ins, asm, pattern> {
|
||||
|
||||
bits<7> SDST;
|
||||
bits<8> SSRC0;
|
||||
|
||||
let Inst{7-0} = SSRC0;
|
||||
let Inst{15-8} = op;
|
||||
let Inst{22-16} = SDST;
|
||||
let Inst{31-23} = 0x17d; //encoding;
|
||||
let EncodingType = 6; //SIInstrEncodingType::SOP1
|
||||
}
|
||||
|
||||
class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc32 <outs, ins, asm, pattern> {
|
||||
|
||||
bits<7> SDST;
|
||||
bits<8> SSRC0;
|
||||
bits<8> SSRC1;
|
||||
|
||||
let Inst{7-0} = SSRC0;
|
||||
let Inst{15-8} = SSRC1;
|
||||
let Inst{22-16} = SDST;
|
||||
let Inst{29-23} = op;
|
||||
let Inst{31-30} = 0x2; // encoding
|
||||
let EncodingType = 7; // SIInstrEncodingType::SOP2
|
||||
}
|
||||
|
||||
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc32<outs, ins, asm, pattern> {
|
||||
|
||||
bits<8> SSRC0;
|
||||
bits<8> SSRC1;
|
||||
|
||||
let Inst{7-0} = SSRC0;
|
||||
let Inst{15-8} = SSRC1;
|
||||
let Inst{22-16} = op;
|
||||
let Inst{31-23} = 0x17e;
|
||||
let EncodingType = 8; // SIInstrEncodingType::SOPC
|
||||
|
||||
let DisableEncoding = "$dst";
|
||||
}
|
||||
|
||||
class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc32 <outs, ins , asm, pattern> {
|
||||
|
||||
bits <7> SDST;
|
||||
bits <16> SIMM16;
|
||||
|
||||
let Inst{15-0} = SIMM16;
|
||||
let Inst{22-16} = SDST;
|
||||
let Inst{27-23} = op;
|
||||
let Inst{31-28} = 0xb; //encoding
|
||||
let EncodingType = 9; // SIInstrEncodingType::SOPK
|
||||
}
|
||||
|
||||
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
|
||||
(outs),
|
||||
ins,
|
||||
asm,
|
||||
pattern > {
|
||||
|
||||
bits <16> SIMM16;
|
||||
|
||||
let Inst{15-0} = SIMM16;
|
||||
let Inst{22-16} = op;
|
||||
let Inst{31-23} = 0x17f; // encoding
|
||||
let EncodingType = 10; // SIInstrEncodingType::SOPP
|
||||
}
|
||||
|
||||
|
||||
let Uses = [EXEC] in {
|
||||
class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc32 <outs, ins, asm, pattern> {
|
||||
|
||||
bits<8> VDST;
|
||||
bits<8> VSRC;
|
||||
bits<2> ATTRCHAN;
|
||||
bits<6> ATTR;
|
||||
|
||||
let Inst{7-0} = VSRC;
|
||||
let Inst{9-8} = ATTRCHAN;
|
||||
let Inst{15-10} = ATTR;
|
||||
let Inst{17-16} = op;
|
||||
let Inst{25-18} = VDST;
|
||||
let Inst{31-26} = 0x32; // encoding
|
||||
let EncodingType = 11; // SIInstrEncodingType::VINTRP
|
||||
|
||||
let neverHasSideEffects = 1;
|
||||
}
|
||||
|
||||
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc32 <outs, ins, asm, pattern> {
|
||||
|
||||
bits<8> VDST;
|
||||
bits<9> SRC0;
|
||||
|
||||
let Inst{8-0} = SRC0;
|
||||
let Inst{16-9} = op;
|
||||
let Inst{24-17} = VDST;
|
||||
let Inst{31-25} = 0x3f; //encoding
|
||||
|
||||
let EncodingType = 12; // SIInstrEncodingType::VOP1
|
||||
let PostEncoderMethod = "VOPPostEncode";
|
||||
}
|
||||
|
||||
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc32 <outs, ins, asm, pattern> {
|
||||
|
||||
bits<8> VDST;
|
||||
bits<9> SRC0;
|
||||
bits<8> VSRC1;
|
||||
|
||||
let Inst{8-0} = SRC0;
|
||||
let Inst{16-9} = VSRC1;
|
||||
let Inst{24-17} = VDST;
|
||||
let Inst{30-25} = op;
|
||||
let Inst{31} = 0x0; //encoding
|
||||
|
||||
let EncodingType = 13; // SIInstrEncodingType::VOP2
|
||||
let PostEncoderMethod = "VOPPostEncode";
|
||||
}
|
||||
|
||||
class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
||||
Enc64 <outs, ins, asm, pattern> {
|
||||
|
||||
bits<8> VDST;
|
||||
bits<9> SRC0;
|
||||
bits<9> SRC1;
|
||||
bits<9> SRC2;
|
||||
bits<3> ABS;
|
||||
bits<1> CLAMP;
|
||||
bits<2> OMOD;
|
||||
bits<3> NEG;
|
||||
|
||||
let Inst{7-0} = VDST;
|
||||
let Inst{10-8} = ABS;
|
||||
let Inst{11} = CLAMP;
|
||||
let Inst{25-17} = op;
|
||||
let Inst{31-26} = 0x34; //encoding
|
||||
let Inst{40-32} = SRC0;
|
||||
let Inst{49-41} = SRC1;
|
||||
let Inst{58-50} = SRC2;
|
||||
let Inst{60-59} = OMOD;
|
||||
let Inst{63-61} = NEG;
|
||||
|
||||
let EncodingType = 14; // SIInstrEncodingType::VOP3
|
||||
let PostEncoderMethod = "VOPPostEncode";
|
||||
}
|
||||
|
||||
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
|
||||
Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
|
||||
|
||||
bits<9> SRC0;
|
||||
bits<8> VSRC1;
|
||||
|
||||
let Inst{8-0} = SRC0;
|
||||
let Inst{16-9} = VSRC1;
|
||||
let Inst{24-17} = op;
|
||||
let Inst{31-25} = 0x3e;
|
||||
|
||||
let EncodingType = 15; //SIInstrEncodingType::VOPC
|
||||
let PostEncoderMethod = "VOPPostEncode";
|
||||
let DisableEncoding = "$dst";
|
||||
}
|
||||
} // End Uses = [EXEC]
|
||||
|
||||
class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
|
||||
op,
|
||||
(outs VReg_128:$vdata),
|
||||
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
|
||||
i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
|
||||
GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
|
||||
asm,
|
||||
[]
|
||||
>;
|
||||
|
||||
class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
|
||||
op,
|
||||
(outs regClass:$dst),
|
||||
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
|
||||
i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
|
||||
i1imm:$tfe, SReg_32:$soffset),
|
||||
asm,
|
||||
[]> {
|
||||
let mayLoad = 1;
|
||||
}
|
||||
|
||||
class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
|
||||
op,
|
||||
(outs regClass:$dst),
|
||||
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
|
||||
i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
|
||||
i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
|
||||
asm,
|
||||
[]> {
|
||||
let mayLoad = 1;
|
||||
}
|
||||
|
||||
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
|
||||
op,
|
||||
(outs),
|
||||
(ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
|
||||
i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
|
||||
GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
|
||||
asm,
|
||||
[]> {
|
||||
let mayStore = 1;
|
||||
}
|
||||
|
||||
multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
|
||||
ValueType vt> {
|
||||
def _IMM : SMRD <
|
||||
op,
|
||||
(outs dstClass:$dst),
|
||||
(ins SMRDmemri:$src0),
|
||||
asm,
|
||||
[(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
|
||||
>;
|
||||
|
||||
def _SGPR : SMRD <
|
||||
op,
|
||||
(outs dstClass:$dst),
|
||||
(ins SMRDmemrr:$src0),
|
||||
asm,
|
||||
[(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
|
||||
>;
|
||||
}
|
||||
|
||||
multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
|
||||
defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
|
||||
defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
|
||||
}
|
||||
|
||||
include "SIInstrFormats.td"
|
||||
include "SIInstructions.td"
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,40 +0,0 @@
|
|||
//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SI Intrinsic Definitions
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
let TargetPrefix = "SI", isTarget = 1 in {
|
||||
|
||||
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
|
||||
/* XXX: We may need a seperate intrinsic here for loading integer values */
|
||||
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
|
||||
def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
|
||||
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
|
||||
def int_SI_wqm : Intrinsic <[], [], []>;
|
||||
|
||||
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
|
||||
|
||||
/* Interpolation Intrinsics */
|
||||
|
||||
def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
|
||||
class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
|
||||
|
||||
def int_SI_fs_interp_linear_center : Interp;
|
||||
def int_SI_fs_interp_linear_centroid : Interp;
|
||||
def int_SI_fs_interp_persp_center : Interp;
|
||||
def int_SI_fs_interp_persp_centroid : Interp;
|
||||
def int_SI_fs_interp_constant : Interp;
|
||||
|
||||
def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
|
||||
def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
|
@ -1,161 +0,0 @@
|
|||
//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF)
|
||||
// to predicated instructions.
|
||||
//
|
||||
// All flow control (except loops) is handled using predicated instructions and
|
||||
// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
|
||||
// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
|
||||
// by writting to the 64-bit EXEC register (each bit corresponds to a
|
||||
// single vector ALU). Typically, for predicates, a vector ALU will write
|
||||
// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
|
||||
// Vector ALU) and then the ScalarALU will AND the VCC register with the
|
||||
// EXEC to update the predicates.
|
||||
//
|
||||
// For example:
|
||||
// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
|
||||
// SI_IF_NZ %VCC
|
||||
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
|
||||
// ELSE
|
||||
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
|
||||
// ENDIF
|
||||
//
|
||||
// becomes:
|
||||
//
|
||||
// %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask
|
||||
// %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask
|
||||
// S_CBRANCH_EXECZ label0 // This instruction is an
|
||||
// // optimization which allows us to
|
||||
// // branch if all the bits of
|
||||
// // EXEC are zero.
|
||||
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
|
||||
//
|
||||
// label0:
|
||||
// %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the
|
||||
// // Then block.
|
||||
// %EXEC = S_AND_B64 %SGPR0, %EXEC
|
||||
// S_BRANCH_EXECZ label1 // Use our branch optimization
|
||||
// // instruction again.
|
||||
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block
|
||||
// label1:
|
||||
// S_MOV_B64 // Restore the old EXEC value
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "SIInstrInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
// Machine pass that lowers the SI_IF_NZ / ELSE / ENDIF pseudo instructions
// into EXEC-mask manipulation, using a stack of saved EXEC values (see the
// file header comment for the full transformation).
class SILowerFlowControlPass : public MachineFunctionPass {

private:
  static char ID;
  const TargetInstrInfo *TII;
  // Registers currently holding saved copies of EXEC; innermost scope last.
  std::vector<unsigned> PredicateStack;
  // Pool of registers the function does not otherwise use, available for
  // saving EXEC masks.
  std::vector<unsigned> UnusedRegisters;

  // Save the current EXEC mask into a free register and push it on the stack.
  void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
  // Pop the innermost saved mask and restore it into EXEC.
  void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);

public:
  SILowerFlowControlPass(TargetMachine &tm) :
    MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }

  virtual bool runOnMachineFunction(MachineFunction &MF);

  const char *getPassName() const {
    return "SI Lower flow control instructions";
  }

};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char SILowerFlowControlPass::ID = 0;

// Factory used by the target's pass setup; caller owns the returned pass.
FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) {
  return new SILowerFlowControlPass(tm);
}
|
||||
|
||||
// Lowers SI_IF_NZ / ELSE / ENDIF pseudos to EXEC-mask updates.
// Returns true iff the function was modified.
bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {

  // Find all the unused SReg_64 registers that can be used for the
  // predicate stack.
  //
  // BUGFIX: the old loop initialized an iterator to end(), dereferenced it,
  // and walked backwards (I != S; --I) -- reading one past the end of the
  // register class and never visiting begin().  Iterate forward instead.
  for (TargetRegisterClass::iterator I = AMDGPU::SReg_64RegClass.begin(),
                                     E = AMDGPU::SReg_64RegClass.end();
                                     I != E; ++I) {
    unsigned Reg = *I;
    if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
      UnusedRegisters.push_back(Reg);
    }
  }

  bool Modified = false;

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    // Next is cached before the body runs so the current instruction can be
    // erased without invalidating the loop iterator.
    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
         I != MBB.end(); I = Next, Next = llvm::next(I)) {
      MachineInstr &MI = *I;
      switch (MI.getOpcode()) {
      default: break;
      case AMDGPU::SI_IF_NZ:
        // Save EXEC, then AND it with the branch condition (VCC): lanes
        // whose condition bit is clear are disabled for the IF block.
        pushExecMask(MBB, I);
        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
                AMDGPU::EXEC)
                .addOperand(MI.getOperand(0)) // VCC
                .addReg(AMDGPU::EXEC);
        MI.eraseFromParent();
        Modified = true;
        break;
      case AMDGPU::ELSE:
        // Invert EXEC, then AND it with the mask saved at the matching IF,
        // enabling exactly the lanes that did not execute the IF block.
        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64),
                AMDGPU::EXEC)
                .addReg(AMDGPU::EXEC);
        BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
                AMDGPU::EXEC)
                .addReg(PredicateStack.back())
                .addReg(AMDGPU::EXEC);
        MI.eraseFromParent();
        Modified = true;
        break;
      case AMDGPU::ENDIF:
        // Restore the EXEC mask saved at the matching IF.
        popExecMask(MBB, I);
        MI.eraseFromParent();
        Modified = true;
        break;
      }
    }
  }
  // BUGFIX: the old code always returned false, even after erasing and
  // inserting instructions; MachineFunctionPass requires true on change.
  return Modified;
}
|
||||
|
||||
// Takes a register from the unused pool, emits S_MOV_B64 to copy the current
// EXEC mask into it before instruction I, and pushes it on PredicateStack.
void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator I) {

  // The pool was filled in runOnMachineFunction; nesting deeper than the
  // number of free SReg_64 registers is unsupported.
  assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");
  unsigned StackReg = UnusedRegisters.back();
  UnusedRegisters.pop_back();
  PredicateStack.push_back(StackReg);
  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
          StackReg)
          .addReg(AMDGPU::EXEC);
}
|
||||
|
||||
// Pops the innermost saved EXEC mask, emits S_MOV_B64 to restore it into
// EXEC before instruction I, and returns the holding register to the pool.
void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator I) {
  // BUGFIX: guard against an unbalanced ENDIF (pop with nothing pushed),
  // which would call back()/pop_back() on an empty vector.  Mirrors the
  // assert in pushExecMask.
  assert(!PredicateStack.empty() && "popExecMask called on empty predicate stack");
  unsigned StackReg = PredicateStack.back();
  PredicateStack.pop_back();
  UnusedRegisters.push_back(StackReg);
  BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
          AMDGPU::EXEC)
          .addReg(StackReg);
}
|
||||
|
|
@ -1,105 +0,0 @@
|
|||
//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This pass performs the following transformation on instructions with
|
||||
// literal constants:
|
||||
//
|
||||
// %VGPR0 = V_MOV_IMM_I32 1
|
||||
//
|
||||
// becomes:
|
||||
//
|
||||
// BUNDLE
|
||||
// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT
|
||||
// * SI_LOAD_LITERAL 1
|
||||
//
|
||||
// The resulting sequence matches exactly how the hardware handles immediate
|
||||
// operands, so this transformation greatly simplifies the code generator.
|
||||
//
|
||||
// Only the *_MOV_IMM_* support immediate operands at the moment, but when
|
||||
// support for immediate operands is added to other instructions, they
|
||||
// will be lowered here as well.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineInstrBundle.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
// Machine pass that rewrites *_MOV_IMM_* pseudos into a bundle of a MOV from
// SI_LITERAL_CONSTANT plus an SI_LOAD_LITERAL carrying the literal operand
// (see the file header comment for the full transformation).
class SILowerLiteralConstantsPass : public MachineFunctionPass {

private:
  static char ID;
  const TargetInstrInfo *TII;

public:
  SILowerLiteralConstantsPass(TargetMachine &tm) :
    MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }

  virtual bool runOnMachineFunction(MachineFunction &MF);

  const char *getPassName() const {
    return "SI Lower literal constants pass";
  }
};
|
||||
|
||||
} // End anonymous namespace
|
||||
|
||||
char SILowerLiteralConstantsPass::ID = 0;

// Factory used by the target's pass setup; caller owns the returned pass.
FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
  return new SILowerLiteralConstantsPass(tm);
}
|
||||
|
||||
// Rewrites every *_MOV_IMM_* instruction into a (MOV, SI_LOAD_LITERAL)
// bundle.  Returns true iff the function was modified.
bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
  bool Modified = false;
  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    // Next is cached before the body runs so the current instruction can be
    // erased without invalidating the loop iterator.
    for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
         I != MBB.end(); I = Next, Next = llvm::next(I)) {
      MachineInstr &MI = *I;
      switch (MI.getOpcode()) {
      default: break;
      case AMDGPU::S_MOV_IMM_I32:
      case AMDGPU::S_MOV_IMM_I64:
      case AMDGPU::V_MOV_IMM_F32:
      case AMDGPU::V_MOV_IMM_I32: {
        unsigned MovOpcode;
        unsigned LoadLiteralOpcode;
        // FIX: bind by const reference -- the old code copied the whole
        // MachineOperand by value just to test isImm() on it.
        const MachineOperand &LiteralOp = MI.getOperand(1);
        // Destination register class decides vector vs scalar MOV.
        if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
          MovOpcode = AMDGPU::V_MOV_B32_e32;
        } else {
          MovOpcode = AMDGPU::S_MOV_B32;
        }
        // Immediate operands become I32 literals; anything else (e.g. an
        // FP immediate operand) uses the F32 load.
        if (LiteralOp.isImm()) {
          LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
        } else {
          LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
        }
        // Emit the MOV from the literal-constant placeholder register,
        // followed by the literal itself, and bundle the pair so later
        // passes treat them as one unit.
        MachineInstr *First =
          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode),
                  MI.getOperand(0).getReg())
                  .addReg(AMDGPU::SI_LITERAL_CONSTANT);
        MachineInstr *Last =
          BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
                  .addOperand(LiteralOp);
        Last->setIsInsideBundle();
        llvm::finalizeBundle(MBB, First, Last);
        MI.eraseFromParent();
        Modified = true;
        break;
      }
      }
    }
  }
  // BUGFIX: the old code always returned false even after rewriting
  // instructions; MachineFunctionPass requires true on change.
  return Modified;
}
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Per-function SI state; both fields start at 0.  The MachineFunction
// argument is accepted for the MachineFunctionInfo factory convention but
// is not used here.
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
  : MachineFunctionInfo(),
    SPIPSInputAddr(0),
    ShaderType(0)
  { }
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config
|
||||
// register, which is to tell the hardware which interpolation parameters to
|
||||
// load.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#ifndef _SIMACHINEFUNCTIONINFO_H_
|
||||
#define _SIMACHINEFUNCTIONINFO_H_
|
||||
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
// Per-function information for SI shaders (see file header): tracks the
// spi_sp_input_addr config register, which tells the hardware which
// interpolation parameters to load.
class SIMachineFunctionInfo : public MachineFunctionInfo {

private:

public:
  SIMachineFunctionInfo(const MachineFunction &MF);
  // Value for the SPI_PS_INPUT_ADDR config register (interpolation
  // parameters to load); initialized to 0.
  unsigned SPIPSInputAddr;
  // Shader stage identifier; 0 by default.  Encoding of the values is not
  // visible here -- see the users of this field.
  unsigned ShaderType;

};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
|
||||
#endif //_SIMACHINEFUNCTIONINFO_H_
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the SI implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "AMDGPUTargetMachine.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Forwards to the common AMDGPU register info and keeps references to the
// target machine and instruction info for later queries.
SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
                               const TargetInstrInfo &tii)
  : AMDGPURegisterInfo(tm, tii),
    TM(tm),
    TII(tii)
  { }
|
||||
|
||||
// No registers are reserved on SI at this point: return an all-clear
// BitVector sized to the number of registers.
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  return BitVector(getNumRegs());
}
|
||||
|
||||
// Returns the hardware encoding for a register: M0 and SREG_LIT_0 have
// fixed codes (124 and 128); everything else comes from the generated
// getHWRegNum table (SIRegisterGetHWRegNum.inc).
unsigned SIRegisterInfo::getBinaryCode(unsigned reg) const
{
  switch (reg) {
  case AMDGPU::M0: return 124;
  case AMDGPU::SREG_LIT_0: return 128;
  default: return getHWRegNum(reg);
  }
}
|
||||
|
||||
// Maps an AMDIL register class to its SI equivalent.  Only GPRF32 needs
// translation (to VReg_32); all other classes pass through unchanged.
const TargetRegisterClass *
SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
{
  switch (rc->getID()) {
  case AMDGPU::GPRF32RegClassID:
    return &AMDGPU::VReg_32RegClass;
  default: return rc;
  }
}
|
||||
|
||||
// Register class the CFG structurizer should use for a given value type.
// Currently every type, including i32, maps to VReg_32 (the switch shape is
// kept for future per-type cases).
const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
                                                                   MVT VT) const
{
  switch(VT.SimpleTy) {
    default:
    case MVT::i32: return &AMDGPU::VReg_32RegClass;
  }
}
|
||||
#include "SIRegisterGetHWRegNum.inc"
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Interface definition for SIRegisterInfo
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
#ifndef SIREGISTERINFO_H_
|
||||
#define SIREGISTERINFO_H_
|
||||
|
||||
#include "AMDGPURegisterInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class AMDGPUTargetMachine;
|
||||
class TargetInstrInfo;
|
||||
|
||||
// SI implementation of the target register info (see SIRegisterInfo.cpp for
// the method definitions).
struct SIRegisterInfo : public AMDGPURegisterInfo
{
  AMDGPUTargetMachine &TM;      // Owning target machine (borrowed reference).
  const TargetInstrInfo &TII;   // Instruction info (borrowed reference).

  SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);

  // Currently returns an empty set -- no registers are reserved.
  virtual BitVector getReservedRegs(const MachineFunction &MF) const;

  /// getBinaryCode - Returns the hardware encoding for a register
  virtual unsigned getBinaryCode(unsigned reg) const;

  /// getISARegClass - rc is an AMDIL reg class. This function returns the
  /// SI register class that is equivalent to the given AMDIL register class.
  virtual const TargetRegisterClass *
    getISARegClass(const TargetRegisterClass * rc) const;

  /// getHWRegNum - Generated function that returns the hardware encoding for
  /// a register (defined in SIRegisterGetHWRegNum.inc).
  unsigned getHWRegNum(unsigned reg) const;

  /// getCFGStructurizerRegClass - get the register class of the specified
  /// type to use in the CFGStructurizer
  virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;

};
|
||||
|
||||
} // End namespace llvm
|
||||
|
||||
#endif // SIREGISTERINFO_H_
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
//===-- SISchedule.td - SI Scheduling definitons -------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// TODO: This is just a place holder for now.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
// Empty placeholder itinerary: no functional units, bypasses, or
// per-instruction scheduling data are defined yet (see header TODO).
def SI_Itin : ProcessorItineraries <[], [], []>;
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
//===-- TargetInfo/AMDGPUTargetInfo.cpp - TODO: Add brief description -------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// TODO: Add full description
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// The target for the AMDGPU backend
|
||||
Target llvm::TheAMDGPUTarget;
|
||||
|
||||
/// Extern function to initialize the targets for the AMDGPU backend
|
||||
/// Extern function to initialize the targets for the AMDGPU backend.
/// Registers TheAMDGPUTarget under the "r600" name (Triple::r600, no JIT)
/// with LLVM's target registry; C linkage so LLVM's init machinery can
/// call it by name.
extern "C" void LLVMInitializeAMDGPUTargetInfo() {
  RegisterTarget<Triple::r600, false>
    R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
}
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
|
||||
#include "radeon_llvm_emit.h"
|
||||
|
||||
#include <llvm/Support/CommandLine.h>
|
||||
#include <llvm/Support/IRReader.h>
|
||||
#include <llvm/Support/SourceMgr.h>
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Module.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<std::string>
|
||||
InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
|
||||
|
||||
static cl::opt<std::string>
|
||||
TargetGPUName("gpu", cl::desc("target gpu name"), cl::value_desc("gpu_name"));
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
unsigned char * bytes;
|
||||
unsigned byte_count;
|
||||
|
||||
std::auto_ptr<Module> M;
|
||||
LLVMContext &Context = getGlobalContext();
|
||||
SMDiagnostic Err;
|
||||
cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
|
||||
M.reset(ParseIRFile(InputFilename, Err, Context));
|
||||
|
||||
Module * mod = M.get();
|
||||
|
||||
radeon_llvm_compile(wrap(mod), &bytes, &byte_count, TargetGPUName.c_str(), 1);
|
||||
}
|
||||
|
|
@ -39,12 +39,7 @@
|
|||
#include <llvm/Target/TargetMachine.h>
|
||||
#include <llvm/Transforms/Scalar.h>
|
||||
#include <llvm-c/Target.h>
|
||||
|
||||
#if HAVE_LLVM < 0x0302
|
||||
#include <llvm/Target/TargetData.h>
|
||||
#else
|
||||
#include <llvm/DataLayout.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
|
|
@ -52,16 +47,6 @@
|
|||
|
||||
using namespace llvm;
|
||||
|
||||
#ifndef EXTERNAL_LLVM
|
||||
extern "C" {
|
||||
|
||||
void LLVMInitializeAMDGPUAsmPrinter(void);
|
||||
void LLVMInitializeAMDGPUTargetMC(void);
|
||||
void LLVMInitializeAMDGPUTarget(void);
|
||||
void LLVMInitializeAMDGPUTargetInfo(void);
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
class LLVMEnsureMultithreaded {
|
||||
|
|
@ -89,17 +74,10 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
|
|||
|
||||
Triple AMDGPUTriple(sys::getDefaultTargetTriple());
|
||||
|
||||
#if HAVE_LLVM == 0x0302
|
||||
LLVMInitializeAMDGPUTargetInfo();
|
||||
LLVMInitializeAMDGPUTarget();
|
||||
LLVMInitializeAMDGPUTargetMC();
|
||||
LLVMInitializeAMDGPUAsmPrinter();
|
||||
#else
|
||||
LLVMInitializeR600TargetInfo();
|
||||
LLVMInitializeR600Target();
|
||||
LLVMInitializeR600TargetMC();
|
||||
LLVMInitializeR600AsmPrinter();
|
||||
#endif
|
||||
|
||||
std::string err;
|
||||
const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err);
|
||||
|
|
@ -130,11 +108,7 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
|
|||
));
|
||||
TargetMachine &AMDGPUTargetMachine = *tm.get();
|
||||
PassManager PM;
|
||||
#if HAVE_LLVM < 0x0302
|
||||
PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData()));
|
||||
#else
|
||||
PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout()));
|
||||
#endif
|
||||
PM.add(createPromoteMemoryToRegisterPass());
|
||||
AMDGPUTargetMachine.setAsmVerbosityDefault(true);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue