mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-09 04:38:03 +02:00
radeon/llvm: Remove backend code from Mesa
This code now lives in an external tree. For the next Mesa release fetch the code from the master branch of this LLVM repo: http://cgit.freedesktop.org/~tstellar/llvm/ For all subsequent Mesa releases, fetch the code from the official LLVM project: www.llvm.org
This commit is contained in:
parent
05c143cc04
commit
aed37cbee8
99 changed files with 0 additions and 19168 deletions
18
src/gallium/drivers/radeon/.gitignore
vendored
18
src/gallium/drivers/radeon/.gitignore
vendored
|
|
@ -1,18 +0,0 @@
|
||||||
AMDGPUInstrEnums.h.include
|
|
||||||
AMDGPUInstrEnums.include
|
|
||||||
AMDGPUInstrEnums.td
|
|
||||||
AMDILGenAsmWriter.inc
|
|
||||||
AMDILGenCallingConv.inc
|
|
||||||
AMDILGenCodeEmitter.inc
|
|
||||||
AMDILGenDAGISel.inc
|
|
||||||
AMDILGenEDInfo.inc
|
|
||||||
AMDILGenInstrInfo.inc
|
|
||||||
AMDILGenIntrinsics.inc
|
|
||||||
AMDILGenRegisterInfo.inc
|
|
||||||
AMDILGenSubtargetInfo.inc
|
|
||||||
R600HwRegInfo.include
|
|
||||||
R600Intrinsics.td
|
|
||||||
R600RegisterInfo.td
|
|
||||||
SIRegisterGetHWRegNum.inc
|
|
||||||
SIRegisterInfo.td
|
|
||||||
loader
|
|
||||||
|
|
@ -1,46 +0,0 @@
|
||||||
//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPU_H
|
|
||||||
#define AMDGPU_H
|
|
||||||
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
|
||||||
#include "llvm/Target/TargetMachine.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class FunctionPass;
|
|
||||||
class AMDGPUTargetMachine;
|
|
||||||
|
|
||||||
// R600 Passes
|
|
||||||
FunctionPass* createR600KernelParametersPass(const TargetData* TD);
|
|
||||||
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
|
||||||
|
|
||||||
// SI Passes
|
|
||||||
FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
|
|
||||||
FunctionPass *createSILowerFlowControlPass(TargetMachine &tm);
|
|
||||||
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
|
|
||||||
FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
|
|
||||||
|
|
||||||
// Passes common to R600 and SI
|
|
||||||
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
namespace ShaderType {
|
|
||||||
enum Type {
|
|
||||||
PIXEL = 0,
|
|
||||||
VERTEX = 1,
|
|
||||||
GEOMETRY = 2,
|
|
||||||
COMPUTE = 3
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // AMDGPU_H
|
|
||||||
|
|
@ -1,38 +0,0 @@
|
||||||
//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
// Include AMDIL TD files
|
|
||||||
include "AMDILBase.td"
|
|
||||||
|
|
||||||
|
|
||||||
def AMDGPUInstrInfo : InstrInfo {}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Declare the target which we are implementing
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
def AMDGPUAsmWriter : AsmWriter {
|
|
||||||
string AsmWriterClassName = "InstPrinter";
|
|
||||||
int Variant = 0;
|
|
||||||
bit isMCAsmWriter = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
def AMDGPU : Target {
|
|
||||||
// Pull in Instruction Info:
|
|
||||||
let InstructionSet = AMDGPUInstrInfo;
|
|
||||||
let AssemblyWriters = [AMDGPUAsmWriter];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Include AMDGPU TD files
|
|
||||||
include "R600Schedule.td"
|
|
||||||
include "SISchedule.td"
|
|
||||||
include "Processors.td"
|
|
||||||
include "AMDGPUInstrInfo.td"
|
|
||||||
include "AMDGPUIntrinsics.td"
|
|
||||||
include "AMDGPURegisterInfo.td"
|
|
||||||
include "AMDGPUInstructions.td"
|
|
||||||
|
|
@ -1,134 +0,0 @@
|
||||||
//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// The AMDGPUAsmPrinter is used to print both assembly string and also binary
|
|
||||||
// code. When passed an MCAsmStreamer it prints assembly and when passed
|
|
||||||
// an MCObjectStreamer it outputs binary code.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
|
|
||||||
|
|
||||||
#include "AMDGPUAsmPrinter.h"
|
|
||||||
#include "AMDGPU.h"
|
|
||||||
#include "SIMachineFunctionInfo.h"
|
|
||||||
#include "SIRegisterInfo.h"
|
|
||||||
#include "llvm/MC/MCStreamer.h"
|
|
||||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
|
|
||||||
static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
|
|
||||||
MCStreamer &Streamer) {
|
|
||||||
return new AMDGPUAsmPrinter(tm, Streamer);
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
|
|
||||||
TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// runOnMachineFunction - We need to override this function so we can avoid
|
|
||||||
/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
|
|
||||||
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
|
||||||
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
|
|
||||||
if (STM.dumpCode()) {
|
|
||||||
MF.dump();
|
|
||||||
}
|
|
||||||
SetupMachineFunction(MF);
|
|
||||||
if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
|
||||||
EmitProgramInfo(MF);
|
|
||||||
}
|
|
||||||
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
|
|
||||||
EmitFunctionBody();
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
|
|
||||||
unsigned MaxSGPR = 0;
|
|
||||||
unsigned MaxVGPR = 0;
|
|
||||||
bool VCCUsed = false;
|
|
||||||
const SIRegisterInfo * RI =
|
|
||||||
static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
|
|
||||||
|
|
||||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
|
||||||
BB != BB_E; ++BB) {
|
|
||||||
MachineBasicBlock &MBB = *BB;
|
|
||||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
|
||||||
I != E; ++I) {
|
|
||||||
MachineInstr &MI = *I;
|
|
||||||
|
|
||||||
unsigned numOperands = MI.getNumOperands();
|
|
||||||
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
|
|
||||||
MachineOperand & MO = MI.getOperand(op_idx);
|
|
||||||
unsigned maxUsed;
|
|
||||||
unsigned width = 0;
|
|
||||||
bool isSGPR = false;
|
|
||||||
unsigned reg;
|
|
||||||
unsigned hwReg;
|
|
||||||
if (!MO.isReg()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
reg = MO.getReg();
|
|
||||||
if (reg == AMDGPU::VCC) {
|
|
||||||
VCCUsed = true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
switch (reg) {
|
|
||||||
default: break;
|
|
||||||
case AMDGPU::EXEC:
|
|
||||||
case AMDGPU::SI_LITERAL_CONSTANT:
|
|
||||||
case AMDGPU::SREG_LIT_0:
|
|
||||||
case AMDGPU::M0:
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (AMDGPU::SReg_32RegClass.contains(reg)) {
|
|
||||||
isSGPR = true;
|
|
||||||
width = 1;
|
|
||||||
} else if (AMDGPU::VReg_32RegClass.contains(reg)) {
|
|
||||||
isSGPR = false;
|
|
||||||
width = 1;
|
|
||||||
} else if (AMDGPU::SReg_64RegClass.contains(reg)) {
|
|
||||||
isSGPR = true;
|
|
||||||
width = 2;
|
|
||||||
} else if (AMDGPU::VReg_64RegClass.contains(reg)) {
|
|
||||||
isSGPR = false;
|
|
||||||
width = 2;
|
|
||||||
} else if (AMDGPU::SReg_128RegClass.contains(reg)) {
|
|
||||||
isSGPR = true;
|
|
||||||
width = 4;
|
|
||||||
} else if (AMDGPU::VReg_128RegClass.contains(reg)) {
|
|
||||||
isSGPR = false;
|
|
||||||
width = 4;
|
|
||||||
} else if (AMDGPU::SReg_256RegClass.contains(reg)) {
|
|
||||||
isSGPR = true;
|
|
||||||
width = 8;
|
|
||||||
} else {
|
|
||||||
assert(!"Unknown register class");
|
|
||||||
}
|
|
||||||
hwReg = RI->getHWRegNum(reg);
|
|
||||||
maxUsed = hwReg + width - 1;
|
|
||||||
if (isSGPR) {
|
|
||||||
MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
|
|
||||||
} else {
|
|
||||||
MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (VCCUsed) {
|
|
||||||
MaxSGPR += 2;
|
|
||||||
}
|
|
||||||
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
|
|
||||||
OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
|
|
||||||
OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
|
|
||||||
OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
|
|
||||||
}
|
|
||||||
|
|
@ -1,43 +0,0 @@
|
||||||
//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// AMDGPU Assembly printer class.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPU_ASMPRINTER_H
|
|
||||||
#define AMDGPU_ASMPRINTER_H
|
|
||||||
|
|
||||||
#include "llvm/CodeGen/AsmPrinter.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class AMDGPUAsmPrinter : public AsmPrinter {
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
|
|
||||||
: AsmPrinter(TM, Streamer) { }
|
|
||||||
|
|
||||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
|
||||||
|
|
||||||
virtual const char *getPassName() const {
|
|
||||||
return "AMDGPU Assembly Printer";
|
|
||||||
}
|
|
||||||
|
|
||||||
/// EmitProgramInfo - Emit register usage information so that the GPU driver
|
|
||||||
/// can correctly setup the GPU state.
|
|
||||||
void EmitProgramInfo(MachineFunction &MF);
|
|
||||||
|
|
||||||
/// EmitInstuction - Implemented in AMDGPUMCInstLower.cpp
|
|
||||||
virtual void EmitInstruction(const MachineInstr *MI);
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End anonymous llvm
|
|
||||||
|
|
||||||
#endif //AMDGPU_ASMPRINTER_H
|
|
||||||
|
|
@ -1,48 +0,0 @@
|
||||||
//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// CodeEmitter interface for R600 and SI codegen.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPUCODEEMITTER_H
|
|
||||||
#define AMDGPUCODEEMITTER_H
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class AMDGPUCodeEmitter {
|
|
||||||
public:
|
|
||||||
uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
|
|
||||||
virtual uint64_t getMachineOpValue(const MachineInstr &MI,
|
|
||||||
const MachineOperand &MO) const { return 0; }
|
|
||||||
virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
|
|
||||||
unsigned OpNo) const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
|
|
||||||
unsigned OpNo) const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
virtual uint64_t VOPPostEncode(const MachineInstr &MI,
|
|
||||||
uint64_t Value) const {
|
|
||||||
return Value;
|
|
||||||
}
|
|
||||||
virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
|
|
||||||
unsigned OpNo) const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
|
|
||||||
const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif // AMDGPUCODEEMITTER_H
|
|
||||||
|
|
@ -1,62 +0,0 @@
|
||||||
//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This pass lowers AMDIL machine instructions to the appropriate hardware
|
|
||||||
// instructions.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPU.h"
|
|
||||||
#include "AMDGPUInstrInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
class AMDGPUConvertToISAPass : public MachineFunctionPass {
|
|
||||||
|
|
||||||
private:
|
|
||||||
static char ID;
|
|
||||||
TargetMachine &TM;
|
|
||||||
|
|
||||||
public:
|
|
||||||
AMDGPUConvertToISAPass(TargetMachine &tm) :
|
|
||||||
MachineFunctionPass(ID), TM(tm) { }
|
|
||||||
|
|
||||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
|
||||||
|
|
||||||
virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End anonymous namespace
|
|
||||||
|
|
||||||
char AMDGPUConvertToISAPass::ID = 0;
|
|
||||||
|
|
||||||
FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
|
|
||||||
return new AMDGPUConvertToISAPass(tm);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF)
|
|
||||||
{
|
|
||||||
const AMDGPUInstrInfo * TII =
|
|
||||||
static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
|
|
||||||
|
|
||||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
|
||||||
BB != BB_E; ++BB) {
|
|
||||||
MachineBasicBlock &MBB = *BB;
|
|
||||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
|
||||||
I != E; ++I) {
|
|
||||||
MachineInstr &MI = *I;
|
|
||||||
TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
@ -1,351 +0,0 @@
|
||||||
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This is the parent TargetLowering class for hardware code gen targets.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPUISelLowering.h"
|
|
||||||
#include "AMDILIntrinsicInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
#include "llvm/CodeGen/SelectionDAG.h"
|
|
||||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
|
|
||||||
TargetLowering(TM, new TargetLoweringObjectFileELF())
|
|
||||||
{
|
|
||||||
|
|
||||||
// Initialize target lowering borrowed from AMDIL
|
|
||||||
InitAMDILLowering();
|
|
||||||
|
|
||||||
// We need to custom lower some of the intrinsics
|
|
||||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
|
||||||
|
|
||||||
// Library functions. These default to Expand, but we have instructions
|
|
||||||
// for them.
|
|
||||||
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
|
|
||||||
setOperationAction(ISD::FEXP2, MVT::f32, Legal);
|
|
||||||
setOperationAction(ISD::FPOW, MVT::f32, Legal);
|
|
||||||
setOperationAction(ISD::FLOG2, MVT::f32, Legal);
|
|
||||||
setOperationAction(ISD::FABS, MVT::f32, Legal);
|
|
||||||
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
|
|
||||||
setOperationAction(ISD::FRINT, MVT::f32, Legal);
|
|
||||||
|
|
||||||
setOperationAction(ISD::UDIV, MVT::i32, Expand);
|
|
||||||
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
|
|
||||||
setOperationAction(ISD::UREM, MVT::i32, Expand);
|
|
||||||
}
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
// TargetLowering Callbacks
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
SDValue AMDGPUTargetLowering::LowerFormalArguments(
|
|
||||||
SDValue Chain,
|
|
||||||
CallingConv::ID CallConv,
|
|
||||||
bool isVarArg,
|
|
||||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
||||||
DebugLoc DL, SelectionDAG &DAG,
|
|
||||||
SmallVectorImpl<SDValue> &InVals) const
|
|
||||||
{
|
|
||||||
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
|
|
||||||
InVals.push_back(SDValue());
|
|
||||||
}
|
|
||||||
return Chain;
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue AMDGPUTargetLowering::LowerReturn(
|
|
||||||
SDValue Chain,
|
|
||||||
CallingConv::ID CallConv,
|
|
||||||
bool isVarArg,
|
|
||||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
||||||
const SmallVectorImpl<SDValue> &OutVals,
|
|
||||||
DebugLoc DL, SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
|
|
||||||
}
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
// Target specific lowering
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
|
|
||||||
const
|
|
||||||
{
|
|
||||||
switch (Op.getOpcode()) {
|
|
||||||
default:
|
|
||||||
Op.getNode()->dump();
|
|
||||||
assert(0 && "Custom lowering code for this"
|
|
||||||
"instruction is not implemented yet!");
|
|
||||||
break;
|
|
||||||
// AMDIL DAG lowering
|
|
||||||
case ISD::SDIV: return LowerSDIV(Op, DAG);
|
|
||||||
case ISD::SREM: return LowerSREM(Op, DAG);
|
|
||||||
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
|
|
||||||
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
|
||||||
// AMDGPU DAG lowering
|
|
||||||
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
|
|
||||||
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
|
|
||||||
}
|
|
||||||
return Op;
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|
||||||
SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
EVT VT = Op.getValueType();
|
|
||||||
|
|
||||||
switch (IntrinsicID) {
|
|
||||||
default: return Op;
|
|
||||||
case AMDGPUIntrinsic::AMDIL_abs:
|
|
||||||
return LowerIntrinsicIABS(Op, DAG);
|
|
||||||
case AMDGPUIntrinsic::AMDIL_exp:
|
|
||||||
return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
|
|
||||||
case AMDGPUIntrinsic::AMDGPU_lrp:
|
|
||||||
return LowerIntrinsicLRP(Op, DAG);
|
|
||||||
case AMDGPUIntrinsic::AMDIL_fraction:
|
|
||||||
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
|
|
||||||
case AMDGPUIntrinsic::AMDIL_mad:
|
|
||||||
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
|
|
||||||
Op.getOperand(2), Op.getOperand(3));
|
|
||||||
case AMDGPUIntrinsic::AMDIL_max:
|
|
||||||
return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
|
|
||||||
Op.getOperand(2));
|
|
||||||
case AMDGPUIntrinsic::AMDGPU_imax:
|
|
||||||
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
|
|
||||||
Op.getOperand(2));
|
|
||||||
case AMDGPUIntrinsic::AMDGPU_umax:
|
|
||||||
return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
|
|
||||||
Op.getOperand(2));
|
|
||||||
case AMDGPUIntrinsic::AMDIL_min:
|
|
||||||
return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
|
|
||||||
Op.getOperand(2));
|
|
||||||
case AMDGPUIntrinsic::AMDGPU_imin:
|
|
||||||
return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
|
|
||||||
Op.getOperand(2));
|
|
||||||
case AMDGPUIntrinsic::AMDGPU_umin:
|
|
||||||
return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
|
|
||||||
Op.getOperand(2));
|
|
||||||
case AMDGPUIntrinsic::AMDIL_round_nearest:
|
|
||||||
return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
///IABS(a) = SMAX(sub(0, a), a)
|
|
||||||
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
|
|
||||||
SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
EVT VT = Op.getValueType();
|
|
||||||
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
|
|
||||||
Op.getOperand(1));
|
|
||||||
|
|
||||||
return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Linear Interpolation
|
|
||||||
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
|
|
||||||
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
|
|
||||||
SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
EVT VT = Op.getValueType();
|
|
||||||
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
|
|
||||||
DAG.getConstantFP(1.0f, MVT::f32),
|
|
||||||
Op.getOperand(1));
|
|
||||||
SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
|
|
||||||
Op.getOperand(3));
|
|
||||||
return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
|
|
||||||
Op.getOperand(2),
|
|
||||||
OneSubAC);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
|
|
||||||
SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
EVT VT = Op.getValueType();
|
|
||||||
|
|
||||||
SDValue Num = Op.getOperand(0);
|
|
||||||
SDValue Den = Op.getOperand(1);
|
|
||||||
|
|
||||||
SmallVector<SDValue, 8> Results;
|
|
||||||
|
|
||||||
// RCP = URECIP(Den) = 2^32 / Den + e
|
|
||||||
// e is rounding error.
|
|
||||||
SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
|
|
||||||
|
|
||||||
// RCP_LO = umulo(RCP, Den) */
|
|
||||||
SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
|
|
||||||
|
|
||||||
// RCP_HI = mulhu (RCP, Den) */
|
|
||||||
SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
|
|
||||||
|
|
||||||
// NEG_RCP_LO = -RCP_LO
|
|
||||||
SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
|
|
||||||
RCP_LO);
|
|
||||||
|
|
||||||
// ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
|
|
||||||
SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
|
|
||||||
NEG_RCP_LO, RCP_LO,
|
|
||||||
ISD::SETEQ);
|
|
||||||
// Calculate the rounding error from the URECIP instruction
|
|
||||||
// E = mulhu(ABS_RCP_LO, RCP)
|
|
||||||
SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
|
|
||||||
|
|
||||||
// RCP_A_E = RCP + E
|
|
||||||
SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
|
|
||||||
|
|
||||||
// RCP_S_E = RCP - E
|
|
||||||
SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
|
|
||||||
|
|
||||||
// Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
|
|
||||||
SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
|
|
||||||
RCP_A_E, RCP_S_E,
|
|
||||||
ISD::SETEQ);
|
|
||||||
// Quotient = mulhu(Tmp0, Num)
|
|
||||||
SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
|
|
||||||
|
|
||||||
// Num_S_Remainder = Quotient * Den
|
|
||||||
SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
|
|
||||||
|
|
||||||
// Remainder = Num - Num_S_Remainder
|
|
||||||
SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
|
|
||||||
|
|
||||||
// Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
|
|
||||||
SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
|
|
||||||
DAG.getConstant(-1, VT),
|
|
||||||
DAG.getConstant(0, VT),
|
|
||||||
ISD::SETGE);
|
|
||||||
// Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
|
|
||||||
SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
|
|
||||||
DAG.getConstant(0, VT),
|
|
||||||
DAG.getConstant(-1, VT),
|
|
||||||
DAG.getConstant(0, VT),
|
|
||||||
ISD::SETGE);
|
|
||||||
// Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
|
|
||||||
SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
|
|
||||||
Remainder_GE_Zero);
|
|
||||||
|
|
||||||
// Calculate Division result:
|
|
||||||
|
|
||||||
// Quotient_A_One = Quotient + 1
|
|
||||||
SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
|
|
||||||
DAG.getConstant(1, VT));
|
|
||||||
|
|
||||||
// Quotient_S_One = Quotient - 1
|
|
||||||
SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
|
|
||||||
DAG.getConstant(1, VT));
|
|
||||||
|
|
||||||
// Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
|
|
||||||
SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
|
|
||||||
Quotient, Quotient_A_One, ISD::SETEQ);
|
|
||||||
|
|
||||||
// Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
|
|
||||||
Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
|
|
||||||
Quotient_S_One, Div, ISD::SETEQ);
|
|
||||||
|
|
||||||
// Calculate Rem result:
|
|
||||||
|
|
||||||
// Remainder_S_Den = Remainder - Den
|
|
||||||
SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
|
|
||||||
|
|
||||||
// Remainder_A_Den = Remainder + Den
|
|
||||||
SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
|
|
||||||
|
|
||||||
// Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
|
|
||||||
SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
|
|
||||||
Remainder, Remainder_S_Den, ISD::SETEQ);
|
|
||||||
|
|
||||||
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
|
|
||||||
Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
|
|
||||||
Remainder_A_Den, Rem, ISD::SETEQ);
|
|
||||||
|
|
||||||
DAG.ReplaceAllUsesWith(Op.getValue(0).getNode(), &Div);
|
|
||||||
DAG.ReplaceAllUsesWith(Op.getValue(1).getNode(), &Rem);
|
|
||||||
|
|
||||||
return Op;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Helper functions
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const
|
|
||||||
{
|
|
||||||
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
|
|
||||||
return CFP->isExactlyValue(1.0);
|
|
||||||
}
|
|
||||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
|
|
||||||
return C->isAllOnesValue();
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const
|
|
||||||
{
|
|
||||||
if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
|
|
||||||
return CFP->getValueAPF().isZero();
|
|
||||||
}
|
|
||||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
|
|
||||||
return C->isNullValue();
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
|
|
||||||
const TargetRegisterClass *RC,
|
|
||||||
unsigned Reg, EVT VT) const {
|
|
||||||
MachineFunction &MF = DAG.getMachineFunction();
|
|
||||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
||||||
unsigned VirtualRegister;
|
|
||||||
if (!MRI.isLiveIn(Reg)) {
|
|
||||||
VirtualRegister = MRI.createVirtualRegister(RC);
|
|
||||||
MRI.addLiveIn(Reg, VirtualRegister);
|
|
||||||
} else {
|
|
||||||
VirtualRegister = MRI.getLiveInVirtReg(Reg);
|
|
||||||
}
|
|
||||||
return DAG.getRegister(VirtualRegister, VT);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
|
|
||||||
|
|
||||||
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const
|
|
||||||
{
|
|
||||||
switch (Opcode) {
|
|
||||||
default: return 0;
|
|
||||||
// AMDIL DAG nodes
|
|
||||||
NODE_NAME_CASE(MAD);
|
|
||||||
NODE_NAME_CASE(CALL);
|
|
||||||
NODE_NAME_CASE(UMUL);
|
|
||||||
NODE_NAME_CASE(DIV_INF);
|
|
||||||
NODE_NAME_CASE(RET_FLAG);
|
|
||||||
NODE_NAME_CASE(BRANCH_COND);
|
|
||||||
|
|
||||||
// AMDGPU DAG nodes
|
|
||||||
NODE_NAME_CASE(FRACT)
|
|
||||||
NODE_NAME_CASE(FMAX)
|
|
||||||
NODE_NAME_CASE(SMAX)
|
|
||||||
NODE_NAME_CASE(UMAX)
|
|
||||||
NODE_NAME_CASE(FMIN)
|
|
||||||
NODE_NAME_CASE(SMIN)
|
|
||||||
NODE_NAME_CASE(UMIN)
|
|
||||||
NODE_NAME_CASE(URECIP)
|
|
||||||
NODE_NAME_CASE(INTERP)
|
|
||||||
NODE_NAME_CASE(INTERP_P0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,142 +0,0 @@
|
||||||
//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains the interface defintiion of the TargetLowering class
|
|
||||||
// that is common to all AMD GPUs.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPUISELLOWERING_H
|
|
||||||
#define AMDGPUISELLOWERING_H
|
|
||||||
|
|
||||||
#include "llvm/Target/TargetLowering.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class MachineRegisterInfo;
|
|
||||||
|
|
||||||
class AMDGPUTargetLowering : public TargetLowering
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
|
|
||||||
/// CreateLiveInRegister - Helper function that adds Reg to the LiveIn list
|
|
||||||
/// of the DAG's MachineFunction. This returns a Register SDNode representing
|
|
||||||
/// Reg.
|
|
||||||
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
|
|
||||||
unsigned Reg, EVT VT) const;
|
|
||||||
|
|
||||||
bool isHWTrueValue(SDValue Op) const;
|
|
||||||
bool isHWFalseValue(SDValue Op) const;
|
|
||||||
|
|
||||||
public:
|
|
||||||
AMDGPUTargetLowering(TargetMachine &TM);
|
|
||||||
|
|
||||||
virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
|
||||||
bool isVarArg,
|
|
||||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
||||||
DebugLoc DL, SelectionDAG &DAG,
|
|
||||||
SmallVectorImpl<SDValue> &InVals) const;
|
|
||||||
|
|
||||||
virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
|
|
||||||
bool isVarArg,
|
|
||||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
|
||||||
const SmallVectorImpl<SDValue> &OutVals,
|
|
||||||
DebugLoc DL, SelectionDAG &DAG) const;
|
|
||||||
|
|
||||||
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
virtual const char* getTargetNodeName(unsigned Opcode) const;
|
|
||||||
|
|
||||||
// Functions defined in AMDILISelLowering.cpp
|
|
||||||
public:
|
|
||||||
|
|
||||||
/// computeMaskedBitsForTargetNode - Determine which of the bits specified
|
|
||||||
/// in Mask are known to be either zero or one and return them in the
|
|
||||||
/// KnownZero/KnownOne bitsets.
|
|
||||||
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
|
|
||||||
APInt &KnownZero,
|
|
||||||
APInt &KnownOne,
|
|
||||||
const SelectionDAG &DAG,
|
|
||||||
unsigned Depth = 0) const;
|
|
||||||
|
|
||||||
virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|
||||||
const CallInst &I, unsigned Intrinsic) const;
|
|
||||||
|
|
||||||
/// isFPImmLegal - We want to mark f32/f64 floating point values as legal.
|
|
||||||
bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
|
|
||||||
|
|
||||||
/// ShouldShrinkFPConstant - We don't want to shrink f64/f32 constants.
|
|
||||||
bool ShouldShrinkFPConstant(EVT VT) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
void InitAMDILLowering();
|
|
||||||
SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
|
|
||||||
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
namespace AMDGPUISD
|
|
||||||
{
|
|
||||||
|
|
||||||
enum
|
|
||||||
{
|
|
||||||
// AMDIL ISD Opcodes
|
|
||||||
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
|
||||||
MAD, // 32bit Fused Multiply Add instruction
|
|
||||||
CALL, // Function call based on a single integer
|
|
||||||
UMUL, // 32bit unsigned multiplication
|
|
||||||
DIV_INF, // Divide with infinity returned on zero divisor
|
|
||||||
RET_FLAG,
|
|
||||||
BRANCH_COND,
|
|
||||||
// End AMDIL ISD Opcodes
|
|
||||||
BITALIGN,
|
|
||||||
FRACT,
|
|
||||||
FMAX,
|
|
||||||
SMAX,
|
|
||||||
UMAX,
|
|
||||||
FMIN,
|
|
||||||
SMIN,
|
|
||||||
UMIN,
|
|
||||||
URECIP,
|
|
||||||
INTERP,
|
|
||||||
INTERP_P0,
|
|
||||||
LAST_AMDGPU_ISD_NUMBER
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
} // End namespace AMDGPUISD
|
|
||||||
|
|
||||||
namespace SIISD {
|
|
||||||
|
|
||||||
enum {
|
|
||||||
SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
|
|
||||||
VCC_AND,
|
|
||||||
VCC_BITCAST
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace SIISD
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif // AMDGPUISELLOWERING_H
|
|
||||||
|
|
@ -1,258 +0,0 @@
|
||||||
//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains the implementation of the TargetInstrInfo class that is
|
|
||||||
// common to all AMD GPUs.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPUInstrInfo.h"
|
|
||||||
#include "AMDGPURegisterInfo.h"
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
|
||||||
#include "AMDIL.h"
|
|
||||||
#include "AMDILUtilityFunctions.h"
|
|
||||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
|
|
||||||
#define GET_INSTRINFO_CTOR
|
|
||||||
#include "AMDGPUGenInstrInfo.inc"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
|
|
||||||
: AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
|
|
||||||
|
|
||||||
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
|
|
||||||
return RI;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
|
|
||||||
unsigned &SrcReg, unsigned &DstReg,
|
|
||||||
unsigned &SubIdx) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
|
|
||||||
int &FrameIndex) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
|
|
||||||
int &FrameIndex) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
|
|
||||||
const MachineMemOperand *&MMO,
|
|
||||||
int &FrameIndex) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
|
|
||||||
int &FrameIndex) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
|
|
||||||
int &FrameIndex) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
|
|
||||||
const MachineMemOperand *&MMO,
|
|
||||||
int &FrameIndex) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineInstr *
|
|
||||||
AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
|
|
||||||
MachineBasicBlock::iterator &MBBI,
|
|
||||||
LiveVariables *LV) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
|
|
||||||
MachineBasicBlock &MBB) const {
|
|
||||||
while (iter != MBB.end()) {
|
|
||||||
switch (iter->getOpcode()) {
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
|
|
||||||
case AMDGPU::BRANCH:
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
++iter;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
|
|
||||||
MachineBasicBlock::iterator tmp = MBB->end();
|
|
||||||
if (!MBB->size()) {
|
|
||||||
return MBB->end();
|
|
||||||
}
|
|
||||||
while (--tmp) {
|
|
||||||
if (tmp->getOpcode() == AMDGPU::ENDLOOP
|
|
||||||
|| tmp->getOpcode() == AMDGPU::ENDIF
|
|
||||||
|| tmp->getOpcode() == AMDGPU::ELSE) {
|
|
||||||
if (tmp == MBB->begin()) {
|
|
||||||
return tmp;
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return ++tmp;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return MBB->end();
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI,
|
|
||||||
unsigned SrcReg, bool isKill,
|
|
||||||
int FrameIndex,
|
|
||||||
const TargetRegisterClass *RC,
|
|
||||||
const TargetRegisterInfo *TRI) const {
|
|
||||||
assert(!"Not Implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI,
|
|
||||||
unsigned DestReg, int FrameIndex,
|
|
||||||
const TargetRegisterClass *RC,
|
|
||||||
const TargetRegisterInfo *TRI) const {
|
|
||||||
assert(!"Not Implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineInstr *
|
|
||||||
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
|
||||||
MachineInstr *MI,
|
|
||||||
const SmallVectorImpl<unsigned> &Ops,
|
|
||||||
int FrameIndex) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
MachineInstr*
|
|
||||||
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
|
|
||||||
MachineInstr *MI,
|
|
||||||
const SmallVectorImpl<unsigned> &Ops,
|
|
||||||
MachineInstr *LoadMI) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
bool
|
|
||||||
AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
|
|
||||||
const SmallVectorImpl<unsigned> &Ops) const
|
|
||||||
{
|
|
||||||
// TODO: Implement this function
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
bool
|
|
||||||
AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
|
||||||
unsigned Reg, bool UnfoldLoad,
|
|
||||||
bool UnfoldStore,
|
|
||||||
SmallVectorImpl<MachineInstr*> &NewMIs) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
|
||||||
SmallVectorImpl<SDNode*> &NewNodes) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned
|
|
||||||
AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
|
|
||||||
bool UnfoldLoad, bool UnfoldStore,
|
|
||||||
unsigned *LoadRegIndex) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
|
|
||||||
int64_t Offset1, int64_t Offset2,
|
|
||||||
unsigned NumLoads) const {
|
|
||||||
assert(Offset2 > Offset1
|
|
||||||
&& "Second offset should be larger than first offset!");
|
|
||||||
// If we have less than 16 loads in a row, and the offsets are within 16,
|
|
||||||
// then schedule together.
|
|
||||||
// TODO: Make the loads schedule near if it fits in a cacheline
|
|
||||||
return (NumLoads < 16 && (Offset2 - Offset1) < 16);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
|
|
||||||
const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
bool
|
|
||||||
AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
|
||||||
const SmallVectorImpl<MachineOperand> &Pred2)
|
|
||||||
const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
|
|
||||||
std::vector<MachineOperand> &Pred) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return MI->getDesc().isPredicable();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
|
|
||||||
// TODO: Implement this function
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
|
|
||||||
DebugLoc DL) const
|
|
||||||
{
|
|
||||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
||||||
const AMDGPURegisterInfo & RI = getRegisterInfo();
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < MI.getNumOperands(); i++) {
|
|
||||||
MachineOperand &MO = MI.getOperand(i);
|
|
||||||
// Convert dst regclass to one that is supported by the ISA
|
|
||||||
if (MO.isReg() && MO.isDef()) {
|
|
||||||
if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
|
|
||||||
const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
|
|
||||||
const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
|
|
||||||
|
|
||||||
assert(newRegClass);
|
|
||||||
|
|
||||||
MRI.setRegClass(MO.getReg(), newRegClass);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,148 +0,0 @@
|
||||||
//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains the definition of a TargetInstrInfo class that is common
|
|
||||||
// to all AMD GPUs.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPUINSTRUCTIONINFO_H_
|
|
||||||
#define AMDGPUINSTRUCTIONINFO_H_
|
|
||||||
|
|
||||||
#include "AMDGPURegisterInfo.h"
|
|
||||||
#include "AMDGPUInstrInfo.h"
|
|
||||||
#include "llvm/Target/TargetInstrInfo.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
#define GET_INSTRINFO_HEADER
|
|
||||||
#define GET_INSTRINFO_ENUM
|
|
||||||
#include "AMDGPUGenInstrInfo.inc"
|
|
||||||
|
|
||||||
#define OPCODE_IS_ZERO_INT 0x00000042
|
|
||||||
#define OPCODE_IS_NOT_ZERO_INT 0x00000045
|
|
||||||
#define OPCODE_IS_ZERO 0x00000020
|
|
||||||
#define OPCODE_IS_NOT_ZERO 0x00000023
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class AMDGPUTargetMachine;
|
|
||||||
class MachineFunction;
|
|
||||||
class MachineInstr;
|
|
||||||
class MachineInstrBuilder;
|
|
||||||
|
|
||||||
class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
|
|
||||||
private:
|
|
||||||
const AMDGPURegisterInfo RI;
|
|
||||||
TargetMachine &TM;
|
|
||||||
bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
|
|
||||||
MachineBasicBlock &MBB) const;
|
|
||||||
public:
|
|
||||||
explicit AMDGPUInstrInfo(TargetMachine &tm);
|
|
||||||
|
|
||||||
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
|
|
||||||
|
|
||||||
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
|
||||||
unsigned &DstReg, unsigned &SubIdx) const;
|
|
||||||
|
|
||||||
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
|
|
||||||
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
|
|
||||||
int &FrameIndex) const;
|
|
||||||
bool hasLoadFromStackSlot(const MachineInstr *MI,
|
|
||||||
const MachineMemOperand *&MMO,
|
|
||||||
int &FrameIndex) const;
|
|
||||||
unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
|
|
||||||
unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
|
|
||||||
int &FrameIndex) const;
|
|
||||||
bool hasStoreFromStackSlot(const MachineInstr *MI,
|
|
||||||
const MachineMemOperand *&MMO,
|
|
||||||
int &FrameIndex) const;
|
|
||||||
|
|
||||||
MachineInstr *
|
|
||||||
convertToThreeAddress(MachineFunction::iterator &MFI,
|
|
||||||
MachineBasicBlock::iterator &MBBI,
|
|
||||||
LiveVariables *LV) const;
|
|
||||||
|
|
||||||
|
|
||||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
|
||||||
unsigned DestReg, unsigned SrcReg,
|
|
||||||
bool KillSrc) const = 0;
|
|
||||||
|
|
||||||
void storeRegToStackSlot(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI,
|
|
||||||
unsigned SrcReg, bool isKill, int FrameIndex,
|
|
||||||
const TargetRegisterClass *RC,
|
|
||||||
const TargetRegisterInfo *TRI) const;
|
|
||||||
void loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI,
|
|
||||||
unsigned DestReg, int FrameIndex,
|
|
||||||
const TargetRegisterClass *RC,
|
|
||||||
const TargetRegisterInfo *TRI) const;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
|
|
||||||
MachineInstr *MI,
|
|
||||||
const SmallVectorImpl<unsigned> &Ops,
|
|
||||||
int FrameIndex) const;
|
|
||||||
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
|
|
||||||
MachineInstr *MI,
|
|
||||||
const SmallVectorImpl<unsigned> &Ops,
|
|
||||||
MachineInstr *LoadMI) const;
|
|
||||||
public:
|
|
||||||
bool canFoldMemoryOperand(const MachineInstr *MI,
|
|
||||||
const SmallVectorImpl<unsigned> &Ops) const;
|
|
||||||
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
|
|
||||||
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
|
|
||||||
SmallVectorImpl<MachineInstr *> &NewMIs) const;
|
|
||||||
bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
|
||||||
SmallVectorImpl<SDNode *> &NewNodes) const;
|
|
||||||
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
|
|
||||||
bool UnfoldLoad, bool UnfoldStore,
|
|
||||||
unsigned *LoadRegIndex = 0) const;
|
|
||||||
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
|
|
||||||
int64_t Offset1, int64_t Offset2,
|
|
||||||
unsigned NumLoads) const;
|
|
||||||
|
|
||||||
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
|
|
||||||
void insertNoop(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI) const;
|
|
||||||
bool isPredicated(const MachineInstr *MI) const;
|
|
||||||
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
|
||||||
const SmallVectorImpl<MachineOperand> &Pred2) const;
|
|
||||||
bool DefinesPredicate(MachineInstr *MI,
|
|
||||||
std::vector<MachineOperand> &Pred) const;
|
|
||||||
bool isPredicable(MachineInstr *MI) const;
|
|
||||||
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
|
|
||||||
|
|
||||||
// Helper functions that check the opcode for status information
|
|
||||||
bool isLoadInst(llvm::MachineInstr *MI) const;
|
|
||||||
bool isExtLoadInst(llvm::MachineInstr *MI) const;
|
|
||||||
bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
|
|
||||||
bool isSExtLoadInst(llvm::MachineInstr *MI) const;
|
|
||||||
bool isZExtLoadInst(llvm::MachineInstr *MI) const;
|
|
||||||
bool isAExtLoadInst(llvm::MachineInstr *MI) const;
|
|
||||||
bool isStoreInst(llvm::MachineInstr *MI) const;
|
|
||||||
bool isTruncStoreInst(llvm::MachineInstr *MI) const;
|
|
||||||
|
|
||||||
virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
|
||||||
int64_t Imm) const = 0;
|
|
||||||
virtual unsigned getIEQOpcode() const = 0;
|
|
||||||
virtual bool isMov(unsigned opcode) const = 0;
|
|
||||||
|
|
||||||
/// convertToISA - Convert the AMDIL MachineInstr to a supported ISA
|
|
||||||
/// MachineInstr
|
|
||||||
virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
|
|
||||||
DebugLoc DL) const;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End llvm namespace
|
|
||||||
|
|
||||||
#endif // AMDGPUINSTRINFO_H_
|
|
||||||
|
|
@ -1,71 +0,0 @@
|
||||||
//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains DAG node defintions for the AMDGPU target.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// AMDGPU DAG Profiles
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
|
|
||||||
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
|
|
||||||
]>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// AMDGPU DAG Nodes
|
|
||||||
//
|
|
||||||
|
|
||||||
// out = ((a << 32) | b) >> c)
|
|
||||||
//
|
|
||||||
// Can be used to optimize rtol:
|
|
||||||
// rotl(a, b) = bitalign(a, a, 32 - b)
|
|
||||||
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
|
|
||||||
|
|
||||||
// out = a - floor(a)
|
|
||||||
def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
|
|
||||||
|
|
||||||
// out = max(a, b) a and b are floats
|
|
||||||
def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
|
|
||||||
[SDNPCommutative, SDNPAssociative]
|
|
||||||
>;
|
|
||||||
|
|
||||||
// out = max(a, b) a and b are signed ints
|
|
||||||
def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
|
|
||||||
[SDNPCommutative, SDNPAssociative]
|
|
||||||
>;
|
|
||||||
|
|
||||||
// out = max(a, b) a and b are unsigned ints
|
|
||||||
def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
|
|
||||||
[SDNPCommutative, SDNPAssociative]
|
|
||||||
>;
|
|
||||||
|
|
||||||
// out = min(a, b) a and b are floats
|
|
||||||
def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
|
|
||||||
[SDNPCommutative, SDNPAssociative]
|
|
||||||
>;
|
|
||||||
|
|
||||||
// out = min(a, b) a snd b are signed ints
|
|
||||||
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
|
|
||||||
[SDNPCommutative, SDNPAssociative]
|
|
||||||
>;
|
|
||||||
|
|
||||||
// out = min(a, b) a and b are unsigned ints
|
|
||||||
def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
|
|
||||||
[SDNPCommutative, SDNPAssociative]
|
|
||||||
>;
|
|
||||||
|
|
||||||
// urecip - This operation is a helper for integer division, it returns the
|
|
||||||
// result of 1 / a as a fractional unsigned integer.
|
|
||||||
// out = (2^32 / a) + e
|
|
||||||
// e is rounding error
|
|
||||||
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
|
|
||||||
|
|
||||||
def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
|
|
||||||
|
|
@ -1,183 +0,0 @@
|
||||||
//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains instruction defs that are common to all hw codegen
|
|
||||||
// targets.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
|
|
||||||
field bits<16> AMDILOp = 0;
|
|
||||||
field bits<3> Gen = 0;
|
|
||||||
|
|
||||||
let Namespace = "AMDGPU";
|
|
||||||
let OutOperandList = outs;
|
|
||||||
let InOperandList = ins;
|
|
||||||
let AsmString = asm;
|
|
||||||
let Pattern = pattern;
|
|
||||||
let Itinerary = NullALU;
|
|
||||||
let TSFlags{42-40} = Gen;
|
|
||||||
let TSFlags{63-48} = AMDILOp;
|
|
||||||
}
|
|
||||||
|
|
||||||
class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
|
|
||||||
: AMDGPUInst<outs, ins, asm, pattern> {
|
|
||||||
|
|
||||||
field bits<32> Inst = 0xffffffff;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
def COND_EQ : PatLeaf <
|
|
||||||
(cond),
|
|
||||||
[{switch(N->get()){{default: return false;
|
|
||||||
case ISD::SETOEQ: case ISD::SETUEQ:
|
|
||||||
case ISD::SETEQ: return true;}}}]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def COND_NE : PatLeaf <
|
|
||||||
(cond),
|
|
||||||
[{switch(N->get()){{default: return false;
|
|
||||||
case ISD::SETONE: case ISD::SETUNE:
|
|
||||||
case ISD::SETNE: return true;}}}]
|
|
||||||
>;
|
|
||||||
def COND_GT : PatLeaf <
|
|
||||||
(cond),
|
|
||||||
[{switch(N->get()){{default: return false;
|
|
||||||
case ISD::SETOGT: case ISD::SETUGT:
|
|
||||||
case ISD::SETGT: return true;}}}]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def COND_GE : PatLeaf <
|
|
||||||
(cond),
|
|
||||||
[{switch(N->get()){{default: return false;
|
|
||||||
case ISD::SETOGE: case ISD::SETUGE:
|
|
||||||
case ISD::SETGE: return true;}}}]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def COND_LT : PatLeaf <
|
|
||||||
(cond),
|
|
||||||
[{switch(N->get()){{default: return false;
|
|
||||||
case ISD::SETOLT: case ISD::SETULT:
|
|
||||||
case ISD::SETLT: return true;}}}]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def COND_LE : PatLeaf <
|
|
||||||
(cond),
|
|
||||||
[{switch(N->get()){{default: return false;
|
|
||||||
case ISD::SETOLE: case ISD::SETULE:
|
|
||||||
case ISD::SETLE: return true;}}}]
|
|
||||||
>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Load/Store Pattern Fragments
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
|
|
||||||
return isGlobalLoad(dyn_cast<LoadSDNode>(N));
|
|
||||||
}]>;
|
|
||||||
|
|
||||||
class Constants {
|
|
||||||
int TWO_PI = 0x40c90fdb;
|
|
||||||
int PI = 0x40490fdb;
|
|
||||||
int TWO_PI_INV = 0x3e22f983;
|
|
||||||
}
|
|
||||||
def CONST : Constants;
|
|
||||||
|
|
||||||
def FP_ZERO : PatLeaf <
|
|
||||||
(fpimm),
|
|
||||||
[{return N->getValueAPF().isZero();}]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def FP_ONE : PatLeaf <
|
|
||||||
(fpimm),
|
|
||||||
[{return N->isExactlyValue(1.0);}]
|
|
||||||
>;
|
|
||||||
|
|
||||||
let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
|
|
||||||
|
|
||||||
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
|
|
||||||
(outs rc:$dst),
|
|
||||||
(ins rc:$src0),
|
|
||||||
"CLAMP $dst, $src0",
|
|
||||||
[(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
class FABS <RegisterClass rc> : AMDGPUShaderInst <
|
|
||||||
(outs rc:$dst),
|
|
||||||
(ins rc:$src0),
|
|
||||||
"FABS $dst, $src0",
|
|
||||||
[(set rc:$dst, (fabs rc:$src0))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
|
|
||||||
(outs rc:$dst),
|
|
||||||
(ins rc:$src0),
|
|
||||||
"FNEG $dst, $src0",
|
|
||||||
[(set rc:$dst, (fneg rc:$src0))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def SHADER_TYPE : AMDGPUShaderInst <
|
|
||||||
(outs),
|
|
||||||
(ins i32imm:$type),
|
|
||||||
"SHADER_TYPE $type",
|
|
||||||
[(int_AMDGPU_shader_type imm:$type)]
|
|
||||||
>;
|
|
||||||
|
|
||||||
} // End isCodeGenOnly = 1, isPseudo = 1, hasCustomInserter = 1
|
|
||||||
|
|
||||||
/* Generic helper patterns for intrinsics */
|
|
||||||
/* -------------------------------------- */
|
|
||||||
|
|
||||||
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
|
|
||||||
RegisterClass rc> : Pat <
|
|
||||||
(fpow rc:$src0, rc:$src1),
|
|
||||||
(exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
|
|
||||||
>;
|
|
||||||
|
|
||||||
/* Other helper patterns */
|
|
||||||
/* --------------------- */
|
|
||||||
|
|
||||||
/* Extract element pattern */
|
|
||||||
class Extract_Element <ValueType sub_type, ValueType vec_type,
|
|
||||||
RegisterClass vec_class, int sub_idx,
|
|
||||||
SubRegIndex sub_reg>: Pat<
|
|
||||||
(sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
|
|
||||||
(EXTRACT_SUBREG vec_class:$src, sub_reg)
|
|
||||||
>;
|
|
||||||
|
|
||||||
/* Insert element pattern */
|
|
||||||
class Insert_Element <ValueType elem_type, ValueType vec_type,
|
|
||||||
RegisterClass elem_class, RegisterClass vec_class,
|
|
||||||
int sub_idx, SubRegIndex sub_reg> : Pat <
|
|
||||||
|
|
||||||
(vec_type (vector_insert (vec_type vec_class:$vec),
|
|
||||||
(elem_type elem_class:$elem), sub_idx)),
|
|
||||||
(INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
|
|
||||||
>;
|
|
||||||
|
|
||||||
// Vector Build pattern
|
|
||||||
class Vector_Build <ValueType vecType, RegisterClass vectorClass,
|
|
||||||
ValueType elemType, RegisterClass elemClass> : Pat <
|
|
||||||
(vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
|
|
||||||
(elemType elemClass:$z), (elemType elemClass:$w))),
|
|
||||||
(INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
|
|
||||||
(vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
|
|
||||||
elemClass:$z, sel_z), elemClass:$w, sel_w)
|
|
||||||
>;
|
|
||||||
|
|
||||||
// bitconvert pattern
|
|
||||||
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
|
|
||||||
(dt (bitconvert (st rc:$src0))),
|
|
||||||
(dt rc:$src0)
|
|
||||||
>;
|
|
||||||
|
|
||||||
include "R600Instructions.td"
|
|
||||||
|
|
||||||
include "SIInstrInfo.td"
|
|
||||||
|
|
||||||
|
|
@ -1,63 +0,0 @@
|
||||||
//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file defines intrinsics that are used by all hw codegen targets.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
let TargetPrefix = "AMDGPU", isTarget = 1 in {
|
|
||||||
|
|
||||||
def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
|
|
||||||
def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
|
|
||||||
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
|
|
||||||
def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_ssg : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
|
||||||
|
|
||||||
def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
|
|
||||||
}
|
|
||||||
|
|
||||||
let TargetPrefix = "TGSI", isTarget = 1 in {
|
|
||||||
|
|
||||||
def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[]>;
|
|
||||||
}
|
|
||||||
|
|
||||||
include "SIIntrinsics.td"
|
|
||||||
|
|
@ -1,82 +0,0 @@
|
||||||
//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains code to lower AMDGPU MachineInstrs to their corresponding
|
|
||||||
// MCInst.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
|
|
||||||
#include "AMDGPUMCInstLower.h"
|
|
||||||
#include "AMDGPUAsmPrinter.h"
|
|
||||||
#include "R600InstrInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstr.h"
|
|
||||||
#include "llvm/Constants.h"
|
|
||||||
#include "llvm/MC/MCInst.h"
|
|
||||||
#include "llvm/MC/MCStreamer.h"
|
|
||||||
#include "llvm/Support/ErrorHandling.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
AMDGPUMCInstLower::AMDGPUMCInstLower() { }
|
|
||||||
|
|
||||||
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
|
|
||||||
OutMI.setOpcode(MI->getOpcode());
|
|
||||||
|
|
||||||
for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
|
|
||||||
const MachineOperand &MO = MI->getOperand(i);
|
|
||||||
|
|
||||||
MCOperand MCOp;
|
|
||||||
switch (MO.getType()) {
|
|
||||||
default:
|
|
||||||
llvm_unreachable("unknown operand type");
|
|
||||||
case MachineOperand::MO_FPImmediate: {
|
|
||||||
const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
|
|
||||||
assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
|
|
||||||
"Only floating point immediates are supported at the moment.");
|
|
||||||
MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case MachineOperand::MO_Immediate:
|
|
||||||
MCOp = MCOperand::CreateImm(MO.getImm());
|
|
||||||
break;
|
|
||||||
case MachineOperand::MO_Register:
|
|
||||||
MCOp = MCOperand::CreateReg(MO.getReg());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
OutMI.addOperand(MCOp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
|
||||||
AMDGPUMCInstLower MCInstLowering;
|
|
||||||
|
|
||||||
// Ignore placeholder instructions:
|
|
||||||
if (MI->getOpcode() == AMDGPU::MASK_WRITE) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (MI->isBundle()) {
|
|
||||||
const MachineBasicBlock *MBB = MI->getParent();
|
|
||||||
MachineBasicBlock::const_instr_iterator I = MI;
|
|
||||||
++I;
|
|
||||||
while (I != MBB->end() && I->isInsideBundle()) {
|
|
||||||
MCInst MCBundleInst;
|
|
||||||
const MachineInstr *BundledInst = I;
|
|
||||||
MCInstLowering.lower(BundledInst, MCBundleInst);
|
|
||||||
OutStreamer.EmitInstruction(MCBundleInst);
|
|
||||||
++I;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
MCInst TmpInst;
|
|
||||||
MCInstLowering.lower(MI, TmpInst);
|
|
||||||
OutStreamer.EmitInstruction(TmpInst);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPU_MCINSTLOWER_H
|
|
||||||
#define AMDGPU_MCINSTLOWER_H
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class MCInst;
|
|
||||||
class MachineInstr;
|
|
||||||
|
|
||||||
class AMDGPUMCInstLower {
|
|
||||||
|
|
||||||
public:
|
|
||||||
AMDGPUMCInstLower();
|
|
||||||
|
|
||||||
/// lower - Lower a MachineInstr to an MCInst
|
|
||||||
void lower(const MachineInstr *MI, MCInst &OutMI) const;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif //AMDGPU_MCINSTLOWER_H
|
|
||||||
|
|
@ -1,50 +0,0 @@
|
||||||
//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Parent TargetRegisterInfo class common to all hw codegen targets.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPURegisterInfo.h"
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
|
|
||||||
const TargetInstrInfo &tii)
|
|
||||||
: AMDGPUGenRegisterInfo(0),
|
|
||||||
TM(tm),
|
|
||||||
TII(tii)
|
|
||||||
{ }
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Function handling callbacks - Functions are a seldom used feature of GPUS, so
|
|
||||||
// they are not supported at this time.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
|
|
||||||
|
|
||||||
const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
|
|
||||||
const {
|
|
||||||
return &CalleeSavedReg;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
|
||||||
int SPAdj,
|
|
||||||
RegScavenger *RS) const {
|
|
||||||
assert(!"Subroutines not supported yet");
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
|
|
||||||
assert(!"Subroutines not supported yet");
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define GET_REGINFO_TARGET_DESC
|
|
||||||
#include "AMDGPUGenRegisterInfo.inc"
|
|
||||||
|
|
@ -1,62 +0,0 @@
|
||||||
//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains the TargetRegisterInfo interface that is implemented
|
|
||||||
// by all hw codegen targets.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPUREGISTERINFO_H_
|
|
||||||
#define AMDGPUREGISTERINFO_H_
|
|
||||||
|
|
||||||
#include "llvm/ADT/BitVector.h"
|
|
||||||
#include "llvm/Target/TargetRegisterInfo.h"
|
|
||||||
|
|
||||||
#define GET_REGINFO_HEADER
|
|
||||||
#define GET_REGINFO_ENUM
|
|
||||||
#include "AMDGPUGenRegisterInfo.inc"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class AMDGPUTargetMachine;
|
|
||||||
class TargetInstrInfo;
|
|
||||||
|
|
||||||
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo
|
|
||||||
{
|
|
||||||
TargetMachine &TM;
|
|
||||||
const TargetInstrInfo &TII;
|
|
||||||
static const uint16_t CalleeSavedReg;
|
|
||||||
|
|
||||||
AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
|
|
||||||
|
|
||||||
virtual BitVector getReservedRegs(const MachineFunction &MF) const {
|
|
||||||
assert(!"Unimplemented"); return BitVector();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// getISARegClass - rc is an AMDIL reg class. This function returns the
|
|
||||||
/// ISA reg class that is equivalent to the given AMDIL reg class.
|
|
||||||
virtual const TargetRegisterClass * getISARegClass(
|
|
||||||
const TargetRegisterClass * rc) const {
|
|
||||||
assert(!"Unimplemented"); return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
|
|
||||||
assert(!"Unimplemented"); return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
|
|
||||||
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
|
|
||||||
RegScavenger *RS) const;
|
|
||||||
unsigned getFrameRegister(const MachineFunction &MF) const;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif // AMDIDSAREGISTERINFO_H_
|
|
||||||
|
|
@ -1,22 +0,0 @@
|
||||||
//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Tablegen register definitions common to all hw codegen targets.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
let Namespace = "AMDGPU" in {
|
|
||||||
def sel_x : SubRegIndex;
|
|
||||||
def sel_y : SubRegIndex;
|
|
||||||
def sel_z : SubRegIndex;
|
|
||||||
def sel_w : SubRegIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
include "R600RegisterInfo.td"
|
|
||||||
include "SIRegisterInfo.td"
|
|
||||||
|
|
@ -1,94 +0,0 @@
|
||||||
//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file implements the AMDGPU specific subclass of TargetSubtarget.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
#define GET_SUBTARGETINFO_ENUM
|
|
||||||
#define GET_SUBTARGETINFO_TARGET_DESC
|
|
||||||
#define GET_SUBTARGETINFO_CTOR
|
|
||||||
#include "AMDGPUGenSubtargetInfo.inc"
|
|
||||||
|
|
||||||
AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
|
|
||||||
AMDGPUGenSubtargetInfo(TT, CPU, FS), mDumpCode(false) {
|
|
||||||
InstrItins = getInstrItineraryForCPU(CPU);
|
|
||||||
|
|
||||||
memset(CapsOverride, 0, sizeof(*CapsOverride)
|
|
||||||
* AMDGPUDeviceInfo::MaxNumberCapabilities);
|
|
||||||
// Default card
|
|
||||||
StringRef GPU = CPU;
|
|
||||||
mIs64bit = false;
|
|
||||||
mDefaultSize[0] = 64;
|
|
||||||
mDefaultSize[1] = 1;
|
|
||||||
mDefaultSize[2] = 1;
|
|
||||||
ParseSubtargetFeatures(GPU, FS);
|
|
||||||
mDevName = GPU;
|
|
||||||
mDevice = AMDGPUDeviceInfo::getDeviceFromName(mDevName, this, mIs64bit);
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUSubtarget::~AMDGPUSubtarget()
|
|
||||||
{
|
|
||||||
delete mDevice;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const
|
|
||||||
{
|
|
||||||
assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
|
|
||||||
"Caps index is out of bounds!");
|
|
||||||
return CapsOverride[caps];
|
|
||||||
}
|
|
||||||
bool
|
|
||||||
AMDGPUSubtarget::is64bit() const
|
|
||||||
{
|
|
||||||
return mIs64bit;
|
|
||||||
}
|
|
||||||
bool
|
|
||||||
AMDGPUSubtarget::isTargetELF() const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
size_t
|
|
||||||
AMDGPUSubtarget::getDefaultSize(uint32_t dim) const
|
|
||||||
{
|
|
||||||
if (dim > 3) {
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
return mDefaultSize[dim];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string
|
|
||||||
AMDGPUSubtarget::getDataLayout() const
|
|
||||||
{
|
|
||||||
if (!mDevice) {
|
|
||||||
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
|
|
||||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
|
||||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
|
||||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
|
||||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
|
|
||||||
}
|
|
||||||
return mDevice->getDataLayout();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string
|
|
||||||
AMDGPUSubtarget::getDeviceName() const
|
|
||||||
{
|
|
||||||
return mDevName;
|
|
||||||
}
|
|
||||||
const AMDGPUDevice *
|
|
||||||
AMDGPUSubtarget::device() const
|
|
||||||
{
|
|
||||||
return mDevice;
|
|
||||||
}
|
|
||||||
|
|
@ -1,66 +0,0 @@
|
||||||
//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file declares the AMDGPU specific subclass of TargetSubtarget.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef _AMDGPUSUBTARGET_H_
|
|
||||||
#define _AMDGPUSUBTARGET_H_
|
|
||||||
#include "AMDILDevice.h"
|
|
||||||
#include "llvm/ADT/StringExtras.h"
|
|
||||||
#include "llvm/ADT/StringRef.h"
|
|
||||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
|
||||||
|
|
||||||
#define GET_SUBTARGETINFO_HEADER
|
|
||||||
#include "AMDGPUGenSubtargetInfo.inc"
|
|
||||||
|
|
||||||
#define MAX_CB_SIZE (1 << 16)
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo
|
|
||||||
{
|
|
||||||
private:
|
|
||||||
bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
|
|
||||||
const AMDGPUDevice *mDevice;
|
|
||||||
size_t mDefaultSize[3];
|
|
||||||
size_t mMinimumSize[3];
|
|
||||||
std::string mDevName;
|
|
||||||
bool mIs64bit;
|
|
||||||
bool mIs32on64bit;
|
|
||||||
bool mDumpCode;
|
|
||||||
bool mR600ALUInst;
|
|
||||||
|
|
||||||
InstrItineraryData InstrItins;
|
|
||||||
|
|
||||||
public:
|
|
||||||
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
|
|
||||||
virtual ~AMDGPUSubtarget();
|
|
||||||
|
|
||||||
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
|
|
||||||
virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
|
|
||||||
|
|
||||||
bool isOverride(AMDGPUDeviceInfo::Caps) const;
|
|
||||||
bool is64bit() const;
|
|
||||||
|
|
||||||
// Helper functions to simplify if statements
|
|
||||||
bool isTargetELF() const;
|
|
||||||
const AMDGPUDevice* device() const;
|
|
||||||
std::string getDataLayout() const;
|
|
||||||
std::string getDeviceName() const;
|
|
||||||
virtual size_t getDefaultSize(uint32_t dim) const;
|
|
||||||
bool dumpCode() const { return mDumpCode; }
|
|
||||||
bool r600ALUEncoding() const { return mR600ALUInst; }
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif // AMDGPUSUBTARGET_H_
|
|
||||||
|
|
@ -1,143 +0,0 @@
|
||||||
//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// The AMDGPU target machine contains all of the hardware specific information
|
|
||||||
// needed to emit code for R600 and SI GPUs.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
|
||||||
#include "AMDGPU.h"
|
|
||||||
#include "R600ISelLowering.h"
|
|
||||||
#include "R600InstrInfo.h"
|
|
||||||
#include "SIISelLowering.h"
|
|
||||||
#include "SIInstrInfo.h"
|
|
||||||
#include "llvm/Analysis/Passes.h"
|
|
||||||
#include "llvm/Analysis/Verifier.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
|
||||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
|
||||||
#include "llvm/CodeGen/Passes.h"
|
|
||||||
#include "llvm/MC/MCAsmInfo.h"
|
|
||||||
#include "llvm/PassManager.h"
|
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
|
||||||
#include "llvm/Support/raw_os_ostream.h"
|
|
||||||
#include "llvm/Transforms/IPO.h"
|
|
||||||
#include "llvm/Transforms/Scalar.h"
|
|
||||||
#include <llvm/CodeGen/Passes.h>
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
extern "C" void LLVMInitializeAMDGPUTarget() {
|
|
||||||
// Register the target
|
|
||||||
RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
|
|
||||||
StringRef CPU, StringRef FS,
|
|
||||||
TargetOptions Options,
|
|
||||||
Reloc::Model RM, CodeModel::Model CM,
|
|
||||||
CodeGenOpt::Level OptLevel
|
|
||||||
)
|
|
||||||
:
|
|
||||||
LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
|
|
||||||
Subtarget(TT, CPU, FS),
|
|
||||||
DataLayout(Subtarget.getDataLayout()),
|
|
||||||
FrameLowering(TargetFrameLowering::StackGrowsUp,
|
|
||||||
Subtarget.device()->getStackAlignment(), 0),
|
|
||||||
IntrinsicInfo(this),
|
|
||||||
InstrItins(&Subtarget.getInstrItineraryData()),
|
|
||||||
mDump(false)
|
|
||||||
|
|
||||||
{
|
|
||||||
// TLInfo uses InstrInfo so it must be initialized after.
|
|
||||||
if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
|
||||||
InstrInfo = new R600InstrInfo(*this);
|
|
||||||
TLInfo = new R600TargetLowering(*this);
|
|
||||||
} else {
|
|
||||||
InstrInfo = new SIInstrInfo(*this);
|
|
||||||
TLInfo = new SITargetLowering(*this);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUTargetMachine::~AMDGPUTargetMachine()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
class AMDGPUPassConfig : public TargetPassConfig {
|
|
||||||
public:
|
|
||||||
AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
|
|
||||||
: TargetPassConfig(TM, PM) {}
|
|
||||||
|
|
||||||
AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
|
|
||||||
return getTM<AMDGPUTargetMachine>();
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual bool addPreISel();
|
|
||||||
virtual bool addInstSelector();
|
|
||||||
virtual bool addPreRegAlloc();
|
|
||||||
virtual bool addPostRegAlloc();
|
|
||||||
virtual bool addPreSched2();
|
|
||||||
virtual bool addPreEmitPass();
|
|
||||||
};
|
|
||||||
} // End of anonymous namespace
|
|
||||||
|
|
||||||
TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
|
|
||||||
return new AMDGPUPassConfig(this, PM);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
AMDGPUPassConfig::addPreISel()
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUPassConfig::addInstSelector() {
|
|
||||||
PM->add(createAMDGPUPeepholeOpt(*TM));
|
|
||||||
PM->add(createAMDGPUISelDag(getAMDGPUTargetMachine()));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUPassConfig::addPreRegAlloc() {
|
|
||||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
|
||||||
|
|
||||||
if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
|
|
||||||
PM->add(createSIAssignInterpRegsPass(*TM));
|
|
||||||
}
|
|
||||||
PM->add(createAMDGPUConvertToISAPass(*TM));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUPassConfig::addPostRegAlloc() {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUPassConfig::addPreSched2() {
|
|
||||||
|
|
||||||
addPass(IfConverterID);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUPassConfig::addPreEmitPass() {
|
|
||||||
PM->add(createAMDGPUCFGPreparationPass(*TM));
|
|
||||||
PM->add(createAMDGPUCFGStructurizerPass(*TM));
|
|
||||||
|
|
||||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
|
||||||
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
|
|
||||||
PM->add(createR600ExpandSpecialInstrsPass(*TM));
|
|
||||||
addPass(FinalizeMachineBundlesID);
|
|
||||||
} else {
|
|
||||||
PM->add(createSILowerLiteralConstantsPass(*TM));
|
|
||||||
// piglit is unreliable (VM protection faults, GPU lockups) with this pass:
|
|
||||||
//PM->add(createSILowerFlowControlPass(*TM));
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -1,70 +0,0 @@
|
||||||
//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// The AMDGPU TargetMachine interface definition for hw codgen targets.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPU_TARGET_MACHINE_H
|
|
||||||
#define AMDGPU_TARGET_MACHINE_H
|
|
||||||
|
|
||||||
#include "AMDGPUInstrInfo.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
#include "AMDILFrameLowering.h"
|
|
||||||
#include "AMDILIntrinsicInfo.h"
|
|
||||||
#include "R600ISelLowering.h"
|
|
||||||
#include "llvm/ADT/OwningPtr.h"
|
|
||||||
#include "llvm/Target/TargetData.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
|
|
||||||
|
|
||||||
class AMDGPUTargetMachine : public LLVMTargetMachine {
|
|
||||||
|
|
||||||
AMDGPUSubtarget Subtarget;
|
|
||||||
const TargetData DataLayout;
|
|
||||||
AMDGPUFrameLowering FrameLowering;
|
|
||||||
AMDGPUIntrinsicInfo IntrinsicInfo;
|
|
||||||
const AMDGPUInstrInfo * InstrInfo;
|
|
||||||
AMDGPUTargetLowering * TLInfo;
|
|
||||||
const InstrItineraryData* InstrItins;
|
|
||||||
bool mDump;
|
|
||||||
|
|
||||||
public:
|
|
||||||
AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
|
|
||||||
StringRef CPU,
|
|
||||||
TargetOptions Options,
|
|
||||||
Reloc::Model RM, CodeModel::Model CM,
|
|
||||||
CodeGenOpt::Level OL);
|
|
||||||
~AMDGPUTargetMachine();
|
|
||||||
virtual const AMDGPUFrameLowering* getFrameLowering() const {
|
|
||||||
return &FrameLowering;
|
|
||||||
}
|
|
||||||
virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
|
|
||||||
return &IntrinsicInfo;
|
|
||||||
}
|
|
||||||
virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
|
|
||||||
virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
|
|
||||||
virtual const AMDGPURegisterInfo *getRegisterInfo() const {
|
|
||||||
return &InstrInfo->getRegisterInfo();
|
|
||||||
}
|
|
||||||
virtual AMDGPUTargetLowering * getTargetLowering() const {
|
|
||||||
return TLInfo;
|
|
||||||
}
|
|
||||||
virtual const InstrItineraryData* getInstrItineraryData() const {
|
|
||||||
return InstrItins;
|
|
||||||
}
|
|
||||||
virtual const TargetData* getTargetData() const { return &DataLayout; }
|
|
||||||
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif // AMDGPU_TARGET_MACHINE_H
|
|
||||||
|
|
@ -1,106 +0,0 @@
|
||||||
//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains the entry points for global functions defined in the LLVM
|
|
||||||
// AMDGPU back-end.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDIL_H_
|
|
||||||
#define AMDIL_H_
|
|
||||||
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
|
||||||
#include "llvm/Target/TargetMachine.h"
|
|
||||||
|
|
||||||
#define ARENA_SEGMENT_RESERVED_UAVS 12
|
|
||||||
#define DEFAULT_ARENA_UAV_ID 8
|
|
||||||
#define DEFAULT_RAW_UAV_ID 7
|
|
||||||
#define GLOBAL_RETURN_RAW_UAV_ID 11
|
|
||||||
#define HW_MAX_NUM_CB 8
|
|
||||||
#define MAX_NUM_UNIQUE_UAVS 8
|
|
||||||
#define OPENCL_MAX_NUM_ATOMIC_COUNTERS 8
|
|
||||||
#define OPENCL_MAX_READ_IMAGES 128
|
|
||||||
#define OPENCL_MAX_WRITE_IMAGES 8
|
|
||||||
#define OPENCL_MAX_SAMPLERS 16
|
|
||||||
|
|
||||||
// The next two values can never be zero, as zero is the ID that is
|
|
||||||
// used to assert against.
|
|
||||||
#define DEFAULT_LDS_ID 1
|
|
||||||
#define DEFAULT_GDS_ID 1
|
|
||||||
#define DEFAULT_SCRATCH_ID 1
|
|
||||||
#define DEFAULT_VEC_SLOTS 8
|
|
||||||
|
|
||||||
#define OCL_DEVICE_RV710 0x0001
|
|
||||||
#define OCL_DEVICE_RV730 0x0002
|
|
||||||
#define OCL_DEVICE_RV770 0x0004
|
|
||||||
#define OCL_DEVICE_CEDAR 0x0008
|
|
||||||
#define OCL_DEVICE_REDWOOD 0x0010
|
|
||||||
#define OCL_DEVICE_JUNIPER 0x0020
|
|
||||||
#define OCL_DEVICE_CYPRESS 0x0040
|
|
||||||
#define OCL_DEVICE_CAICOS 0x0080
|
|
||||||
#define OCL_DEVICE_TURKS 0x0100
|
|
||||||
#define OCL_DEVICE_BARTS 0x0200
|
|
||||||
#define OCL_DEVICE_CAYMAN 0x0400
|
|
||||||
#define OCL_DEVICE_ALL 0x3FFF
|
|
||||||
|
|
||||||
/// The number of function ID's that are reserved for
|
|
||||||
/// internal compiler usage.
|
|
||||||
const unsigned int RESERVED_FUNCS = 1024;
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
class AMDGPUInstrPrinter;
|
|
||||||
class FunctionPass;
|
|
||||||
class MCAsmInfo;
|
|
||||||
class raw_ostream;
|
|
||||||
class Target;
|
|
||||||
class TargetMachine;
|
|
||||||
|
|
||||||
/// Instruction selection passes.
|
|
||||||
FunctionPass*
|
|
||||||
createAMDGPUISelDag(TargetMachine &TM);
|
|
||||||
FunctionPass*
|
|
||||||
createAMDGPUPeepholeOpt(TargetMachine &TM);
|
|
||||||
|
|
||||||
/// Pre emit passes.
|
|
||||||
FunctionPass*
|
|
||||||
createAMDGPUCFGPreparationPass(TargetMachine &TM);
|
|
||||||
FunctionPass*
|
|
||||||
createAMDGPUCFGStructurizerPass(TargetMachine &TM);
|
|
||||||
|
|
||||||
extern Target TheAMDGPUTarget;
|
|
||||||
} // end namespace llvm;
|
|
||||||
|
|
||||||
/// Include device information enumerations
|
|
||||||
#include "AMDILDeviceInfo.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
/// OpenCL uses address spaces to differentiate between
|
|
||||||
/// various memory regions on the hardware. On the CPU
|
|
||||||
/// all of the address spaces point to the same memory,
|
|
||||||
/// however on the GPU, each address space points to
|
|
||||||
/// a seperate piece of memory that is unique from other
|
|
||||||
/// memory locations.
|
|
||||||
namespace AMDGPUAS {
|
|
||||||
enum AddressSpaces {
|
|
||||||
PRIVATE_ADDRESS = 0, // Address space for private memory.
|
|
||||||
GLOBAL_ADDRESS = 1, // Address space for global memory (RAT0, VTX0).
|
|
||||||
CONSTANT_ADDRESS = 2, // Address space for constant memory.
|
|
||||||
LOCAL_ADDRESS = 3, // Address space for local memory.
|
|
||||||
REGION_ADDRESS = 4, // Address space for region memory.
|
|
||||||
ADDRESS_NONE = 5, // Address space for unknown memory.
|
|
||||||
PARAM_D_ADDRESS = 6, // Address space for direct addressible parameter memory (CONST0)
|
|
||||||
PARAM_I_ADDRESS = 7, // Address space for indirect addressible parameter memory (VTX1)
|
|
||||||
USER_SGPR_ADDRESS = 8, // Address space for USER_SGPRS on SI
|
|
||||||
LAST_ADDRESS = 9
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace AMDGPUAS
|
|
||||||
|
|
||||||
} // end namespace llvm
|
|
||||||
#endif // AMDIL_H_
|
|
||||||
|
|
@ -1,129 +0,0 @@
|
||||||
//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
#include "AMDIL7XXDevice.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
#include "AMDILDevice.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST)
|
|
||||||
{
|
|
||||||
setCaps();
|
|
||||||
std::string name = mSTM->getDeviceName();
|
|
||||||
if (name == "rv710") {
|
|
||||||
mDeviceFlag = OCL_DEVICE_RV710;
|
|
||||||
} else if (name == "rv730") {
|
|
||||||
mDeviceFlag = OCL_DEVICE_RV730;
|
|
||||||
} else {
|
|
||||||
mDeviceFlag = OCL_DEVICE_RV770;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPU7XXDevice::~AMDGPU7XXDevice()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPU7XXDevice::setCaps()
|
|
||||||
{
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPU7XXDevice::getMaxLDSSize() const
|
|
||||||
{
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
|
||||||
return MAX_LDS_SIZE_700;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPU7XXDevice::getWavefrontSize() const
|
|
||||||
{
|
|
||||||
return AMDGPUDevice::HalfWavefrontSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t AMDGPU7XXDevice::getGeneration() const
|
|
||||||
{
|
|
||||||
return AMDGPUDeviceInfo::HD4XXX;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const
|
|
||||||
{
|
|
||||||
switch (DeviceID) {
|
|
||||||
default:
|
|
||||||
assert(0 && "ID type passed in is unknown!");
|
|
||||||
break;
|
|
||||||
case GLOBAL_ID:
|
|
||||||
case CONSTANT_ID:
|
|
||||||
case RAW_UAV_ID:
|
|
||||||
case ARENA_UAV_ID:
|
|
||||||
break;
|
|
||||||
case LDS_ID:
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
|
||||||
return DEFAULT_LDS_ID;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case SCRATCH_ID:
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
|
|
||||||
return DEFAULT_SCRATCH_ID;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case GDS_ID:
|
|
||||||
assert(0 && "GDS UAV ID is not supported on this chip");
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
|
|
||||||
return DEFAULT_GDS_ID;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST)
|
|
||||||
{
|
|
||||||
setCaps();
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPU770Device::~AMDGPU770Device()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPU770Device::setCaps()
|
|
||||||
{
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::FMA);
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
|
|
||||||
}
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
|
|
||||||
mHWBits.reset(AMDGPUDeviceInfo::LongOps);
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::LongOps);
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPU770Device::getWavefrontSize() const
|
|
||||||
{
|
|
||||||
return AMDGPUDevice::WavefrontSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPU710Device::~AMDGPU710Device()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPU710Device::getWavefrontSize() const
|
|
||||||
{
|
|
||||||
return AMDGPUDevice::QuarterWavefrontSize;
|
|
||||||
}
|
|
||||||
|
|
@ -1,70 +0,0 @@
|
||||||
//==-- AMDIL7XXDevice.h - Define 7XX Device Device for AMDIL ---*- C++ -*--===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface for the subtarget data classes.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// This file will define the interface that each generation needs to
|
|
||||||
// implement in order to correctly answer queries on the capabilities of the
|
|
||||||
// specific hardware.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
#ifndef _AMDIL7XXDEVICEIMPL_H_
|
|
||||||
#define _AMDIL7XXDEVICEIMPL_H_
|
|
||||||
#include "AMDILDevice.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
class AMDGPUSubtarget;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// 7XX generation of devices and their respective sub classes
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
// The AMDGPU7XXDevice class represents the generic 7XX device. All 7XX
|
|
||||||
// devices are derived from this class. The AMDGPU7XX device will only
|
|
||||||
// support the minimal features that are required to be considered OpenCL 1.0
|
|
||||||
// compliant and nothing more.
|
|
||||||
class AMDGPU7XXDevice : public AMDGPUDevice {
|
|
||||||
public:
|
|
||||||
AMDGPU7XXDevice(AMDGPUSubtarget *ST);
|
|
||||||
virtual ~AMDGPU7XXDevice();
|
|
||||||
virtual size_t getMaxLDSSize() const;
|
|
||||||
virtual size_t getWavefrontSize() const;
|
|
||||||
virtual uint32_t getGeneration() const;
|
|
||||||
virtual uint32_t getResourceID(uint32_t DeviceID) const;
|
|
||||||
virtual uint32_t getMaxNumUAVs() const;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
virtual void setCaps();
|
|
||||||
}; // AMDGPU7XXDevice
|
|
||||||
|
|
||||||
// The AMDGPU770Device class represents the RV770 chip and it's
|
|
||||||
// derivative cards. The difference between this device and the base
|
|
||||||
// class is this device device adds support for double precision
|
|
||||||
// and has a larger wavefront size.
|
|
||||||
class AMDGPU770Device : public AMDGPU7XXDevice {
|
|
||||||
public:
|
|
||||||
AMDGPU770Device(AMDGPUSubtarget *ST);
|
|
||||||
virtual ~AMDGPU770Device();
|
|
||||||
virtual size_t getWavefrontSize() const;
|
|
||||||
private:
|
|
||||||
virtual void setCaps();
|
|
||||||
}; // AMDGPU770Device
|
|
||||||
|
|
||||||
// The AMDGPU710Device class derives from the 7XX base class, but this
|
|
||||||
// class is a smaller derivative, so we need to overload some of the
|
|
||||||
// functions in order to correctly specify this information.
|
|
||||||
class AMDGPU710Device : public AMDGPU7XXDevice {
|
|
||||||
public:
|
|
||||||
AMDGPU710Device(AMDGPUSubtarget *ST);
|
|
||||||
virtual ~AMDGPU710Device();
|
|
||||||
virtual size_t getWavefrontSize() const;
|
|
||||||
}; // AMDGPU710Device
|
|
||||||
|
|
||||||
} // namespace llvm
|
|
||||||
#endif // _AMDILDEVICEIMPL_H_
|
|
||||||
|
|
@ -1,85 +0,0 @@
|
||||||
//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Target-independent interfaces which we are implementing
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
include "llvm/Target/Target.td"
|
|
||||||
|
|
||||||
// Dummy Instruction itineraries for pseudo instructions
|
|
||||||
def ALU_NULL : FuncUnit;
|
|
||||||
def NullALU : InstrItinClass;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// AMDIL Subtarget features.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
def FeatureFP64 : SubtargetFeature<"fp64",
|
|
||||||
"CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
|
|
||||||
"true",
|
|
||||||
"Enable 64bit double precision operations">;
|
|
||||||
def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
|
|
||||||
"CapsOverride[AMDGPUDeviceInfo::ByteStores]",
|
|
||||||
"true",
|
|
||||||
"Enable byte addressable stores">;
|
|
||||||
def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
|
|
||||||
"CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
|
|
||||||
"true",
|
|
||||||
"Enable duplicate barrier detection(HD5XXX or later).">;
|
|
||||||
def FeatureImages : SubtargetFeature<"images",
|
|
||||||
"CapsOverride[AMDGPUDeviceInfo::Images]",
|
|
||||||
"true",
|
|
||||||
"Enable image functions">;
|
|
||||||
def FeatureMultiUAV : SubtargetFeature<"multi_uav",
|
|
||||||
"CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
|
|
||||||
"true",
|
|
||||||
"Generate multiple UAV code(HD5XXX family or later)">;
|
|
||||||
def FeatureMacroDB : SubtargetFeature<"macrodb",
|
|
||||||
"CapsOverride[AMDGPUDeviceInfo::MacroDB]",
|
|
||||||
"true",
|
|
||||||
"Use internal macrodb, instead of macrodb in driver">;
|
|
||||||
def FeatureNoAlias : SubtargetFeature<"noalias",
|
|
||||||
"CapsOverride[AMDGPUDeviceInfo::NoAlias]",
|
|
||||||
"true",
|
|
||||||
"assert that all kernel argument pointers are not aliased">;
|
|
||||||
def FeatureNoInline : SubtargetFeature<"no-inline",
|
|
||||||
"CapsOverride[AMDGPUDeviceInfo::NoInline]",
|
|
||||||
"true",
|
|
||||||
"specify whether to not inline functions">;
|
|
||||||
|
|
||||||
def Feature64BitPtr : SubtargetFeature<"64BitPtr",
|
|
||||||
"mIs64bit",
|
|
||||||
"false",
|
|
||||||
"Specify if 64bit addressing should be used.">;
|
|
||||||
|
|
||||||
def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
|
|
||||||
"mIs32on64bit",
|
|
||||||
"false",
|
|
||||||
"Specify if 64bit sized pointers with 32bit addressing should be used.">;
|
|
||||||
def FeatureDebug : SubtargetFeature<"debug",
|
|
||||||
"CapsOverride[AMDGPUDeviceInfo::Debug]",
|
|
||||||
"true",
|
|
||||||
"Debug mode is enabled, so disable hardware accelerated address spaces.">;
|
|
||||||
def FeatureDumpCode : SubtargetFeature <"DumpCode",
|
|
||||||
"mDumpCode",
|
|
||||||
"true",
|
|
||||||
"Dump MachineInstrs in the CodeEmitter">;
|
|
||||||
|
|
||||||
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
|
|
||||||
"mR600ALUInst",
|
|
||||||
"false",
|
|
||||||
"Older version of ALU instructions encoding.">;
|
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Register File, Calling Conv, Instruction Descriptions
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
include "AMDILRegisterInfo.td"
|
|
||||||
include "AMDILInstrInfo.td"
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,137 +0,0 @@
|
||||||
//===-- AMDILDevice.cpp - Base class for AMDIL Devices --------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
#include "AMDILDevice.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
// Default implementation for all of the classes.
|
|
||||||
AMDGPUDevice::AMDGPUDevice(AMDGPUSubtarget *ST) : mSTM(ST)
|
|
||||||
{
|
|
||||||
mHWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
|
|
||||||
mSWBits.resize(AMDGPUDeviceInfo::MaxNumberCapabilities);
|
|
||||||
setCaps();
|
|
||||||
mDeviceFlag = OCL_DEVICE_ALL;
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUDevice::~AMDGPUDevice()
|
|
||||||
{
|
|
||||||
mHWBits.clear();
|
|
||||||
mSWBits.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPUDevice::getMaxGDSSize() const
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t
|
|
||||||
AMDGPUDevice::getDeviceFlag() const
|
|
||||||
{
|
|
||||||
return mDeviceFlag;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPUDevice::getMaxNumCBs() const
|
|
||||||
{
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
|
|
||||||
return HW_MAX_NUM_CB;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPUDevice::getMaxCBSize() const
|
|
||||||
{
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::ConstantMem)) {
|
|
||||||
return MAX_CB_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPUDevice::getMaxScratchSize() const
|
|
||||||
{
|
|
||||||
return 65536;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t AMDGPUDevice::getStackAlignment() const
|
|
||||||
{
|
|
||||||
return 16;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPUDevice::setCaps()
|
|
||||||
{
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::HalfOps);
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::ByteOps);
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::ShortOps);
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::NoInline)) {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::NoInline);
|
|
||||||
}
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::MacroDB)) {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::MacroDB);
|
|
||||||
}
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::ConstantMem);
|
|
||||||
} else {
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::ConstantMem);
|
|
||||||
}
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::PrivateMem);
|
|
||||||
} else {
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::PrivateMem);
|
|
||||||
}
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::BarrierDetect)) {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
|
|
||||||
}
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::LongOps);
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUDeviceInfo::ExecutionMode
|
|
||||||
AMDGPUDevice::getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const
|
|
||||||
{
|
|
||||||
if (mHWBits[Caps]) {
|
|
||||||
assert(!mSWBits[Caps] && "Cannot set both SW and HW caps");
|
|
||||||
return AMDGPUDeviceInfo::Hardware;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mSWBits[Caps]) {
|
|
||||||
assert(!mHWBits[Caps] && "Cannot set both SW and HW caps");
|
|
||||||
return AMDGPUDeviceInfo::Software;
|
|
||||||
}
|
|
||||||
|
|
||||||
return AMDGPUDeviceInfo::Unsupported;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDevice::isSupported(AMDGPUDeviceInfo::Caps Mode) const
|
|
||||||
{
|
|
||||||
return getExecutionMode(Mode) != AMDGPUDeviceInfo::Unsupported;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDevice::usesHardware(AMDGPUDeviceInfo::Caps Mode) const
|
|
||||||
{
|
|
||||||
return getExecutionMode(Mode) == AMDGPUDeviceInfo::Hardware;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDevice::usesSoftware(AMDGPUDeviceInfo::Caps Mode) const
|
|
||||||
{
|
|
||||||
return getExecutionMode(Mode) == AMDGPUDeviceInfo::Software;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string
|
|
||||||
AMDGPUDevice::getDataLayout() const
|
|
||||||
{
|
|
||||||
return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
|
|
||||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
|
||||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
|
||||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
|
||||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
|
||||||
"-n8:16:32:64");
|
|
||||||
}
|
|
||||||
|
|
@ -1,115 +0,0 @@
|
||||||
//===---- AMDILDevice.h - Define Device Data for AMDIL -----*- C++ -*------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface for the subtarget data classes.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// This file will define the interface that each generation needs to
|
|
||||||
// implement in order to correctly answer queries on the capabilities of the
|
|
||||||
// specific hardware.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
#ifndef _AMDILDEVICEIMPL_H_
|
|
||||||
#define _AMDILDEVICEIMPL_H_
|
|
||||||
#include "AMDIL.h"
|
|
||||||
#include "llvm/ADT/BitVector.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
class AMDGPUSubtarget;
|
|
||||||
class MCStreamer;
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Interface for data that is specific to a single device
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
class AMDGPUDevice {
|
|
||||||
public:
|
|
||||||
AMDGPUDevice(AMDGPUSubtarget *ST);
|
|
||||||
virtual ~AMDGPUDevice();
|
|
||||||
|
|
||||||
// Enum values for the various memory types.
|
|
||||||
enum {
|
|
||||||
RAW_UAV_ID = 0,
|
|
||||||
ARENA_UAV_ID = 1,
|
|
||||||
LDS_ID = 2,
|
|
||||||
GDS_ID = 3,
|
|
||||||
SCRATCH_ID = 4,
|
|
||||||
CONSTANT_ID = 5,
|
|
||||||
GLOBAL_ID = 6,
|
|
||||||
MAX_IDS = 7
|
|
||||||
} IO_TYPE_IDS;
|
|
||||||
|
|
||||||
// Returns the max LDS size that the hardware supports. Size is in
|
|
||||||
// bytes.
|
|
||||||
virtual size_t getMaxLDSSize() const = 0;
|
|
||||||
|
|
||||||
// Returns the max GDS size that the hardware supports if the GDS is
|
|
||||||
// supported by the hardware. Size is in bytes.
|
|
||||||
virtual size_t getMaxGDSSize() const;
|
|
||||||
|
|
||||||
// Returns the max number of hardware constant address spaces that
|
|
||||||
// are supported by this device.
|
|
||||||
virtual size_t getMaxNumCBs() const;
|
|
||||||
|
|
||||||
// Returns the max number of bytes a single hardware constant buffer
|
|
||||||
// can support. Size is in bytes.
|
|
||||||
virtual size_t getMaxCBSize() const;
|
|
||||||
|
|
||||||
// Returns the max number of bytes allowed by the hardware scratch
|
|
||||||
// buffer. Size is in bytes.
|
|
||||||
virtual size_t getMaxScratchSize() const;
|
|
||||||
|
|
||||||
// Get the flag that corresponds to the device.
|
|
||||||
virtual uint32_t getDeviceFlag() const;
|
|
||||||
|
|
||||||
// Returns the number of work-items that exist in a single hardware
|
|
||||||
// wavefront.
|
|
||||||
virtual size_t getWavefrontSize() const = 0;
|
|
||||||
|
|
||||||
// Get the generational name of this specific device.
|
|
||||||
virtual uint32_t getGeneration() const = 0;
|
|
||||||
|
|
||||||
// Get the stack alignment of this specific device.
|
|
||||||
virtual uint32_t getStackAlignment() const;
|
|
||||||
|
|
||||||
// Get the resource ID for this specific device.
|
|
||||||
virtual uint32_t getResourceID(uint32_t DeviceID) const = 0;
|
|
||||||
|
|
||||||
// Get the max number of UAV's for this device.
|
|
||||||
virtual uint32_t getMaxNumUAVs() const = 0;
|
|
||||||
|
|
||||||
// API utilizing more detailed capabilities of each family of
|
|
||||||
// cards. If a capability is supported, then either usesHardware or
|
|
||||||
// usesSoftware returned true. If usesHardware returned true, then
|
|
||||||
// usesSoftware must return false for the same capability. Hardware
|
|
||||||
// execution means that the feature is done natively by the hardware
|
|
||||||
// and is not emulated by the softare. Software execution means
|
|
||||||
// that the feature could be done in the hardware, but there is
|
|
||||||
// software that emulates it with possibly using the hardware for
|
|
||||||
// support since the hardware does not fully comply with OpenCL
|
|
||||||
// specs.
|
|
||||||
bool isSupported(AMDGPUDeviceInfo::Caps Mode) const;
|
|
||||||
bool usesHardware(AMDGPUDeviceInfo::Caps Mode) const;
|
|
||||||
bool usesSoftware(AMDGPUDeviceInfo::Caps Mode) const;
|
|
||||||
virtual std::string getDataLayout() const;
|
|
||||||
static const unsigned int MAX_LDS_SIZE_700 = 16384;
|
|
||||||
static const unsigned int MAX_LDS_SIZE_800 = 32768;
|
|
||||||
static const unsigned int WavefrontSize = 64;
|
|
||||||
static const unsigned int HalfWavefrontSize = 32;
|
|
||||||
static const unsigned int QuarterWavefrontSize = 16;
|
|
||||||
protected:
|
|
||||||
virtual void setCaps();
|
|
||||||
llvm::BitVector mHWBits;
|
|
||||||
llvm::BitVector mSWBits;
|
|
||||||
AMDGPUSubtarget *mSTM;
|
|
||||||
uint32_t mDeviceFlag;
|
|
||||||
private:
|
|
||||||
AMDGPUDeviceInfo::ExecutionMode
|
|
||||||
getExecutionMode(AMDGPUDeviceInfo::Caps Caps) const;
|
|
||||||
}; // AMDILDevice
|
|
||||||
|
|
||||||
} // namespace llvm
|
|
||||||
#endif // _AMDILDEVICEIMPL_H_
|
|
||||||
|
|
@ -1,94 +0,0 @@
|
||||||
//===-- AMDILDeviceInfo.cpp - AMDILDeviceInfo class -----------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Function that creates DeviceInfo from a device name and other information.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
#include "AMDILDevices.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
namespace llvm {
|
|
||||||
namespace AMDGPUDeviceInfo {
|
|
||||||
AMDGPUDevice*
|
|
||||||
getDeviceFromName(const std::string &deviceName, AMDGPUSubtarget *ptr,
|
|
||||||
bool is64bit, bool is64on32bit)
|
|
||||||
{
|
|
||||||
if (deviceName.c_str()[2] == '7') {
|
|
||||||
switch (deviceName.c_str()[3]) {
|
|
||||||
case '1':
|
|
||||||
return new AMDGPU710Device(ptr);
|
|
||||||
case '7':
|
|
||||||
return new AMDGPU770Device(ptr);
|
|
||||||
default:
|
|
||||||
return new AMDGPU7XXDevice(ptr);
|
|
||||||
};
|
|
||||||
} else if (deviceName == "cypress") {
|
|
||||||
#if DEBUG
|
|
||||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
|
||||||
assert(!is64on32bit && "This device does not support 64bit"
|
|
||||||
" on 32bit pointers!");
|
|
||||||
#endif
|
|
||||||
return new AMDGPUCypressDevice(ptr);
|
|
||||||
} else if (deviceName == "juniper") {
|
|
||||||
#if DEBUG
|
|
||||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
|
||||||
assert(!is64on32bit && "This device does not support 64bit"
|
|
||||||
" on 32bit pointers!");
|
|
||||||
#endif
|
|
||||||
return new AMDGPUEvergreenDevice(ptr);
|
|
||||||
} else if (deviceName == "redwood") {
|
|
||||||
#if DEBUG
|
|
||||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
|
||||||
assert(!is64on32bit && "This device does not support 64bit"
|
|
||||||
" on 32bit pointers!");
|
|
||||||
#endif
|
|
||||||
return new AMDGPURedwoodDevice(ptr);
|
|
||||||
} else if (deviceName == "cedar") {
|
|
||||||
#if DEBUG
|
|
||||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
|
||||||
assert(!is64on32bit && "This device does not support 64bit"
|
|
||||||
" on 32bit pointers!");
|
|
||||||
#endif
|
|
||||||
return new AMDGPUCedarDevice(ptr);
|
|
||||||
} else if (deviceName == "barts"
|
|
||||||
|| deviceName == "turks") {
|
|
||||||
#if DEBUG
|
|
||||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
|
||||||
assert(!is64on32bit && "This device does not support 64bit"
|
|
||||||
" on 32bit pointers!");
|
|
||||||
#endif
|
|
||||||
return new AMDGPUNIDevice(ptr);
|
|
||||||
} else if (deviceName == "cayman") {
|
|
||||||
#if DEBUG
|
|
||||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
|
||||||
assert(!is64on32bit && "This device does not support 64bit"
|
|
||||||
" on 32bit pointers!");
|
|
||||||
#endif
|
|
||||||
return new AMDGPUCaymanDevice(ptr);
|
|
||||||
} else if (deviceName == "caicos") {
|
|
||||||
#if DEBUG
|
|
||||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
|
||||||
assert(!is64on32bit && "This device does not support 64bit"
|
|
||||||
" on 32bit pointers!");
|
|
||||||
#endif
|
|
||||||
return new AMDGPUNIDevice(ptr);
|
|
||||||
} else if (deviceName == "SI") {
|
|
||||||
return new AMDGPUSIDevice(ptr);
|
|
||||||
} else {
|
|
||||||
#if DEBUG
|
|
||||||
assert(!is64bit && "This device does not support 64bit pointers!");
|
|
||||||
assert(!is64on32bit && "This device does not support 64bit"
|
|
||||||
" on 32bit pointers!");
|
|
||||||
#endif
|
|
||||||
return new AMDGPU7XXDevice(ptr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} // End namespace AMDGPUDeviceInfo
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
@ -1,90 +0,0 @@
|
||||||
//===-- AMDILDeviceInfo.h - Constants for describing devices --------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
#ifndef _AMDILDEVICEINFO_H_
|
|
||||||
#define _AMDILDEVICEINFO_H_
|
|
||||||
|
|
||||||
|
|
||||||
#include <string>
|
|
||||||
|
|
||||||
namespace llvm
|
|
||||||
{
|
|
||||||
class AMDGPUDevice;
|
|
||||||
class AMDGPUSubtarget;
|
|
||||||
namespace AMDGPUDeviceInfo
|
|
||||||
{
|
|
||||||
// Each Capabilities can be executed using a hardware instruction,
|
|
||||||
// emulated with a sequence of software instructions, or not
|
|
||||||
// supported at all.
|
|
||||||
enum ExecutionMode {
|
|
||||||
Unsupported = 0, // Unsupported feature on the card(Default value)
|
|
||||||
Software, // This is the execution mode that is set if the
|
|
||||||
// feature is emulated in software
|
|
||||||
Hardware // This execution mode is set if the feature exists
|
|
||||||
// natively in hardware
|
|
||||||
};
|
|
||||||
|
|
||||||
// Any changes to this needs to have a corresponding update to the
|
|
||||||
// twiki page GPUMetadataABI
|
|
||||||
enum Caps {
|
|
||||||
HalfOps = 0x1, // Half float is supported or not.
|
|
||||||
DoubleOps = 0x2, // Double is supported or not.
|
|
||||||
ByteOps = 0x3, // Byte(char) is support or not.
|
|
||||||
ShortOps = 0x4, // Short is supported or not.
|
|
||||||
LongOps = 0x5, // Long is supported or not.
|
|
||||||
Images = 0x6, // Images are supported or not.
|
|
||||||
ByteStores = 0x7, // ByteStores available(!HD4XXX).
|
|
||||||
ConstantMem = 0x8, // Constant/CB memory.
|
|
||||||
LocalMem = 0x9, // Local/LDS memory.
|
|
||||||
PrivateMem = 0xA, // Scratch/Private/Stack memory.
|
|
||||||
RegionMem = 0xB, // OCL GDS Memory Extension.
|
|
||||||
FMA = 0xC, // Use HW FMA or SW FMA.
|
|
||||||
ArenaSegment = 0xD, // Use for Arena UAV per pointer 12-1023.
|
|
||||||
MultiUAV = 0xE, // Use for UAV per Pointer 0-7.
|
|
||||||
Reserved0 = 0xF, // ReservedFlag
|
|
||||||
NoAlias = 0x10, // Cached loads.
|
|
||||||
Signed24BitOps = 0x11, // Peephole Optimization.
|
|
||||||
// Debug mode implies that no hardware features or optimizations
|
|
||||||
// are performned and that all memory access go through a single
|
|
||||||
// uav(Arena on HD5XXX/HD6XXX and Raw on HD4XXX).
|
|
||||||
Debug = 0x12, // Debug mode is enabled.
|
|
||||||
CachedMem = 0x13, // Cached mem is available or not.
|
|
||||||
BarrierDetect = 0x14, // Detect duplicate barriers.
|
|
||||||
Reserved1 = 0x15, // Reserved flag
|
|
||||||
ByteLDSOps = 0x16, // Flag to specify if byte LDS ops are available.
|
|
||||||
ArenaVectors = 0x17, // Flag to specify if vector loads from arena work.
|
|
||||||
TmrReg = 0x18, // Flag to specify if Tmr register is supported.
|
|
||||||
NoInline = 0x19, // Flag to specify that no inlining should occur.
|
|
||||||
MacroDB = 0x1A, // Flag to specify that backend handles macrodb.
|
|
||||||
HW64BitDivMod = 0x1B, // Flag for backend to generate 64bit div/mod.
|
|
||||||
ArenaUAV = 0x1C, // Flag to specify that arena uav is supported.
|
|
||||||
PrivateUAV = 0x1D, // Flag to specify that private memory uses uav's.
|
|
||||||
// If more capabilities are required, then
|
|
||||||
// this number needs to be increased.
|
|
||||||
// All capabilities must come before this
|
|
||||||
// number.
|
|
||||||
MaxNumberCapabilities = 0x20
|
|
||||||
};
|
|
||||||
// These have to be in order with the older generations
|
|
||||||
// having the lower number enumerations.
|
|
||||||
enum Generation {
|
|
||||||
HD4XXX = 0, // 7XX based devices.
|
|
||||||
HD5XXX, // Evergreen based devices.
|
|
||||||
HD6XXX, // NI/Evergreen+ based devices.
|
|
||||||
HD7XXX,
|
|
||||||
HDTEST, // Experimental feature testing device.
|
|
||||||
HDNUMGEN
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
AMDGPUDevice*
|
|
||||||
getDeviceFromName(const std::string &name, AMDGPUSubtarget *ptr,
|
|
||||||
bool is64bit = false, bool is64on32bit = false);
|
|
||||||
} // namespace AMDILDeviceInfo
|
|
||||||
} // namespace llvm
|
|
||||||
#endif // _AMDILDEVICEINFO_H_
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
//===-- AMDILDevices.h - Consolidate AMDIL Device headers -----------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
#ifndef __AMDIL_DEVICES_H_
|
|
||||||
#define __AMDIL_DEVICES_H_
|
|
||||||
// Include all of the device specific header files
|
|
||||||
// This file is for Internal use only!
|
|
||||||
#include "AMDIL7XXDevice.h"
|
|
||||||
#include "AMDILDevice.h"
|
|
||||||
#include "AMDILEvergreenDevice.h"
|
|
||||||
#include "AMDILNIDevice.h"
|
|
||||||
#include "AMDILSIDevice.h"
|
|
||||||
|
|
||||||
#endif // _AMDIL_DEVICES_H_
|
|
||||||
|
|
@ -1,169 +0,0 @@
|
||||||
//===-- AMDILEvergreenDevice.cpp - Device Info for Evergreen --------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
#include "AMDILEvergreenDevice.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
AMDGPUEvergreenDevice::AMDGPUEvergreenDevice(AMDGPUSubtarget *ST)
|
|
||||||
: AMDGPUDevice(ST) {
|
|
||||||
setCaps();
|
|
||||||
std::string name = ST->getDeviceName();
|
|
||||||
if (name == "cedar") {
|
|
||||||
mDeviceFlag = OCL_DEVICE_CEDAR;
|
|
||||||
} else if (name == "redwood") {
|
|
||||||
mDeviceFlag = OCL_DEVICE_REDWOOD;
|
|
||||||
} else if (name == "cypress") {
|
|
||||||
mDeviceFlag = OCL_DEVICE_CYPRESS;
|
|
||||||
} else {
|
|
||||||
mDeviceFlag = OCL_DEVICE_JUNIPER;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUEvergreenDevice::~AMDGPUEvergreenDevice() {
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPUEvergreenDevice::getMaxLDSSize() const {
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
|
||||||
return MAX_LDS_SIZE_800;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
size_t AMDGPUEvergreenDevice::getMaxGDSSize() const {
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
|
|
||||||
return MAX_LDS_SIZE_800;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
uint32_t AMDGPUEvergreenDevice::getMaxNumUAVs() const {
|
|
||||||
return 12;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t AMDGPUEvergreenDevice::getResourceID(uint32_t id) const {
|
|
||||||
switch(id) {
|
|
||||||
default:
|
|
||||||
assert(0 && "ID type passed in is unknown!");
|
|
||||||
break;
|
|
||||||
case CONSTANT_ID:
|
|
||||||
case RAW_UAV_ID:
|
|
||||||
return GLOBAL_RETURN_RAW_UAV_ID;
|
|
||||||
case GLOBAL_ID:
|
|
||||||
case ARENA_UAV_ID:
|
|
||||||
return DEFAULT_ARENA_UAV_ID;
|
|
||||||
case LDS_ID:
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
|
||||||
return DEFAULT_LDS_ID;
|
|
||||||
} else {
|
|
||||||
return DEFAULT_ARENA_UAV_ID;
|
|
||||||
}
|
|
||||||
case GDS_ID:
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
|
|
||||||
return DEFAULT_GDS_ID;
|
|
||||||
} else {
|
|
||||||
return DEFAULT_ARENA_UAV_ID;
|
|
||||||
}
|
|
||||||
case SCRATCH_ID:
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
|
|
||||||
return DEFAULT_SCRATCH_ID;
|
|
||||||
} else {
|
|
||||||
return DEFAULT_ARENA_UAV_ID;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPUEvergreenDevice::getWavefrontSize() const {
|
|
||||||
return AMDGPUDevice::WavefrontSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t AMDGPUEvergreenDevice::getGeneration() const {
|
|
||||||
return AMDGPUDeviceInfo::HD5XXX;
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPUEvergreenDevice::setCaps() {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::ArenaUAV);
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::HW64BitDivMod);
|
|
||||||
mSWBits.reset(AMDGPUDeviceInfo::HW64BitDivMod);
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::ByteStores)) {
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::ByteStores);
|
|
||||||
}
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::Debug)) {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::LocalMem);
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::RegionMem);
|
|
||||||
} else {
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::LocalMem);
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::RegionMem);
|
|
||||||
}
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::Images);
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::NoAlias)) {
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::NoAlias);
|
|
||||||
}
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::CachedMem);
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::MultiUAV)) {
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::MultiUAV);
|
|
||||||
}
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::ByteLDSOps);
|
|
||||||
mSWBits.reset(AMDGPUDeviceInfo::ByteLDSOps);
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::ArenaVectors);
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::LongOps);
|
|
||||||
mSWBits.reset(AMDGPUDeviceInfo::LongOps);
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::TmrReg);
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUCypressDevice::AMDGPUCypressDevice(AMDGPUSubtarget *ST)
|
|
||||||
: AMDGPUEvergreenDevice(ST) {
|
|
||||||
setCaps();
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUCypressDevice::~AMDGPUCypressDevice() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPUCypressDevice::setCaps() {
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::FMA);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
AMDGPUCedarDevice::AMDGPUCedarDevice(AMDGPUSubtarget *ST)
|
|
||||||
: AMDGPUEvergreenDevice(ST) {
|
|
||||||
setCaps();
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUCedarDevice::~AMDGPUCedarDevice() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPUCedarDevice::setCaps() {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::FMA);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPUCedarDevice::getWavefrontSize() const {
|
|
||||||
return AMDGPUDevice::QuarterWavefrontSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPURedwoodDevice::AMDGPURedwoodDevice(AMDGPUSubtarget *ST)
|
|
||||||
: AMDGPUEvergreenDevice(ST) {
|
|
||||||
setCaps();
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPURedwoodDevice::~AMDGPURedwoodDevice()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPURedwoodDevice::setCaps() {
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::FMA);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t AMDGPURedwoodDevice::getWavefrontSize() const {
|
|
||||||
return AMDGPUDevice::HalfWavefrontSize;
|
|
||||||
}
|
|
||||||
|
|
@ -1,87 +0,0 @@
|
||||||
//==- AMDILEvergreenDevice.h - Define Evergreen Device for AMDIL -*- C++ -*--=//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface for the subtarget data classes.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// This file will define the interface that each generation needs to
|
|
||||||
// implement in order to correctly answer queries on the capabilities of the
|
|
||||||
// specific hardware.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
#ifndef _AMDILEVERGREENDEVICE_H_
|
|
||||||
#define _AMDILEVERGREENDEVICE_H_
|
|
||||||
#include "AMDILDevice.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
class AMDGPUSubtarget;
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Evergreen generation of devices and their respective sub classes
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
// The AMDGPUEvergreenDevice is the base device class for all of the Evergreen
|
|
||||||
// series of cards. This class contains information required to differentiate
|
|
||||||
// the Evergreen device from the generic AMDGPUDevice. This device represents
|
|
||||||
// that capabilities of the 'Juniper' cards, also known as the HD57XX.
|
|
||||||
class AMDGPUEvergreenDevice : public AMDGPUDevice {
|
|
||||||
public:
|
|
||||||
AMDGPUEvergreenDevice(AMDGPUSubtarget *ST);
|
|
||||||
virtual ~AMDGPUEvergreenDevice();
|
|
||||||
virtual size_t getMaxLDSSize() const;
|
|
||||||
virtual size_t getMaxGDSSize() const;
|
|
||||||
virtual size_t getWavefrontSize() const;
|
|
||||||
virtual uint32_t getGeneration() const;
|
|
||||||
virtual uint32_t getMaxNumUAVs() const;
|
|
||||||
virtual uint32_t getResourceID(uint32_t) const;
|
|
||||||
protected:
|
|
||||||
virtual void setCaps();
|
|
||||||
}; // AMDGPUEvergreenDevice
|
|
||||||
|
|
||||||
// The AMDGPUCypressDevice is similiar to the AMDGPUEvergreenDevice, except it has
|
|
||||||
// support for double precision operations. This device is used to represent
|
|
||||||
// both the Cypress and Hemlock cards, which are commercially known as HD58XX
|
|
||||||
// and HD59XX cards.
|
|
||||||
class AMDGPUCypressDevice : public AMDGPUEvergreenDevice {
|
|
||||||
public:
|
|
||||||
AMDGPUCypressDevice(AMDGPUSubtarget *ST);
|
|
||||||
virtual ~AMDGPUCypressDevice();
|
|
||||||
private:
|
|
||||||
virtual void setCaps();
|
|
||||||
}; // AMDGPUCypressDevice
|
|
||||||
|
|
||||||
|
|
||||||
// The AMDGPUCedarDevice is the class that represents all of the 'Cedar' based
|
|
||||||
// devices. This class differs from the base AMDGPUEvergreenDevice in that the
|
|
||||||
// device is a ~quarter of the 'Juniper'. These are commercially known as the
|
|
||||||
// HD54XX and HD53XX series of cards.
|
|
||||||
class AMDGPUCedarDevice : public AMDGPUEvergreenDevice {
|
|
||||||
public:
|
|
||||||
AMDGPUCedarDevice(AMDGPUSubtarget *ST);
|
|
||||||
virtual ~AMDGPUCedarDevice();
|
|
||||||
virtual size_t getWavefrontSize() const;
|
|
||||||
private:
|
|
||||||
virtual void setCaps();
|
|
||||||
}; // AMDGPUCedarDevice
|
|
||||||
|
|
||||||
// The AMDGPURedwoodDevice is the class the represents all of the 'Redwood' based
|
|
||||||
// devices. This class differs from the base class, in that these devices are
|
|
||||||
// considered about half of a 'Juniper' device. These are commercially known as
|
|
||||||
// the HD55XX and HD56XX series of cards.
|
|
||||||
class AMDGPURedwoodDevice : public AMDGPUEvergreenDevice {
|
|
||||||
public:
|
|
||||||
AMDGPURedwoodDevice(AMDGPUSubtarget *ST);
|
|
||||||
virtual ~AMDGPURedwoodDevice();
|
|
||||||
virtual size_t getWavefrontSize() const;
|
|
||||||
private:
|
|
||||||
virtual void setCaps();
|
|
||||||
}; // AMDGPURedwoodDevice
|
|
||||||
|
|
||||||
} // namespace llvm
|
|
||||||
#endif // _AMDGPUEVERGREENDEVICE_H_
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
//===----------------------- AMDILFrameLowering.cpp -----------------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface to describe a layout of a stack frame on a AMDIL target machine
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
#include "AMDILFrameLowering.h"
|
|
||||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
|
|
||||||
int LAO, unsigned TransAl)
|
|
||||||
: TargetFrameLowering(D, StackAl, LAO, TransAl)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUFrameLowering::~AMDGPUFrameLowering()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
/// getFrameIndexOffset - Returns the displacement from the frame register to
|
|
||||||
/// the stack frame of the specified index.
|
|
||||||
int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
|
|
||||||
int FI) const {
|
|
||||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
|
||||||
return MFI->getObjectOffset(FI);
|
|
||||||
}
|
|
||||||
|
|
||||||
const TargetFrameLowering::SpillSlot *
|
|
||||||
AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const
|
|
||||||
{
|
|
||||||
NumEntries = 0;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
void
|
|
||||||
AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const
|
|
||||||
{
|
|
||||||
}
|
|
||||||
void
|
|
||||||
AMDGPUFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
|
|
||||||
{
|
|
||||||
}
|
|
||||||
bool
|
|
||||||
AMDGPUFrameLowering::hasFP(const MachineFunction &MF) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
@ -1,46 +0,0 @@
|
||||||
//===--------------------- AMDILFrameLowering.h -----------------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface to describe a layout of a stack frame on a AMDIL target machine
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
#ifndef _AMDILFRAME_LOWERING_H_
|
|
||||||
#define _AMDILFRAME_LOWERING_H_
|
|
||||||
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
|
||||||
#include "llvm/Target/TargetFrameLowering.h"
|
|
||||||
|
|
||||||
/// Information about the stack frame layout on the AMDGPU targets. It holds
|
|
||||||
/// the direction of the stack growth, the known stack alignment on entry to
|
|
||||||
/// each function, and the offset to the locals area.
|
|
||||||
/// See TargetFrameInfo for more comments.
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
class AMDGPUFrameLowering : public TargetFrameLowering {
|
|
||||||
public:
|
|
||||||
AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO, unsigned
|
|
||||||
TransAl = 1);
|
|
||||||
virtual ~AMDGPUFrameLowering();
|
|
||||||
virtual int getFrameIndexOffset(const MachineFunction &MF,
|
|
||||||
int FI) const;
|
|
||||||
virtual const SpillSlot *
|
|
||||||
getCalleeSavedSpillSlots(unsigned &NumEntries) const;
|
|
||||||
virtual void emitPrologue(MachineFunction &MF) const;
|
|
||||||
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
|
||||||
virtual bool hasFP(const MachineFunction &MF) const;
|
|
||||||
}; // class AMDGPUFrameLowering
|
|
||||||
} // namespace llvm
|
|
||||||
#endif // _AMDILFRAME_LOWERING_H_
|
|
||||||
|
|
@ -1,395 +0,0 @@
|
||||||
//===-- AMDILISelDAGToDAG.cpp - A dag to dag inst selector for AMDIL ------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file defines an instruction selector for the AMDIL target.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
#include "AMDGPUInstrInfo.h"
|
|
||||||
#include "AMDGPUISelLowering.h" // For AMDGPUISD
|
|
||||||
#include "AMDGPURegisterInfo.h"
|
|
||||||
#include "AMDILDevices.h"
|
|
||||||
#include "AMDILUtilityFunctions.h"
|
|
||||||
#include "llvm/ADT/ValueMap.h"
|
|
||||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
|
||||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
|
||||||
#include "llvm/Support/Compiler.h"
|
|
||||||
#include <list>
|
|
||||||
#include <queue>
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Instruction Selector Implementation
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// AMDGPUDAGToDAGISel - AMDGPU specific code to select AMDGPU machine instructions
|
|
||||||
// //for SelectionDAG operations.
|
|
||||||
//
|
|
||||||
namespace {
|
|
||||||
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
|
|
||||||
// Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
|
|
||||||
// make the right decision when generating code for different targets.
|
|
||||||
const AMDGPUSubtarget &Subtarget;
|
|
||||||
public:
|
|
||||||
AMDGPUDAGToDAGISel(TargetMachine &TM);
|
|
||||||
virtual ~AMDGPUDAGToDAGISel();
|
|
||||||
|
|
||||||
SDNode *Select(SDNode *N);
|
|
||||||
virtual const char *getPassName() const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
inline SDValue getSmallIPtrImm(unsigned Imm);
|
|
||||||
|
|
||||||
// Complex pattern selectors
|
|
||||||
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
|
|
||||||
bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
|
|
||||||
bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
|
|
||||||
|
|
||||||
static bool checkType(const Value *ptr, unsigned int addrspace);
|
|
||||||
static const Value *getBasePointerValue(const Value *V);
|
|
||||||
|
|
||||||
static bool isGlobalStore(const StoreSDNode *N);
|
|
||||||
static bool isPrivateStore(const StoreSDNode *N);
|
|
||||||
static bool isLocalStore(const StoreSDNode *N);
|
|
||||||
static bool isRegionStore(const StoreSDNode *N);
|
|
||||||
|
|
||||||
static bool isCPLoad(const LoadSDNode *N);
|
|
||||||
static bool isConstantLoad(const LoadSDNode *N, int cbID);
|
|
||||||
static bool isGlobalLoad(const LoadSDNode *N);
|
|
||||||
static bool isPrivateLoad(const LoadSDNode *N);
|
|
||||||
static bool isLocalLoad(const LoadSDNode *N);
|
|
||||||
static bool isRegionLoad(const LoadSDNode *N);
|
|
||||||
|
|
||||||
bool SelectADDR8BitOffset(SDValue Addr, SDValue& Base, SDValue& Offset);
|
|
||||||
bool SelectADDRReg(SDValue Addr, SDValue& Base, SDValue& Offset);
|
|
||||||
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
|
|
||||||
|
|
||||||
// Include the pieces autogenerated from the target description.
|
|
||||||
#include "AMDGPUGenDAGISel.inc"
|
|
||||||
};
|
|
||||||
} // end anonymous namespace
|
|
||||||
|
|
||||||
// createAMDGPUISelDag - This pass converts a legalized DAG into a AMDGPU-specific
|
|
||||||
// DAG, ready for instruction scheduling.
|
|
||||||
//
|
|
||||||
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
|
|
||||||
) {
|
|
||||||
return new AMDGPUDAGToDAGISel(TM);
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM
|
|
||||||
)
|
|
||||||
: SelectionDAGISel(TM), Subtarget(TM.getSubtarget<AMDGPUSubtarget>())
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
|
|
||||||
return CurDAG->getTargetConstant(Imm, MVT::i32);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::SelectADDRParam(
|
|
||||||
SDValue Addr, SDValue& R1, SDValue& R2) {
|
|
||||||
|
|
||||||
if (Addr.getOpcode() == ISD::FrameIndex) {
|
|
||||||
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
|
|
||||||
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
|
|
||||||
R2 = CurDAG->getTargetConstant(0, MVT::i32);
|
|
||||||
} else {
|
|
||||||
R1 = Addr;
|
|
||||||
R2 = CurDAG->getTargetConstant(0, MVT::i32);
|
|
||||||
}
|
|
||||||
} else if (Addr.getOpcode() == ISD::ADD) {
|
|
||||||
R1 = Addr.getOperand(0);
|
|
||||||
R2 = Addr.getOperand(1);
|
|
||||||
} else {
|
|
||||||
R1 = Addr;
|
|
||||||
R2 = CurDAG->getTargetConstant(0, MVT::i32);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::SelectADDR(SDValue Addr, SDValue& R1, SDValue& R2) {
|
|
||||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
|
||||||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return SelectADDRParam(Addr, R1, R2);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
|
|
||||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
|
||||||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Addr.getOpcode() == ISD::FrameIndex) {
|
|
||||||
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
|
|
||||||
R1 = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i64);
|
|
||||||
R2 = CurDAG->getTargetConstant(0, MVT::i64);
|
|
||||||
} else {
|
|
||||||
R1 = Addr;
|
|
||||||
R2 = CurDAG->getTargetConstant(0, MVT::i64);
|
|
||||||
}
|
|
||||||
} else if (Addr.getOpcode() == ISD::ADD) {
|
|
||||||
R1 = Addr.getOperand(0);
|
|
||||||
R2 = Addr.getOperand(1);
|
|
||||||
} else {
|
|
||||||
R1 = Addr;
|
|
||||||
R2 = CurDAG->getTargetConstant(0, MVT::i64);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
|
|
||||||
unsigned int Opc = N->getOpcode();
|
|
||||||
if (N->isMachineOpcode()) {
|
|
||||||
return NULL; // Already selected.
|
|
||||||
}
|
|
||||||
switch (Opc) {
|
|
||||||
default: break;
|
|
||||||
case ISD::FrameIndex:
|
|
||||||
{
|
|
||||||
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
|
|
||||||
unsigned int FI = FIN->getIndex();
|
|
||||||
EVT OpVT = N->getValueType(0);
|
|
||||||
unsigned int NewOpc = AMDGPU::COPY;
|
|
||||||
SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
|
|
||||||
return CurDAG->SelectNodeTo(N, NewOpc, OpVT, TFI);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return SelectCode(N);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
|
|
||||||
if (!ptr) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
Type *ptrType = ptr->getType();
|
|
||||||
return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
|
|
||||||
}
|
|
||||||
|
|
||||||
const Value * AMDGPUDAGToDAGISel::getBasePointerValue(const Value *V)
|
|
||||||
{
|
|
||||||
if (!V) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
const Value *ret = NULL;
|
|
||||||
ValueMap<const Value *, bool> ValueBitMap;
|
|
||||||
std::queue<const Value *, std::list<const Value *> > ValueQueue;
|
|
||||||
ValueQueue.push(V);
|
|
||||||
while (!ValueQueue.empty()) {
|
|
||||||
V = ValueQueue.front();
|
|
||||||
if (ValueBitMap.find(V) == ValueBitMap.end()) {
|
|
||||||
ValueBitMap[V] = true;
|
|
||||||
if (dyn_cast<Argument>(V) && dyn_cast<PointerType>(V->getType())) {
|
|
||||||
ret = V;
|
|
||||||
break;
|
|
||||||
} else if (dyn_cast<GlobalVariable>(V)) {
|
|
||||||
ret = V;
|
|
||||||
break;
|
|
||||||
} else if (dyn_cast<Constant>(V)) {
|
|
||||||
const ConstantExpr *CE = dyn_cast<ConstantExpr>(V);
|
|
||||||
if (CE) {
|
|
||||||
ValueQueue.push(CE->getOperand(0));
|
|
||||||
}
|
|
||||||
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
|
|
||||||
ret = AI;
|
|
||||||
break;
|
|
||||||
} else if (const Instruction *I = dyn_cast<Instruction>(V)) {
|
|
||||||
uint32_t numOps = I->getNumOperands();
|
|
||||||
for (uint32_t x = 0; x < numOps; ++x) {
|
|
||||||
ValueQueue.push(I->getOperand(x));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// assert(0 && "Found a Value that we didn't know how to handle!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ValueQueue.pop();
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
|
|
||||||
return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
|
|
||||||
return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
|
|
||||||
&& !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
|
|
||||||
&& !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
|
|
||||||
return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
|
|
||||||
return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) {
|
|
||||||
if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
MachineMemOperand *MMO = N->getMemOperand();
|
|
||||||
const Value *V = MMO->getValue();
|
|
||||||
const Value *BV = getBasePointerValue(V);
|
|
||||||
if (MMO
|
|
||||||
&& MMO->getValue()
|
|
||||||
&& ((V && dyn_cast<GlobalValue>(V))
|
|
||||||
|| (BV && dyn_cast<GlobalValue>(
|
|
||||||
getBasePointerValue(MMO->getValue()))))) {
|
|
||||||
return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) {
|
|
||||||
return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) {
|
|
||||||
return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) {
|
|
||||||
return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) {
|
|
||||||
MachineMemOperand *MMO = N->getMemOperand();
|
|
||||||
if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
|
|
||||||
if (MMO) {
|
|
||||||
const Value *V = MMO->getValue();
|
|
||||||
const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
|
|
||||||
if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) {
|
|
||||||
if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
|
|
||||||
// Check to make sure we are not a constant pool load or a constant load
|
|
||||||
// that is marked as a private load
|
|
||||||
if (isCPLoad(N) || isConstantLoad(N, -1)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
|
|
||||||
&& !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
|
|
||||||
&& !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
|
|
||||||
&& !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
|
|
||||||
&& !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
|
|
||||||
&& !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS))
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
const char *AMDGPUDAGToDAGISel::getPassName() const {
|
|
||||||
return "AMDGPU DAG->DAG Pattern Instruction Selection";
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef DEBUGTMP
|
|
||||||
#undef INT64_C
|
|
||||||
#endif
|
|
||||||
#undef DEBUGTMP
|
|
||||||
|
|
||||||
///==== AMDGPU Functions ====///
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::SelectADDR8BitOffset(SDValue Addr, SDValue& Base,
|
|
||||||
SDValue& Offset) {
|
|
||||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
|
||||||
Addr.getOpcode() == ISD::TargetGlobalAddress) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (Addr.getOpcode() == ISD::ADD) {
|
|
||||||
bool Match = false;
|
|
||||||
|
|
||||||
// Find the base ptr and the offset
|
|
||||||
for (unsigned i = 0; i < Addr.getNumOperands(); i++) {
|
|
||||||
SDValue Arg = Addr.getOperand(i);
|
|
||||||
ConstantSDNode * OffsetNode = dyn_cast<ConstantSDNode>(Arg);
|
|
||||||
// This arg isn't a constant so it must be the base PTR.
|
|
||||||
if (!OffsetNode) {
|
|
||||||
Base = Addr.getOperand(i);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Check if the constant argument fits in 8-bits. The offset is in bytes
|
|
||||||
// so we need to convert it to dwords.
|
|
||||||
if (isUInt<8>(OffsetNode->getZExtValue() >> 2)) {
|
|
||||||
Match = true;
|
|
||||||
Offset = CurDAG->getTargetConstant(OffsetNode->getZExtValue() >> 2,
|
|
||||||
MVT::i32);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Match;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Default case, no offset
|
|
||||||
Base = Addr;
|
|
||||||
Offset = CurDAG->getTargetConstant(0, MVT::i32);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
|
|
||||||
SDValue &Offset)
|
|
||||||
{
|
|
||||||
ConstantSDNode * IMMOffset;
|
|
||||||
|
|
||||||
if (Addr.getOpcode() == ISD::ADD
|
|
||||||
&& (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
|
|
||||||
&& isInt<16>(IMMOffset->getZExtValue())) {
|
|
||||||
|
|
||||||
Base = Addr.getOperand(0);
|
|
||||||
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
|
|
||||||
return true;
|
|
||||||
// If the pointer address is constant, we can move it to the offset field.
|
|
||||||
} else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
|
|
||||||
&& isInt<16>(IMMOffset->getZExtValue())) {
|
|
||||||
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
|
|
||||||
CurDAG->getEntryNode().getDebugLoc(),
|
|
||||||
AMDGPU::ZERO, MVT::i32);
|
|
||||||
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), MVT::i32);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Default case, no offset
|
|
||||||
Base = Addr;
|
|
||||||
Offset = CurDAG->getTargetConstant(0, MVT::i32);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AMDGPUDAGToDAGISel::SelectADDRReg(SDValue Addr, SDValue& Base,
|
|
||||||
SDValue& Offset) {
|
|
||||||
if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
|
|
||||||
Addr.getOpcode() == ISD::TargetGlobalAddress ||
|
|
||||||
Addr.getOpcode() != ISD::ADD) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
Base = Addr.getOperand(0);
|
|
||||||
Offset = Addr.getOperand(1);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
@ -1,677 +0,0 @@
|
||||||
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains TargetLowering functions borrowed from AMDLI.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPUISelLowering.h"
|
|
||||||
#include "AMDGPURegisterInfo.h"
|
|
||||||
#include "AMDILDevices.h"
|
|
||||||
#include "AMDILIntrinsicInfo.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
#include "AMDILUtilityFunctions.h"
|
|
||||||
#include "llvm/CallingConv.h"
|
|
||||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
#include "llvm/CodeGen/PseudoSourceValue.h"
|
|
||||||
#include "llvm/CodeGen/SelectionDAG.h"
|
|
||||||
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
|
||||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
|
||||||
#include "llvm/DerivedTypes.h"
|
|
||||||
#include "llvm/Instructions.h"
|
|
||||||
#include "llvm/Intrinsics.h"
|
|
||||||
#include "llvm/Support/raw_ostream.h"
|
|
||||||
#include "llvm/Target/TargetInstrInfo.h"
|
|
||||||
#include "llvm/Target/TargetOptions.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Calling Convention Implementation
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
#include "AMDGPUGenCallingConv.inc"
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// TargetLowering Implementation Help Functions End
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// TargetLowering Class Implementation Begins
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
void AMDGPUTargetLowering::InitAMDILLowering()
|
|
||||||
{
|
|
||||||
int types[] =
|
|
||||||
{
|
|
||||||
(int)MVT::i8,
|
|
||||||
(int)MVT::i16,
|
|
||||||
(int)MVT::i32,
|
|
||||||
(int)MVT::f32,
|
|
||||||
(int)MVT::f64,
|
|
||||||
(int)MVT::i64,
|
|
||||||
(int)MVT::v2i8,
|
|
||||||
(int)MVT::v4i8,
|
|
||||||
(int)MVT::v2i16,
|
|
||||||
(int)MVT::v4i16,
|
|
||||||
(int)MVT::v4f32,
|
|
||||||
(int)MVT::v4i32,
|
|
||||||
(int)MVT::v2f32,
|
|
||||||
(int)MVT::v2i32,
|
|
||||||
(int)MVT::v2f64,
|
|
||||||
(int)MVT::v2i64
|
|
||||||
};
|
|
||||||
|
|
||||||
int IntTypes[] =
|
|
||||||
{
|
|
||||||
(int)MVT::i8,
|
|
||||||
(int)MVT::i16,
|
|
||||||
(int)MVT::i32,
|
|
||||||
(int)MVT::i64
|
|
||||||
};
|
|
||||||
|
|
||||||
int FloatTypes[] =
|
|
||||||
{
|
|
||||||
(int)MVT::f32,
|
|
||||||
(int)MVT::f64
|
|
||||||
};
|
|
||||||
|
|
||||||
int VectorTypes[] =
|
|
||||||
{
|
|
||||||
(int)MVT::v2i8,
|
|
||||||
(int)MVT::v4i8,
|
|
||||||
(int)MVT::v2i16,
|
|
||||||
(int)MVT::v4i16,
|
|
||||||
(int)MVT::v4f32,
|
|
||||||
(int)MVT::v4i32,
|
|
||||||
(int)MVT::v2f32,
|
|
||||||
(int)MVT::v2i32,
|
|
||||||
(int)MVT::v2f64,
|
|
||||||
(int)MVT::v2i64
|
|
||||||
};
|
|
||||||
size_t numTypes = sizeof(types) / sizeof(*types);
|
|
||||||
size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
|
|
||||||
size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
|
|
||||||
size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
|
|
||||||
|
|
||||||
const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
|
|
||||||
// These are the current register classes that are
|
|
||||||
// supported
|
|
||||||
|
|
||||||
for (unsigned int x = 0; x < numTypes; ++x) {
|
|
||||||
MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
|
|
||||||
|
|
||||||
//FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
|
|
||||||
// We cannot sextinreg, expand to shifts
|
|
||||||
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
|
|
||||||
setOperationAction(ISD::SUBE, VT, Expand);
|
|
||||||
setOperationAction(ISD::SUBC, VT, Expand);
|
|
||||||
setOperationAction(ISD::ADDE, VT, Expand);
|
|
||||||
setOperationAction(ISD::ADDC, VT, Expand);
|
|
||||||
setOperationAction(ISD::BRCOND, VT, Custom);
|
|
||||||
setOperationAction(ISD::BR_JT, VT, Expand);
|
|
||||||
setOperationAction(ISD::BRIND, VT, Expand);
|
|
||||||
// TODO: Implement custom UREM/SREM routines
|
|
||||||
setOperationAction(ISD::SREM, VT, Expand);
|
|
||||||
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
|
|
||||||
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
|
|
||||||
if (VT != MVT::i64 && VT != MVT::v2i64) {
|
|
||||||
setOperationAction(ISD::SDIV, VT, Custom);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (unsigned int x = 0; x < numFloatTypes; ++x) {
|
|
||||||
MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
|
|
||||||
|
|
||||||
// IL does not have these operations for floating point types
|
|
||||||
setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
|
|
||||||
setOperationAction(ISD::SETOLT, VT, Expand);
|
|
||||||
setOperationAction(ISD::SETOGE, VT, Expand);
|
|
||||||
setOperationAction(ISD::SETOGT, VT, Expand);
|
|
||||||
setOperationAction(ISD::SETOLE, VT, Expand);
|
|
||||||
setOperationAction(ISD::SETULT, VT, Expand);
|
|
||||||
setOperationAction(ISD::SETUGE, VT, Expand);
|
|
||||||
setOperationAction(ISD::SETUGT, VT, Expand);
|
|
||||||
setOperationAction(ISD::SETULE, VT, Expand);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned int x = 0; x < numIntTypes; ++x) {
|
|
||||||
MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
|
|
||||||
|
|
||||||
// GPU also does not have divrem function for signed or unsigned
|
|
||||||
setOperationAction(ISD::SDIVREM, VT, Expand);
|
|
||||||
|
|
||||||
// GPU does not have [S|U]MUL_LOHI functions as a single instruction
|
|
||||||
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
|
|
||||||
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
|
|
||||||
|
|
||||||
// GPU doesn't have a rotl, rotr, or byteswap instruction
|
|
||||||
setOperationAction(ISD::ROTR, VT, Expand);
|
|
||||||
setOperationAction(ISD::BSWAP, VT, Expand);
|
|
||||||
|
|
||||||
// GPU doesn't have any counting operators
|
|
||||||
setOperationAction(ISD::CTPOP, VT, Expand);
|
|
||||||
setOperationAction(ISD::CTTZ, VT, Expand);
|
|
||||||
setOperationAction(ISD::CTLZ, VT, Expand);
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
|
|
||||||
{
|
|
||||||
MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
|
|
||||||
|
|
||||||
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
|
|
||||||
setOperationAction(ISD::SDIVREM, VT, Expand);
|
|
||||||
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
|
|
||||||
// setOperationAction(ISD::VSETCC, VT, Expand);
|
|
||||||
setOperationAction(ISD::SELECT_CC, VT, Expand);
|
|
||||||
|
|
||||||
}
|
|
||||||
if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
|
|
||||||
setOperationAction(ISD::MULHU, MVT::i64, Expand);
|
|
||||||
setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
|
|
||||||
setOperationAction(ISD::MULHS, MVT::i64, Expand);
|
|
||||||
setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
|
|
||||||
setOperationAction(ISD::ADD, MVT::v2i64, Expand);
|
|
||||||
setOperationAction(ISD::SREM, MVT::v2i64, Expand);
|
|
||||||
setOperationAction(ISD::Constant , MVT::i64 , Legal);
|
|
||||||
setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
|
|
||||||
setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
|
|
||||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
|
|
||||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
|
|
||||||
setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
|
|
||||||
}
|
|
||||||
if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
|
|
||||||
// we support loading/storing v2f64 but not operations on the type
|
|
||||||
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
|
|
||||||
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
|
|
||||||
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
|
|
||||||
setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
|
|
||||||
setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
|
|
||||||
setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
|
|
||||||
// We want to expand vector conversions into their scalar
|
|
||||||
// counterparts.
|
|
||||||
setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
|
|
||||||
setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
|
|
||||||
setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
|
|
||||||
setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
|
|
||||||
setOperationAction(ISD::FABS, MVT::f64, Expand);
|
|
||||||
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
|
|
||||||
}
|
|
||||||
// TODO: Fix the UDIV24 algorithm so it works for these
|
|
||||||
// types correctly. This needs vector comparisons
|
|
||||||
// for this to work correctly.
|
|
||||||
setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
|
|
||||||
setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
|
|
||||||
setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
|
|
||||||
setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
|
|
||||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
|
|
||||||
setOperationAction(ISD::SUBC, MVT::Other, Expand);
|
|
||||||
setOperationAction(ISD::ADDE, MVT::Other, Expand);
|
|
||||||
setOperationAction(ISD::ADDC, MVT::Other, Expand);
|
|
||||||
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
|
|
||||||
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
|
|
||||||
setOperationAction(ISD::BRIND, MVT::Other, Expand);
|
|
||||||
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
|
|
||||||
|
|
||||||
|
|
||||||
// Use the default implementation.
|
|
||||||
setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
|
|
||||||
setOperationAction(ISD::Constant , MVT::i32 , Legal);
|
|
||||||
|
|
||||||
setSchedulingPreference(Sched::RegPressure);
|
|
||||||
setPow2DivIsCheap(false);
|
|
||||||
setPrefLoopAlignment(16);
|
|
||||||
setSelectIsExpensive(true);
|
|
||||||
setJumpIsExpensive(true);
|
|
||||||
|
|
||||||
maxStoresPerMemcpy = 4096;
|
|
||||||
maxStoresPerMemmove = 4096;
|
|
||||||
maxStoresPerMemset = 4096;
|
|
||||||
|
|
||||||
#undef numTypes
|
|
||||||
#undef numIntTypes
|
|
||||||
#undef numVectorTypes
|
|
||||||
#undef numFloatTypes
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
|
|
||||||
const CallInst &I, unsigned Intrinsic) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// The backend supports 32 and 64 bit floating point immediates
|
|
||||||
bool
|
|
||||||
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
|
|
||||||
{
|
|
||||||
if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
|
|
||||||
|| VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
|
|
||||||
return true;
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const
|
|
||||||
{
|
|
||||||
if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
|
|
||||||
|| VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
|
|
||||||
// be zero. Op is expected to be a target specific node. Used by DAG
|
|
||||||
// combiner.
|
|
||||||
|
|
||||||
void
|
|
||||||
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
|
|
||||||
const SDValue Op,
|
|
||||||
APInt &KnownZero,
|
|
||||||
APInt &KnownOne,
|
|
||||||
const SelectionDAG &DAG,
|
|
||||||
unsigned Depth) const
|
|
||||||
{
|
|
||||||
APInt KnownZero2;
|
|
||||||
APInt KnownOne2;
|
|
||||||
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
|
|
||||||
switch (Op.getOpcode()) {
|
|
||||||
default: break;
|
|
||||||
case ISD::SELECT_CC:
|
|
||||||
DAG.ComputeMaskedBits(
|
|
||||||
Op.getOperand(1),
|
|
||||||
KnownZero,
|
|
||||||
KnownOne,
|
|
||||||
Depth + 1
|
|
||||||
);
|
|
||||||
DAG.ComputeMaskedBits(
|
|
||||||
Op.getOperand(0),
|
|
||||||
KnownZero2,
|
|
||||||
KnownOne2
|
|
||||||
);
|
|
||||||
assert((KnownZero & KnownOne) == 0
|
|
||||||
&& "Bits known to be one AND zero?");
|
|
||||||
assert((KnownZero2 & KnownOne2) == 0
|
|
||||||
&& "Bits known to be one AND zero?");
|
|
||||||
// Only known if known in both the LHS and RHS
|
|
||||||
KnownOne &= KnownOne2;
|
|
||||||
KnownZero &= KnownZero2;
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Other Lowering Hooks
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
SDValue
|
|
||||||
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
EVT OVT = Op.getValueType();
|
|
||||||
SDValue DST;
|
|
||||||
if (OVT.getScalarType() == MVT::i64) {
|
|
||||||
DST = LowerSDIV64(Op, DAG);
|
|
||||||
} else if (OVT.getScalarType() == MVT::i32) {
|
|
||||||
DST = LowerSDIV32(Op, DAG);
|
|
||||||
} else if (OVT.getScalarType() == MVT::i16
|
|
||||||
|| OVT.getScalarType() == MVT::i8) {
|
|
||||||
DST = LowerSDIV24(Op, DAG);
|
|
||||||
} else {
|
|
||||||
DST = SDValue(Op.getNode(), 0);
|
|
||||||
}
|
|
||||||
return DST;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dispatch SREM to the width-specific lowering routine; an unrecognized
// scalar width is returned untouched.
SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT ScalarVT = Op.getValueType().getScalarType();
  if (ScalarVT == MVT::i64)
    return LowerSREM64(Op, DAG);
  if (ScalarVT == MVT::i32)
    return LowerSREM32(Op, DAG);
  if (ScalarVT == MVT::i16)
    return LowerSREM16(Op, DAG);
  if (ScalarVT == MVT::i8)
    return LowerSREM8(Op, DAG);
  return SDValue(Op.getNode(), 0);
}
|
|
||||||
|
|
||||||
// Lower SIGN_EXTEND_INREG as a shl/sra pair: shift the narrow value up to
// the top of the register, then arithmetic-shift it back down so the sign
// bit fills the upper bits. Sub-32-bit sources are first widened to 32 bits.
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Src = Op.getOperand(0);
  VTSDNode *NarrowNode = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT SrcVT = Src.getValueType();
  EVT NarrowVT = NarrowNode->getVT();
  unsigned baseBits = NarrowVT.getScalarType().getSizeInBits();
  unsigned srcBits = SrcVT.isSimple() ? SrcVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  bool Widened = srcBits < 32;
  if (Widened) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT WideVT = genIntType(32, SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1);
    Src = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Src);
    shiftBits = 32 - baseBits;
    SrcVT = WideVT;
  }
  SDValue ShiftAmt = DAG.getConstant(shiftBits, SrcVT);
  // Shift left, then signed shift right, by the same amount.
  Src = DAG.getNode(ISD::SHL, DL, SrcVT, Src, ShiftAmt);
  Src = DAG.getNode(ISD::SRA, DL, SrcVT, Src, ShiftAmt);
  if (Widened) {
    // Once the sign extension is done, convert back to the original type.
    Src = DAG.getSExtOrTrunc(Src, DL, Op.getOperand(0).getValueType());
  }
  return Src;
}
|
|
||||||
// Build an integer EVT that covers `numEle` elements of `size` bits, using
// i64 lanes when size == 64 and i32 lanes otherwise. The element count is
// the total bit width divided by the lane width, clamped to at least one.
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int totalBits = (size * numEle);
  int numLanes = (totalBits >> ((size == 64) ? 6 : 5));
  if (numLanes < 1) {
    numLanes = 1;
  }
  MVT LaneTy = (size == 64) ? MVT(MVT::i64) : MVT(MVT::i32);
  if (numLanes == 1) {
    return EVT(LaneTy);
  }
  return EVT(MVT::getVectorVT(LaneTy, numLanes));
}
|
|
||||||
|
|
||||||
// Re-emit the generic BRCOND as the target's BRANCH_COND node. Note the
// operand order changes: BRCOND is (chain, cond, target) while BRANCH_COND
// takes (chain, target, cond).
SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  return DAG.getNode(
      AMDGPUISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),   // chain
      Op.getOperand(2),   // jump target
      Op.getOperand(1));  // condition
}
|
|
||||||
|
|
||||||
// Lower signed division of i8/i16 values (and 2/4-element vectors of them)
// through single-precision floating point: compute a truncated float
// quotient, then add a +/-1 correction when the rounded quotient was off by
// one (detected via the mad-based remainder test below).
SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // Matching 32-bit integer/float types for the widened computation.
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1  -- jq is now +1 or -1, the sign of the quotient.
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);  -- the remainder, in float.
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  // FIX: the original guarded this with `if (INTTY == MVT::i32)` whose two
  // branches were byte-for-byte identical; collapsed to a single call.
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);  -- apply the off-by-one correction only if needed.
  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}
|
|
||||||
|
|
||||||
// Lower signed 32-bit division through unsigned division. Equivalent IL:
//   ilt  r10, r0, 0       ; r10 = LHS < 0 ? -1 : 0   (sign mask)
//   ilt  r11, r1, 0       ; r11 = RHS < 0 ? -1 : 0
//   iadd r0, r0, r10      ;
//   ixor r0, r0, r10      ; r0 = abs(LHS)   ((x + m) ^ m)
//   iadd r1, r1, r11      ;
//   ixor r1, r1, r11      ; r1 = abs(RHS)
//   udiv r0, r0, r1       ; unsigned quotient
//   ixor r10, r10, r11    ; sign of the result
//   iadd r0, r0, r10      ;
//   ixor DST, r0, r10     ; negate quotient when the signs differed
SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue r0 = Op.getOperand(0);
  SDValue r1 = Op.getOperand(1);

  // Sign masks: all-ones when the operand is negative, zero otherwise.
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // abs(x) == (x + mask) ^ mask.
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // Unsigned divide of the magnitudes.
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // Combined sign mask, then conditional negation of the quotient.
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
  return DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
}
|
|
||||||
|
|
||||||
// 64-bit signed division has no custom lowering; hand the node back as-is.
SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
|
|
||||||
|
|
||||||
// i8 SREM is computed in 32 bits: sign-extend both operands to (v)i32,
// take the i32 SREM, and truncate the result back to the original type.
SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue WideLHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue WideRHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  SDValue Rem = DAG.getNode(ISD::SREM, DL, INTTY, WideLHS, WideRHS);
  return DAG.getSExtOrTrunc(Rem, DL, OVT);
}
|
|
||||||
|
|
||||||
// i16 SREM is computed in 32 bits: sign-extend both operands to (v)i32,
// take the i32 SREM, and truncate the result back to the original type.
SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue WideLHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue WideRHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  SDValue Rem = DAG.getNode(ISD::SREM, DL, INTTY, WideLHS, WideRHS);
  return DAG.getSExtOrTrunc(Rem, DL, OVT);
}
|
|
||||||
|
|
||||||
// Lower signed 32-bit remainder through unsigned arithmetic. Equivalent IL:
//   ilt  r10, r0, 0
//   ilt  r11, r1, 0
//   iadd r0, r0, r10
//   iadd r1, r1, r11
//   ixor r0, r0, r10      ; r0 = abs(LHS), r1 = abs(RHS)
//   ixor r1, r1, r11
//   udiv r20, r0, r1
//   umul r20, r20, r1
//   sub  r0, r0, r20      ; r0 = r0 - (r0 / r1) * r1 == unsigned remainder
//   iadd r0, r0, r10      ; remainder takes the sign of the dividend
//   ixor DST, r0, r10
SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);

  // mov r0, LHS / mov r1, RHS
  SDValue r0 = LHS;
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  // BUGFIX: the original emitted ISD::UREM here; combined with the umul and
  // sub below, that computed r0 - (r0 % r1) * r1, which is not the
  // remainder. ISD::UDIV matches the documented IL sequence and yields
  // r0 - (r0 / r1) * r1 == r0 % r1.
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
|
|
||||||
|
|
||||||
// 64-bit signed remainder has no custom lowering; hand the node back as-is.
SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}
|
|
||||||
|
|
@ -1,270 +0,0 @@
|
||||||
//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file describes the AMDIL instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMDIL Instruction Predicate Definitions
//
// Each Predicate body is a C++ expression over `Subtarget`, assembled from
// the adjacent string literals below.
//===----------------------------------------------------------------------===//

// Double precision divide is done in hardware.
def HasHWDDiv : Predicate<"Subtarget.device()"
                          "->getGeneration() > AMDGPUDeviceInfo::HD4XXX && "
                          "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;

// Doubles are supported, but double precision divide is emulated.
def HasSWDDiv : Predicate<"Subtarget.device()"
                          "->getGeneration() == AMDGPUDeviceInfo::HD4XXX &&"
                          "Subtarget.device()->usesHardware(AMDGPUDeviceInfo::DoubleOps)">;

// 24-bit signed math ops are native; otherwise they expand to 32-bit ops.
def HasHWSign24Bit : Predicate<"Subtarget.device()"
                               "->getGeneration() > AMDGPUDeviceInfo::HD5XXX">;

// 64-bit operations: hardware vs. software emulation.
def HasHW64Bit : Predicate<"Subtarget.device()"
                           "->usesHardware(AMDGPUDeviceInfo::LongOps)">;
def HasSW64Bit : Predicate<"Subtarget.device()"
                           "->usesSoftware(AMDGPUDeviceInfo::LongOps)">;

// The timer register is available.
def HasTmrRegister : Predicate<"Subtarget.device()"
                               "->isSupported(AMDGPUDeviceInfo::TmrReg)">;

// True on Evergreen-series devices and newer.
def HasDeviceIDInst : Predicate<"Subtarget.device()"
                                "->getGeneration() >= AMDGPUDeviceInfo::HD5XXX">;

// Region address space available / not available.
def hasRegionAS : Predicate<"Subtarget.device()"
                            "->usesHardware(AMDGPUDeviceInfo::RegionMem)">;
def noRegionAS : Predicate<"!Subtarget.device()"
                           "->isSupported(AMDGPUDeviceInfo::RegionMem)">;

// 64-bit multiply supported in the IL vs. emulated.
def HasHW64Mul : Predicate<"Subtarget.calVersion()"
                           ">= CAL_VERSION_SC_139"
                           "&& Subtarget.device()"
                           "->getGeneration() >="
                           "AMDGPUDeviceInfo::HD5XXX">;
def HasSW64Mul : Predicate<"Subtarget.calVersion()"
                           "< CAL_VERSION_SC_139">;

// 64-bit div/mod supported in the IL vs. emulated.
def HasHW64DivMod : Predicate<"Subtarget.device()"
                              "->usesHardware(AMDGPUDeviceInfo::HW64BitDivMod)">;
def HasSW64DivMod : Predicate<"Subtarget.device()"
                              "->usesSoftware(AMDGPUDeviceInfo::HW64BitDivMod)">;

// Pointer width of the target.
def Has64BitPtr : Predicate<"Subtarget.is64bit()">;
def Has32BitPtr : Predicate<"!Subtarget.is64bit()">;
|
|
||||||
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//

// Branch target operand (a basic block).
def brtarget : Operand<OtherVT>;

//===--------------------------------------------------------------------===//
// Custom Selection DAG Type Profiles
//===--------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Generic Profile Types
//===----------------------------------------------------------------------===//

// One result plus two / three operands, all of the same type.
def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>
]>;
// Build a vector from a scalar of its element type.
def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [
  SDTCisEltOfVec<1, 0>
]>;

//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
// Conditional branch: a basic-block target plus a condition value.
def SDTIL_BRCond : SDTypeProfile<0, 2, [
  SDTCisVT<0, OtherVT>
]>;
|
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
// Custom Selection DAG Nodes
//===--------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//
// Conditional branch; carries the chain.
def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
// Function return; chained and optionally glued.
def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
                        [SDNPHasChain, SDNPOptInGlue]>;

//===--------------------------------------------------------------------===//
// Instructions
//===--------------------------------------------------------------------===//
// Floating point math functions
def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>;
def IL_mad : SDNode<"AMDGPUISD::MAD", SDTIL_GenTernaryOp>;

//===----------------------------------------------------------------------===//
// Integer functions
//===----------------------------------------------------------------------===//
// Unsigned multiply; commutative and associative.
def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp,
                     [SDNPCommutative, SDNPAssociative]>;
|
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
// Custom Pattern DAG Nodes
//===--------------------------------------------------------------------===//
// Store into the global address space.
def global_store : PatFrag<(ops node:$val, node:$ptr),
                           (store node:$val, node:$ptr), [{
  return isGlobalStore(dyn_cast<StoreSDNode>(N));
}]>;

//===----------------------------------------------------------------------===//
// Load pattern fragments
//===----------------------------------------------------------------------===//
// Load from the global address space.
def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return isGlobalLoad(dyn_cast<LoadSDNode>(N));
}]>;
// Load from the constant address space (-1 == any constant buffer).
def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return isConstantLoad(dyn_cast<LoadSDNode>(N), -1);
}]>;

//===----------------------------------------------------------------------===//
// Complex addressing mode patterns
//===----------------------------------------------------------------------===//
// Address selectors for 32- and 64-bit pointers, with and without
// frame-index roots.
def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>;
def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>;
def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>;
def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>;
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
// Instruction format classes
//===----------------------------------------------------------------------===//
// Base class for every AMDIL pseudo instruction.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
    : Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  // Every instruction prints on its own line.
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  // Flags consumed by the IL emitter.
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
}
|
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
// Multiclass Instruction formats
//===--------------------------------------------------------------------===//
// Conditional branches: one variant per scalar condition type. Only scalar
// types should generate flow control.
multiclass BranchConditional<SDNode Op> {
  def _i32 : ILFormat<(outs),
                      (ins brtarget:$target, GPRI32:$src0),
                      "; i32 Pseudo branch instruction",
                      [(Op bb:$target, GPRI32:$src0)]>;
  def _f32 : ILFormat<(outs),
                      (ins brtarget:$target, GPRF32:$src0),
                      "; f32 Pseudo branch instruction",
                      [(Op bb:$target, GPRF32:$src0)]>;
}

// One-operand branch-style instruction, scalar i32/f32 variants.
multiclass BranchInstr<string name> {
  def _i32 : ILFormat<(outs), (ins GPRI32:$src),
                      !strconcat(name, " $src"), []>;
  def _f32 : ILFormat<(outs), (ins GPRF32:$src),
                      !strconcat(name, " $src"), []>;
}
// Two-operand form of the above.
multiclass BranchInstr2<string name> {
  def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1),
                      !strconcat(name, " $src0, $src1"), []>;
  def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1),
                      !strconcat(name, " $src0, $src1"), []>;
}
|
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
|
|
||||||
// Intrinsics support
|
|
||||||
//===--------------------------------------------------------------------===//
|
|
||||||
include "AMDILIntrinsics.td"
|
|
||||||
|
|
||||||
//===--------------------------------------------------------------------===//
|
|
||||||
// Instructions support
|
|
||||||
//===--------------------------------------------------------------------===//
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
// Custom Inserter for Branches and returns, this eventually will be a
|
|
||||||
// seperate pass
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
|
|
||||||
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
|
|
||||||
"; Pseudo unconditional branch instruction",
|
|
||||||
[(br bb:$target)]>;
|
|
||||||
defm BRANCH_COND : BranchConditional<IL_brcond>;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
// Flow and Program control Instructions
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
let isTerminator=1 in {
|
|
||||||
def SWITCH : ILFormat< (outs), (ins GPRI32:$src),
|
|
||||||
!strconcat("SWITCH", " $src"), []>;
|
|
||||||
def CASE : ILFormat< (outs), (ins GPRI32:$src),
|
|
||||||
!strconcat("CASE", " $src"), []>;
|
|
||||||
def BREAK : ILFormat< (outs), (ins),
|
|
||||||
"BREAK", []>;
|
|
||||||
def CONTINUE : ILFormat< (outs), (ins),
|
|
||||||
"CONTINUE", []>;
|
|
||||||
def DEFAULT : ILFormat< (outs), (ins),
|
|
||||||
"DEFAULT", []>;
|
|
||||||
def ELSE : ILFormat< (outs), (ins),
|
|
||||||
"ELSE", []>;
|
|
||||||
def ENDSWITCH : ILFormat< (outs), (ins),
|
|
||||||
"ENDSWITCH", []>;
|
|
||||||
def ENDMAIN : ILFormat< (outs), (ins),
|
|
||||||
"ENDMAIN", []>;
|
|
||||||
def END : ILFormat< (outs), (ins),
|
|
||||||
"END", []>;
|
|
||||||
def ENDFUNC : ILFormat< (outs), (ins),
|
|
||||||
"ENDFUNC", []>;
|
|
||||||
def ENDIF : ILFormat< (outs), (ins),
|
|
||||||
"ENDIF", []>;
|
|
||||||
def WHILELOOP : ILFormat< (outs), (ins),
|
|
||||||
"WHILE", []>;
|
|
||||||
def ENDLOOP : ILFormat< (outs), (ins),
|
|
||||||
"ENDLOOP", []>;
|
|
||||||
def FUNC : ILFormat< (outs), (ins),
|
|
||||||
"FUNC", []>;
|
|
||||||
def RETDYN : ILFormat< (outs), (ins),
|
|
||||||
"RET_DYN", []>;
|
|
||||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
|
||||||
defm IF_LOGICALNZ : BranchInstr<"IF_LOGICALNZ">;
|
|
||||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
|
||||||
defm IF_LOGICALZ : BranchInstr<"IF_LOGICALZ">;
|
|
||||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
|
||||||
defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
|
|
||||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
|
||||||
defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
|
|
||||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
|
||||||
defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
|
|
||||||
// This opcode has custom swizzle pattern encoded in Swizzle Encoder
|
|
||||||
defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
|
|
||||||
defm IFC : BranchInstr2<"IFC">;
|
|
||||||
defm BREAKC : BranchInstr2<"BREAKC">;
|
|
||||||
defm CONTINUEC : BranchInstr2<"CONTINUEC">;
|
|
||||||
}
|
|
||||||
|
|
@ -1,93 +0,0 @@
|
||||||
//===- AMDILIntrinsicInfo.cpp - AMDIL Intrinsic Information ------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains the AMDIL Implementation of the IntrinsicInfo class.
|
|
||||||
//
|
|
||||||
//===-----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDILIntrinsicInfo.h"
|
|
||||||
#include "AMDIL.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
#include "llvm/DerivedTypes.h"
|
|
||||||
#include "llvm/Intrinsics.h"
|
|
||||||
#include "llvm/Module.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
|
|
||||||
#include "AMDGPUGenIntrinsics.inc"
|
|
||||||
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
|
|
||||||
|
|
||||||
AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm)
|
|
||||||
: TargetIntrinsicInfo()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string
|
|
||||||
AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
|
|
||||||
unsigned int numTys) const
|
|
||||||
{
|
|
||||||
static const char* const names[] = {
|
|
||||||
#define GET_INTRINSIC_NAME_TABLE
|
|
||||||
#include "AMDGPUGenIntrinsics.inc"
|
|
||||||
#undef GET_INTRINSIC_NAME_TABLE
|
|
||||||
};
|
|
||||||
|
|
||||||
//assert(!isOverloaded(IntrID)
|
|
||||||
//&& "AMDGPU Intrinsics are not overloaded");
|
|
||||||
if (IntrID < Intrinsic::num_intrinsics) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
|
|
||||||
&& "Invalid intrinsic ID");
|
|
||||||
|
|
||||||
std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
|
|
||||||
return Result;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int
|
|
||||||
AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const
|
|
||||||
{
|
|
||||||
#define GET_FUNCTION_RECOGNIZER
|
|
||||||
#include "AMDGPUGenIntrinsics.inc"
|
|
||||||
#undef GET_FUNCTION_RECOGNIZER
|
|
||||||
AMDGPUIntrinsic::ID IntrinsicID
|
|
||||||
= (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic;
|
|
||||||
IntrinsicID = getIntrinsicForGCCBuiltin("AMDIL", Name);
|
|
||||||
|
|
||||||
if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) {
|
|
||||||
return IntrinsicID;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const
|
|
||||||
{
|
|
||||||
// Overload Table
|
|
||||||
#define GET_INTRINSIC_OVERLOAD_TABLE
|
|
||||||
#include "AMDGPUGenIntrinsics.inc"
|
|
||||||
#undef GET_INTRINSIC_OVERLOAD_TABLE
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This defines the "getAttributes(ID id)" method.
|
|
||||||
#define GET_INTRINSIC_ATTRIBUTES
|
|
||||||
#include "AMDGPUGenIntrinsics.inc"
|
|
||||||
#undef GET_INTRINSIC_ATTRIBUTES
|
|
||||||
|
|
||||||
Function*
|
|
||||||
AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
|
|
||||||
Type **Tys,
|
|
||||||
unsigned numTys) const
|
|
||||||
{
|
|
||||||
//Silence a warning
|
|
||||||
AttrListPtr List = getAttributes((AMDGPUIntrinsic::ID)IntrID);
|
|
||||||
(void)List;
|
|
||||||
assert(!"Not implemented");
|
|
||||||
}
|
|
||||||
|
|
@ -1,47 +0,0 @@
|
||||||
//===- AMDILIntrinsicInfo.h - AMDIL Intrinsic Information ------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface for the AMDIL Implementation of the Intrinsic Info class.
|
|
||||||
//
|
|
||||||
//===-----------------------------------------------------------------------===//
|
|
||||||
#ifndef _AMDIL_INTRINSICS_H_
|
|
||||||
#define _AMDIL_INTRINSICS_H_
|
|
||||||
|
|
||||||
#include "llvm/Intrinsics.h"
|
|
||||||
#include "llvm/Target/TargetIntrinsicInfo.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
class TargetMachine;
|
|
||||||
namespace AMDGPUIntrinsic {
|
|
||||||
enum ID {
|
|
||||||
last_non_AMDGPU_intrinsic = Intrinsic::num_intrinsics - 1,
|
|
||||||
#define GET_INTRINSIC_ENUM_VALUES
|
|
||||||
#include "AMDGPUGenIntrinsics.inc"
|
|
||||||
#undef GET_INTRINSIC_ENUM_VALUES
|
|
||||||
, num_AMDGPU_intrinsics
|
|
||||||
};
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
|
|
||||||
public:
|
|
||||||
AMDGPUIntrinsicInfo(TargetMachine *tm);
|
|
||||||
std::string getName(unsigned int IntrId, Type **Tys = 0,
|
|
||||||
unsigned int numTys = 0) const;
|
|
||||||
unsigned int lookupName(const char *Name, unsigned int Len) const;
|
|
||||||
bool isOverloaded(unsigned int IID) const;
|
|
||||||
Function *getDeclaration(Module *M, unsigned int ID,
|
|
||||||
Type **Tys = 0,
|
|
||||||
unsigned int numTys = 0) const;
|
|
||||||
}; // AMDGPUIntrinsicInfo
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // _AMDIL_INTRINSICS_H_
|
|
||||||
|
|
||||||
|
|
@ -1,242 +0,0 @@
|
||||||
//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file defines all of the amdil-specific intrinsics
|
|
||||||
//
|
|
||||||
//===---------------------------------------------------------------===//
|
|
||||||
//===--------------------------------------------------------------------===//
|
|
||||||
// Intrinsic classes
|
|
||||||
// Generic versions of the above classes but for Target specific intrinsics
|
|
||||||
// instead of SDNode patterns.
|
|
||||||
//===--------------------------------------------------------------------===//
|
|
||||||
let TargetPrefix = "AMDIL", isTarget = 1 in {
|
|
||||||
class VoidIntLong :
|
|
||||||
Intrinsic<[llvm_i64_ty], [], []>;
|
|
||||||
class VoidIntInt :
|
|
||||||
Intrinsic<[llvm_i32_ty], [], []>;
|
|
||||||
class VoidIntBool :
|
|
||||||
Intrinsic<[llvm_i32_ty], [], []>;
|
|
||||||
class UnaryIntInt :
|
|
||||||
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
|
||||||
class UnaryIntFloat :
|
|
||||||
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
|
|
||||||
class ConvertIntFTOI :
|
|
||||||
Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>;
|
|
||||||
class ConvertIntITOF :
|
|
||||||
Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>;
|
|
||||||
class UnaryIntNoRetInt :
|
|
||||||
Intrinsic<[], [llvm_anyint_ty], []>;
|
|
||||||
class UnaryIntNoRetFloat :
|
|
||||||
Intrinsic<[], [llvm_anyfloat_ty], []>;
|
|
||||||
class BinaryIntInt :
|
|
||||||
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
|
||||||
class BinaryIntFloat :
|
|
||||||
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
|
||||||
class BinaryIntNoRetInt :
|
|
||||||
Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>;
|
|
||||||
class BinaryIntNoRetFloat :
|
|
||||||
Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>;
|
|
||||||
class TernaryIntInt :
|
|
||||||
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
|
|
||||||
LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
|
||||||
class TernaryIntFloat :
|
|
||||||
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
|
|
||||||
LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
|
||||||
class QuaternaryIntInt :
|
|
||||||
Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
|
|
||||||
LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
|
|
||||||
class UnaryAtomicInt :
|
|
||||||
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
|
||||||
class BinaryAtomicInt :
|
|
||||||
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
|
||||||
class TernaryAtomicInt :
|
|
||||||
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
|
|
||||||
class UnaryAtomicIntNoRet :
|
|
||||||
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
|
||||||
class BinaryAtomicIntNoRet :
|
|
||||||
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
|
||||||
class TernaryAtomicIntNoRet :
|
|
||||||
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>;
|
|
||||||
}
|
|
||||||
|
|
||||||
let TargetPrefix = "AMDIL", isTarget = 1 in {
|
|
||||||
def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
|
|
||||||
|
|
||||||
def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
|
|
||||||
UnaryIntInt;
|
|
||||||
def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
|
|
||||||
UnaryIntInt;
|
|
||||||
def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">,
|
|
||||||
UnaryIntInt;
|
|
||||||
def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">,
|
|
||||||
UnaryIntInt;
|
|
||||||
def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">,
|
|
||||||
UnaryIntInt;
|
|
||||||
def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">,
|
|
||||||
QuaternaryIntInt;
|
|
||||||
def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_mad_i32 : GCCBuiltin<"__amdil_imad">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_mad_u32 : GCCBuiltin<"__amdil_umad">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_mad : GCCBuiltin<"__amdil_mad">,
|
|
||||||
TernaryIntFloat;
|
|
||||||
def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_mad24_i32 : GCCBuiltin<"__amdil_imad24">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_mad24_u32 : GCCBuiltin<"__amdil_umad24">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_min : GCCBuiltin<"__amdil_min">,
|
|
||||||
BinaryIntFloat;
|
|
||||||
def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">,
|
|
||||||
BinaryIntInt;
|
|
||||||
def int_AMDIL_max : GCCBuiltin<"__amdil_max">,
|
|
||||||
BinaryIntFloat;
|
|
||||||
def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">,
|
|
||||||
TernaryIntInt;
|
|
||||||
def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">,
|
|
||||||
TernaryIntFloat;
|
|
||||||
def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat;
|
|
||||||
def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat;
|
|
||||||
def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt;
|
|
||||||
def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">,
|
|
||||||
TernaryIntFloat;
|
|
||||||
def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_length : GCCBuiltin<"__amdil_length">,
|
|
||||||
UnaryIntFloat;
|
|
||||||
def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">,
|
|
||||||
TernaryIntFloat;
|
|
||||||
def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">,
|
|
||||||
Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty,
|
|
||||||
llvm_v4i32_ty, llvm_i32_ty], []>;
|
|
||||||
|
|
||||||
def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">,
|
|
||||||
Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>;
|
|
||||||
def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">,
|
|
||||||
Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>;
|
|
||||||
def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">,
|
|
||||||
Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
|
|
||||||
def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">,
|
|
||||||
ConvertIntITOF;
|
|
||||||
def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">,
|
|
||||||
ConvertIntFTOI;
|
|
||||||
def int_AMDIL_convert_f32_i32_rpi : GCCBuiltin<"__amdil_float_to_int_rpi">,
|
|
||||||
ConvertIntFTOI;
|
|
||||||
def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">,
|
|
||||||
ConvertIntFTOI;
|
|
||||||
def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">,
|
|
||||||
ConvertIntFTOI;
|
|
||||||
def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">,
|
|
||||||
ConvertIntFTOI;
|
|
||||||
def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">,
|
|
||||||
ConvertIntFTOI;
|
|
||||||
def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">,
|
|
||||||
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>;
|
|
||||||
def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">,
|
|
||||||
ConvertIntITOF;
|
|
||||||
def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">,
|
|
||||||
ConvertIntITOF;
|
|
||||||
def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">,
|
|
||||||
ConvertIntITOF;
|
|
||||||
def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">,
|
|
||||||
ConvertIntITOF;
|
|
||||||
def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">,
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
|
|
||||||
llvm_v2f32_ty, llvm_float_ty], []>;
|
|
||||||
def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">,
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_v2f32_ty,
|
|
||||||
llvm_v2f32_ty], []>;
|
|
||||||
def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">,
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
|
|
||||||
llvm_v4f32_ty], []>;
|
|
||||||
def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">,
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
|
|
||||||
llvm_v4f32_ty], []>;
|
|
||||||
}
|
|
||||||
|
|
@ -1,71 +0,0 @@
|
||||||
//===-- AMDILNIDevice.cpp - Device Info for Northern Islands devices ------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
#include "AMDILNIDevice.h"
|
|
||||||
#include "AMDILEvergreenDevice.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
AMDGPUNIDevice::AMDGPUNIDevice(AMDGPUSubtarget *ST)
|
|
||||||
: AMDGPUEvergreenDevice(ST)
|
|
||||||
{
|
|
||||||
std::string name = ST->getDeviceName();
|
|
||||||
if (name == "caicos") {
|
|
||||||
mDeviceFlag = OCL_DEVICE_CAICOS;
|
|
||||||
} else if (name == "turks") {
|
|
||||||
mDeviceFlag = OCL_DEVICE_TURKS;
|
|
||||||
} else if (name == "cayman") {
|
|
||||||
mDeviceFlag = OCL_DEVICE_CAYMAN;
|
|
||||||
} else {
|
|
||||||
mDeviceFlag = OCL_DEVICE_BARTS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
AMDGPUNIDevice::~AMDGPUNIDevice()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
|
||||||
AMDGPUNIDevice::getMaxLDSSize() const
|
|
||||||
{
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
|
||||||
return MAX_LDS_SIZE_900;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t
|
|
||||||
AMDGPUNIDevice::getGeneration() const
|
|
||||||
{
|
|
||||||
return AMDGPUDeviceInfo::HD6XXX;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
AMDGPUCaymanDevice::AMDGPUCaymanDevice(AMDGPUSubtarget *ST)
|
|
||||||
: AMDGPUNIDevice(ST)
|
|
||||||
{
|
|
||||||
setCaps();
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUCaymanDevice::~AMDGPUCaymanDevice()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
AMDGPUCaymanDevice::setCaps()
|
|
||||||
{
|
|
||||||
if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::FMA);
|
|
||||||
}
|
|
||||||
mHWBits.set(AMDGPUDeviceInfo::Signed24BitOps);
|
|
||||||
mSWBits.reset(AMDGPUDeviceInfo::Signed24BitOps);
|
|
||||||
mSWBits.set(AMDGPUDeviceInfo::ArenaSegment);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
@ -1,59 +0,0 @@
|
||||||
//===------- AMDILNIDevice.h - Define NI Device for AMDIL -*- C++ -*------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface for the subtarget data classes.
|
|
||||||
//
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
// This file will define the interface that each generation needs to
|
|
||||||
// implement in order to correctly answer queries on the capabilities of the
|
|
||||||
// specific hardware.
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
#ifndef _AMDILNIDEVICE_H_
|
|
||||||
#define _AMDILNIDEVICE_H_
|
|
||||||
#include "AMDILEvergreenDevice.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
class AMDGPUSubtarget;
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
// NI generation of devices and their respective sub classes
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
// The AMDGPUNIDevice is the base class for all Northern Island series of
|
|
||||||
// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major
|
|
||||||
// exception being differences in wavefront size and hardware capabilities. The
|
|
||||||
// NI devices are all 64 wide wavefronts and also add support for signed 24 bit
|
|
||||||
// integer operations
|
|
||||||
|
|
||||||
class AMDGPUNIDevice : public AMDGPUEvergreenDevice {
|
|
||||||
public:
|
|
||||||
AMDGPUNIDevice(AMDGPUSubtarget*);
|
|
||||||
virtual ~AMDGPUNIDevice();
|
|
||||||
virtual size_t getMaxLDSSize() const;
|
|
||||||
virtual uint32_t getGeneration() const;
|
|
||||||
protected:
|
|
||||||
}; // AMDGPUNIDevice
|
|
||||||
|
|
||||||
// Just as the AMDGPUCypressDevice is the double capable version of the
|
|
||||||
// AMDGPUEvergreenDevice, the AMDGPUCaymanDevice is the double capable version of
|
|
||||||
// the AMDGPUNIDevice. The other major difference that is not as useful from
|
|
||||||
// standpoint is that the Cayman Device has 4 wide ALU's, whereas the rest of the
|
|
||||||
// NI family is a 5 wide.
|
|
||||||
|
|
||||||
class AMDGPUCaymanDevice: public AMDGPUNIDevice {
|
|
||||||
public:
|
|
||||||
AMDGPUCaymanDevice(AMDGPUSubtarget*);
|
|
||||||
virtual ~AMDGPUCaymanDevice();
|
|
||||||
private:
|
|
||||||
virtual void setCaps();
|
|
||||||
}; // AMDGPUCaymanDevice
|
|
||||||
|
|
||||||
static const unsigned int MAX_LDS_SIZE_900 = AMDGPUDevice::MAX_LDS_SIZE_800;
|
|
||||||
} // namespace llvm
|
|
||||||
#endif // _AMDILNIDEVICE_H_
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,110 +0,0 @@
|
||||||
//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Declarations that describe the AMDIL register file
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
class AMDILReg<bits<16> num, string n> : Register<n> {
|
|
||||||
field bits<16> Value;
|
|
||||||
let Value = num;
|
|
||||||
let Namespace = "AMDGPU";
|
|
||||||
}
|
|
||||||
|
|
||||||
// We will start with 8 registers for each class before expanding to more
|
|
||||||
// Since the swizzle is added based on the register class, we can leave it
|
|
||||||
// off here and just specify different registers for different register classes
|
|
||||||
def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>;
|
|
||||||
def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>;
|
|
||||||
def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>;
|
|
||||||
def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>;
|
|
||||||
def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>;
|
|
||||||
def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>;
|
|
||||||
def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>;
|
|
||||||
def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>;
|
|
||||||
def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>;
|
|
||||||
def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>;
|
|
||||||
def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>;
|
|
||||||
def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>;
|
|
||||||
def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>;
|
|
||||||
def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>;
|
|
||||||
def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>;
|
|
||||||
def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>;
|
|
||||||
def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>;
|
|
||||||
def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>;
|
|
||||||
def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>;
|
|
||||||
def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>;
|
|
||||||
|
|
||||||
// All registers between 1000 and 1024 are reserved and cannot be used
|
|
||||||
// unless commented in this section
|
|
||||||
// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's
|
|
||||||
// r1020 is used to hold the frame index for local arrays
|
|
||||||
// r1019 is used to hold the dynamic stack allocation pointer
|
|
||||||
// r1018 is used as a temporary register for handwritten code
|
|
||||||
// r1017 is used as a temporary register for handwritten code
|
|
||||||
// r1016 is used as a temporary register for load/store code
|
|
||||||
// r1015 is used as a temporary register for data segment offset
|
|
||||||
// r1014 is used as a temporary register for store code
|
|
||||||
// r1013 is used as the section data pointer register
|
|
||||||
// r1012-r1010 and r1001-r1008 are used for temporary I/O registers
|
|
||||||
// r1009 is used as the frame pointer register
|
|
||||||
// r999 is used as the mem register.
|
|
||||||
// r998 is used as the return address register.
|
|
||||||
//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>;
|
|
||||||
//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>;
|
|
||||||
//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>;
|
|
||||||
//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>;
|
|
||||||
//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>;
|
|
||||||
//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>;
|
|
||||||
def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>;
|
|
||||||
def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>;
|
|
||||||
def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>;
|
|
||||||
def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>;
|
|
||||||
def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>;
|
|
||||||
def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>;
|
|
||||||
def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>;
|
|
||||||
def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>;
|
|
||||||
def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>;
|
|
||||||
def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>;
|
|
||||||
def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>;
|
|
||||||
def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>;
|
|
||||||
def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>;
|
|
||||||
def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>;
|
|
||||||
def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>;
|
|
||||||
def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>;
|
|
||||||
def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>;
|
|
||||||
def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>;
|
|
||||||
def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>;
|
|
||||||
def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>;
|
|
||||||
def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>;
|
|
||||||
def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>;
|
|
||||||
def GPRI16 : RegisterClass<"AMDGPU", [i16], 16,
|
|
||||||
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
|
||||||
{
|
|
||||||
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
|
||||||
let AltOrderSelect = [{
|
|
||||||
return 1;
|
|
||||||
}];
|
|
||||||
}
|
|
||||||
def GPRI32 : RegisterClass<"AMDGPU", [i32], 32,
|
|
||||||
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
|
||||||
{
|
|
||||||
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
|
||||||
let AltOrderSelect = [{
|
|
||||||
return 1;
|
|
||||||
}];
|
|
||||||
}
|
|
||||||
def GPRF32 : RegisterClass<"AMDGPU", [f32], 32,
|
|
||||||
(add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)>
|
|
||||||
{
|
|
||||||
let AltOrders = [(add (sequence "R%u", 1, 20))];
|
|
||||||
let AltOrderSelect = [{
|
|
||||||
return 1;
|
|
||||||
}];
|
|
||||||
}
|
|
||||||
|
|
@ -1,49 +0,0 @@
|
||||||
//===-- AMDILSIDevice.cpp - Device Info for Southern Islands GPUs ---------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
#include "AMDILSIDevice.h"
|
|
||||||
#include "AMDILEvergreenDevice.h"
|
|
||||||
#include "AMDILNIDevice.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
AMDGPUSIDevice::AMDGPUSIDevice(AMDGPUSubtarget *ST)
|
|
||||||
: AMDGPUEvergreenDevice(ST)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
AMDGPUSIDevice::~AMDGPUSIDevice()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
|
||||||
AMDGPUSIDevice::getMaxLDSSize() const
|
|
||||||
{
|
|
||||||
if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
|
|
||||||
return MAX_LDS_SIZE_900;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t
|
|
||||||
AMDGPUSIDevice::getGeneration() const
|
|
||||||
{
|
|
||||||
return AMDGPUDeviceInfo::HD7XXX;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string
|
|
||||||
AMDGPUSIDevice::getDataLayout() const
|
|
||||||
{
|
|
||||||
return std::string("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16"
|
|
||||||
"-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
|
|
||||||
"-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
|
|
||||||
"-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
|
|
||||||
"-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
|
||||||
"-n8:16:32:64");
|
|
||||||
}
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
//===------- AMDILSIDevice.h - Define SI Device for AMDIL -*- C++ -*------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface for the subtarget data classes.
|
|
||||||
//
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
// This file will define the interface that each generation needs to
|
|
||||||
// implement in order to correctly answer queries on the capabilities of the
|
|
||||||
// specific hardware.
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
#ifndef _AMDILSIDEVICE_H_
|
|
||||||
#define _AMDILSIDEVICE_H_
|
|
||||||
#include "AMDILEvergreenDevice.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
class AMDGPUSubtarget;
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
// SI generation of devices and their respective sub classes
|
|
||||||
//===---------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
// The AMDGPUSIDevice is the base class for all Northern Island series of
|
|
||||||
// cards. It is very similiar to the AMDGPUEvergreenDevice, with the major
|
|
||||||
// exception being differences in wavefront size and hardware capabilities. The
|
|
||||||
// SI devices are all 64 wide wavefronts and also add support for signed 24 bit
|
|
||||||
// integer operations
|
|
||||||
|
|
||||||
class AMDGPUSIDevice : public AMDGPUEvergreenDevice {
|
|
||||||
public:
|
|
||||||
AMDGPUSIDevice(AMDGPUSubtarget*);
|
|
||||||
virtual ~AMDGPUSIDevice();
|
|
||||||
virtual size_t getMaxLDSSize() const;
|
|
||||||
virtual uint32_t getGeneration() const;
|
|
||||||
virtual std::string getDataLayout() const;
|
|
||||||
protected:
|
|
||||||
}; // AMDGPUSIDevice
|
|
||||||
|
|
||||||
} // namespace llvm
|
|
||||||
#endif // _AMDILSIDEVICE_H_
|
|
||||||
|
|
@ -1,75 +0,0 @@
|
||||||
//===-- AMDILUtilityFunctions.h - AMDIL Utility Functions Header --------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//==-----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file provides helper macros for expanding case statements.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
#ifndef AMDILUTILITYFUNCTIONS_H_
|
|
||||||
#define AMDILUTILITYFUNCTIONS_H_
|
|
||||||
|
|
||||||
// Macros that are used to help with switch statements for various data types
|
|
||||||
// However, these macro's do not return anything unlike the second set below.
|
|
||||||
#define ExpandCaseTo32bitIntTypes(Instr) \
|
|
||||||
case Instr##_i32:
|
|
||||||
|
|
||||||
#define ExpandCaseTo32bitIntTruncTypes(Instr) \
|
|
||||||
case Instr##_i32i8: \
|
|
||||||
case Instr##_i32i16:
|
|
||||||
|
|
||||||
#define ExpandCaseToIntTypes(Instr) \
|
|
||||||
ExpandCaseTo32bitIntTypes(Instr)
|
|
||||||
|
|
||||||
#define ExpandCaseToIntTruncTypes(Instr) \
|
|
||||||
ExpandCaseTo32bitIntTruncTypes(Instr)
|
|
||||||
|
|
||||||
#define ExpandCaseToFloatTypes(Instr) \
|
|
||||||
case Instr##_f32:
|
|
||||||
|
|
||||||
#define ExpandCaseTo32bitScalarTypes(Instr) \
|
|
||||||
ExpandCaseTo32bitIntTypes(Instr) \
|
|
||||||
case Instr##_f32:
|
|
||||||
|
|
||||||
#define ExpandCaseToAllScalarTypes(Instr) \
|
|
||||||
ExpandCaseToFloatTypes(Instr) \
|
|
||||||
ExpandCaseToIntTypes(Instr)
|
|
||||||
|
|
||||||
#define ExpandCaseToAllScalarTruncTypes(Instr) \
|
|
||||||
ExpandCaseToFloatTruncTypes(Instr) \
|
|
||||||
ExpandCaseToIntTruncTypes(Instr)
|
|
||||||
|
|
||||||
#define ExpandCaseToAllTypes(Instr) \
|
|
||||||
ExpandCaseToAllScalarTypes(Instr)
|
|
||||||
|
|
||||||
#define ExpandCaseToAllTruncTypes(Instr) \
|
|
||||||
ExpandCaseToAllScalarTruncTypes(Instr)
|
|
||||||
|
|
||||||
// Macros that expand into statements with return values
|
|
||||||
#define ExpandCaseTo32bitIntReturn(Instr, Return) \
|
|
||||||
case Instr##_i32: return Return##_i32;
|
|
||||||
|
|
||||||
#define ExpandCaseToIntReturn(Instr, Return) \
|
|
||||||
ExpandCaseTo32bitIntReturn(Instr, Return)
|
|
||||||
|
|
||||||
#define ExpandCaseToFloatReturn(Instr, Return) \
|
|
||||||
case Instr##_f32: return Return##_f32;\
|
|
||||||
|
|
||||||
#define ExpandCaseToAllScalarReturn(Instr, Return) \
|
|
||||||
ExpandCaseToFloatReturn(Instr, Return) \
|
|
||||||
ExpandCaseToIntReturn(Instr, Return)
|
|
||||||
|
|
||||||
// These macros expand to common groupings of RegClass ID's
|
|
||||||
#define ExpandCaseTo1CompRegID \
|
|
||||||
case AMDGPU::GPRI32RegClassID: \
|
|
||||||
case AMDGPU::GPRF32RegClassID:
|
|
||||||
|
|
||||||
#define ExpandCaseTo32BitType(Instr) \
|
|
||||||
case Instr##_i32: \
|
|
||||||
case Instr##_f32:
|
|
||||||
|
|
||||||
#endif // AMDILUTILITYFUNCTIONS_H_
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
|
|
||||||
#include "AMDGPUInstPrinter.h"
|
|
||||||
#include "llvm/MC/MCInst.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
|
|
||||||
StringRef Annot) {
|
|
||||||
printInstruction(MI, OS);
|
|
||||||
|
|
||||||
printAnnotation(OS, Annot);
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
|
|
||||||
raw_ostream &O) {
|
|
||||||
|
|
||||||
const MCOperand &Op = MI->getOperand(OpNo);
|
|
||||||
if (Op.isReg()) {
|
|
||||||
O << getRegisterName(Op.getReg());
|
|
||||||
} else if (Op.isImm()) {
|
|
||||||
O << Op.getImm();
|
|
||||||
} else if (Op.isFPImm()) {
|
|
||||||
O << Op.getFPImm();
|
|
||||||
} else {
|
|
||||||
assert(!"unknown operand type in printOperand");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
|
|
||||||
raw_ostream &O) {
|
|
||||||
printOperand(MI, OpNo, O);
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "AMDGPUGenAsmWriter.inc"
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
||||||
|
|
||||||
#ifndef AMDGPUINSTPRINTER_H
|
|
||||||
#define AMDGPUINSTPRINTER_H
|
|
||||||
|
|
||||||
#include "llvm/ADT/StringRef.h"
|
|
||||||
#include "llvm/MC/MCInstPrinter.h"
|
|
||||||
#include "llvm/Support/raw_ostream.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class AMDGPUInstPrinter : public MCInstPrinter {
|
|
||||||
public:
|
|
||||||
AMDGPUInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
|
|
||||||
const MCRegisterInfo &MRI)
|
|
||||||
: MCInstPrinter(MAI, MII, MRI) {}
|
|
||||||
|
|
||||||
//Autogenerated by tblgen
|
|
||||||
void printInstruction(const MCInst *MI, raw_ostream &O);
|
|
||||||
static const char *getRegisterName(unsigned RegNo);
|
|
||||||
|
|
||||||
// virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
|
|
||||||
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
|
|
||||||
|
|
||||||
private:
|
|
||||||
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
|
||||||
// void printUnsignedImm(const MCInst *MI, int OpNo, raw_ostream &O);
|
|
||||||
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif // AMDGPUINSTRPRINTER_H
|
|
||||||
|
|
@ -1,43 +0,0 @@
|
||||||
==============================================================================
|
|
||||||
LLVM Release License
|
|
||||||
==============================================================================
|
|
||||||
University of Illinois/NCSA
|
|
||||||
Open Source License
|
|
||||||
|
|
||||||
Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
|
|
||||||
All rights reserved.
|
|
||||||
|
|
||||||
Developed by:
|
|
||||||
|
|
||||||
LLVM Team
|
|
||||||
|
|
||||||
University of Illinois at Urbana-Champaign
|
|
||||||
|
|
||||||
http://llvm.org
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
|
||||||
this software and associated documentation files (the "Software"), to deal with
|
|
||||||
the Software without restriction, including without limitation the rights to
|
|
||||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
|
||||||
of the Software, and to permit persons to whom the Software is furnished to do
|
|
||||||
so, subject to the following conditions:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimers.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimers in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the names of the LLVM Team, University of Illinois at
|
|
||||||
Urbana-Champaign, nor the names of its contributors may be used to
|
|
||||||
endorse or promote products derived from this Software without specific
|
|
||||||
prior written permission.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
|
||||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
|
||||||
SOFTWARE.
|
|
||||||
|
|
@ -1,80 +0,0 @@
|
||||||
//===-- AMDGPUAsmBackend.cpp - AMDGPU Assembler Backend -------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
||||||
#include "llvm/ADT/StringRef.h"
|
|
||||||
#include "llvm/MC/MCAsmBackend.h"
|
|
||||||
#include "llvm/MC/MCAssembler.h"
|
|
||||||
#include "llvm/MC/MCObjectWriter.h"
|
|
||||||
#include "llvm/MC/MCValue.h"
|
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
class AMDGPUMCObjectWriter : public MCObjectWriter {
|
|
||||||
public:
|
|
||||||
AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
|
|
||||||
virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
|
|
||||||
const MCAsmLayout &Layout) {
|
|
||||||
//XXX: Implement if necessary.
|
|
||||||
}
|
|
||||||
virtual void RecordRelocation(const MCAssembler &Asm,
|
|
||||||
const MCAsmLayout &Layout,
|
|
||||||
const MCFragment *Fragment,
|
|
||||||
const MCFixup &Fixup,
|
|
||||||
MCValue Target, uint64_t &FixedValue) {
|
|
||||||
assert(!"Not implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
class AMDGPUAsmBackend : public MCAsmBackend {
|
|
||||||
public:
|
|
||||||
AMDGPUAsmBackend(const Target &T)
|
|
||||||
: MCAsmBackend() {}
|
|
||||||
|
|
||||||
virtual AMDGPUMCObjectWriter *createObjectWriter(raw_ostream &OS) const;
|
|
||||||
virtual unsigned getNumFixupKinds() const { return 0; };
|
|
||||||
virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
|
|
||||||
uint64_t Value) const { assert(!"Not implemented"); }
|
|
||||||
virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
|
|
||||||
const MCInstFragment *DF,
|
|
||||||
const MCAsmLayout &Layout) const {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
|
|
||||||
assert(!"Not implemented");
|
|
||||||
}
|
|
||||||
virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
|
|
||||||
virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} //End anonymous namespace
|
|
||||||
|
|
||||||
void AMDGPUMCObjectWriter::WriteObject(MCAssembler &Asm,
|
|
||||||
const MCAsmLayout &Layout) {
|
|
||||||
for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
|
|
||||||
Asm.writeSectionData(I, Layout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T, StringRef TT) {
|
|
||||||
return new AMDGPUAsmBackend(T);
|
|
||||||
}
|
|
||||||
|
|
||||||
AMDGPUMCObjectWriter * AMDGPUAsmBackend::createObjectWriter(
|
|
||||||
raw_ostream &OS) const {
|
|
||||||
return new AMDGPUMCObjectWriter(OS);
|
|
||||||
}
|
|
||||||
|
|
@ -1,96 +0,0 @@
|
||||||
//===-- MCTargetDesc/AMDGPUMCAsmInfo.cpp - Assembly Info ------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPUMCAsmInfo.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Target &T, StringRef &TT) : MCAsmInfo()
|
|
||||||
{
|
|
||||||
HasSingleParameterDotFile = false;
|
|
||||||
WeakDefDirective = 0;
|
|
||||||
//===------------------------------------------------------------------===//
|
|
||||||
HasSubsectionsViaSymbols = true;
|
|
||||||
HasMachoZeroFillDirective = false;
|
|
||||||
HasMachoTBSSDirective = false;
|
|
||||||
HasStaticCtorDtorReferenceInStaticMode = false;
|
|
||||||
LinkerRequiresNonEmptyDwarfLines = true;
|
|
||||||
MaxInstLength = 16;
|
|
||||||
PCSymbol = "$";
|
|
||||||
SeparatorString = "\n";
|
|
||||||
CommentColumn = 40;
|
|
||||||
CommentString = ";";
|
|
||||||
LabelSuffix = ":";
|
|
||||||
GlobalPrefix = "@";
|
|
||||||
PrivateGlobalPrefix = ";.";
|
|
||||||
LinkerPrivateGlobalPrefix = "!";
|
|
||||||
InlineAsmStart = ";#ASMSTART";
|
|
||||||
InlineAsmEnd = ";#ASMEND";
|
|
||||||
AssemblerDialect = 0;
|
|
||||||
AllowQuotesInName = false;
|
|
||||||
AllowNameToStartWithDigit = false;
|
|
||||||
AllowPeriodsInName = false;
|
|
||||||
|
|
||||||
//===--- Data Emission Directives -------------------------------------===//
|
|
||||||
ZeroDirective = ".zero";
|
|
||||||
AsciiDirective = ".ascii\t";
|
|
||||||
AscizDirective = ".asciz\t";
|
|
||||||
Data8bitsDirective = ".byte\t";
|
|
||||||
Data16bitsDirective = ".short\t";
|
|
||||||
Data32bitsDirective = ".long\t";
|
|
||||||
Data64bitsDirective = ".quad\t";
|
|
||||||
GPRel32Directive = 0;
|
|
||||||
SunStyleELFSectionSwitchSyntax = true;
|
|
||||||
UsesELFSectionDirectiveForBSS = true;
|
|
||||||
HasMicrosoftFastStdCallMangling = false;
|
|
||||||
|
|
||||||
//===--- Alignment Information ----------------------------------------===//
|
|
||||||
AlignDirective = ".align\t";
|
|
||||||
AlignmentIsInBytes = true;
|
|
||||||
TextAlignFillValue = 0;
|
|
||||||
|
|
||||||
//===--- Global Variable Emission Directives --------------------------===//
|
|
||||||
GlobalDirective = ".global";
|
|
||||||
ExternDirective = ".extern";
|
|
||||||
HasSetDirective = false;
|
|
||||||
HasAggressiveSymbolFolding = true;
|
|
||||||
LCOMMDirectiveType = LCOMM::None;
|
|
||||||
COMMDirectiveAlignmentIsInBytes = false;
|
|
||||||
HasDotTypeDotSizeDirective = false;
|
|
||||||
HasNoDeadStrip = true;
|
|
||||||
HasSymbolResolver = false;
|
|
||||||
WeakRefDirective = ".weakref\t";
|
|
||||||
LinkOnceDirective = 0;
|
|
||||||
//===--- Dwarf Emission Directives -----------------------------------===//
|
|
||||||
HasLEB128 = true;
|
|
||||||
SupportsDebugInformation = true;
|
|
||||||
ExceptionsType = ExceptionHandling::None;
|
|
||||||
DwarfUsesInlineInfoSection = false;
|
|
||||||
DwarfSectionOffsetDirective = ".offset";
|
|
||||||
DwarfUsesLabelOffsetForRanges = true;
|
|
||||||
|
|
||||||
//===--- CBE Asm Translation Table -----------------------------------===//
|
|
||||||
AsmTransCBE = 0;
|
|
||||||
}
|
|
||||||
const char*
|
|
||||||
AMDGPUMCAsmInfo::getDataASDirective(unsigned int Size, unsigned int AS) const
|
|
||||||
{
|
|
||||||
switch (AS) {
|
|
||||||
default:
|
|
||||||
return 0;
|
|
||||||
case 0:
|
|
||||||
return 0;
|
|
||||||
};
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
const MCSection*
|
|
||||||
AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
||||||
//===-- MCTargetDesc/AMDGPUMCAsmInfo.h - TODO: Add brief description -------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// TODO: Add full description
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPUMCASMINFO_H_
|
|
||||||
#define AMDGPUMCASMINFO_H_
|
|
||||||
|
|
||||||
#include "llvm/MC/MCAsmInfo.h"
|
|
||||||
namespace llvm {
|
|
||||||
class Target;
|
|
||||||
class StringRef;
|
|
||||||
|
|
||||||
class AMDGPUMCAsmInfo : public MCAsmInfo {
|
|
||||||
public:
|
|
||||||
explicit AMDGPUMCAsmInfo(const Target &T, StringRef &TT);
|
|
||||||
const char*
|
|
||||||
getDataASDirective(unsigned int Size, unsigned int AS) const;
|
|
||||||
const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
|
|
||||||
};
|
|
||||||
} // namespace llvm
|
|
||||||
#endif // AMDGPUMCASMINFO_H_
|
|
||||||
|
|
@ -1,59 +0,0 @@
|
||||||
//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// CodeEmitter interface for R600 and SI codegen.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef AMDGPUCODEEMITTER_H
|
|
||||||
#define AMDGPUCODEEMITTER_H
|
|
||||||
|
|
||||||
#include "llvm/MC/MCCodeEmitter.h"
|
|
||||||
#include "llvm/Support/raw_ostream.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class MCInst;
|
|
||||||
class MCOperand;
|
|
||||||
|
|
||||||
class AMDGPUMCCodeEmitter : public MCCodeEmitter {
|
|
||||||
public:
|
|
||||||
|
|
||||||
uint64_t getBinaryCodeForInstr(const MCInst &MI,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
|
||||||
|
|
||||||
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const {
|
|
||||||
return Value;
|
|
||||||
}
|
|
||||||
virtual uint64_t i32LiteralEncode(const MCInst &MI, unsigned OpNo,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif // AMDGPUCODEEMITTER_H
|
|
||||||
|
|
@ -1,111 +0,0 @@
|
||||||
//===-- AMDGPUMCTargetDesc.cpp - AMDGPU Target Descriptions ---------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file provides AMDGPU specific target descriptions.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPUMCTargetDesc.h"
|
|
||||||
#include "AMDGPUMCAsmInfo.h"
|
|
||||||
#include "InstPrinter/AMDGPUInstPrinter.h"
|
|
||||||
#include "llvm/MC/MachineLocation.h"
|
|
||||||
#include "llvm/MC/MCCodeGenInfo.h"
|
|
||||||
#include "llvm/MC/MCInstrInfo.h"
|
|
||||||
#include "llvm/MC/MCRegisterInfo.h"
|
|
||||||
#include "llvm/MC/MCStreamer.h"
|
|
||||||
#include "llvm/MC/MCSubtargetInfo.h"
|
|
||||||
#include "llvm/Support/ErrorHandling.h"
|
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
|
||||||
|
|
||||||
#define GET_INSTRINFO_MC_DESC
|
|
||||||
#include "AMDGPUGenInstrInfo.inc"
|
|
||||||
|
|
||||||
#define GET_SUBTARGETINFO_MC_DESC
|
|
||||||
#include "AMDGPUGenSubtargetInfo.inc"
|
|
||||||
|
|
||||||
#define GET_REGINFO_MC_DESC
|
|
||||||
#include "AMDGPUGenRegisterInfo.inc"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
static MCInstrInfo *createAMDGPUMCInstrInfo() {
|
|
||||||
MCInstrInfo *X = new MCInstrInfo();
|
|
||||||
InitAMDGPUMCInstrInfo(X);
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
static MCRegisterInfo *createAMDGPUMCRegisterInfo(StringRef TT) {
|
|
||||||
MCRegisterInfo *X = new MCRegisterInfo();
|
|
||||||
InitAMDGPUMCRegisterInfo(X, 0);
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
static MCSubtargetInfo *createAMDGPUMCSubtargetInfo(StringRef TT, StringRef CPU,
|
|
||||||
StringRef FS) {
|
|
||||||
MCSubtargetInfo * X = new MCSubtargetInfo();
|
|
||||||
InitAMDGPUMCSubtargetInfo(X, TT, CPU, FS);
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
static MCCodeGenInfo *createAMDGPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
|
|
||||||
CodeModel::Model CM,
|
|
||||||
CodeGenOpt::Level OL) {
|
|
||||||
MCCodeGenInfo *X = new MCCodeGenInfo();
|
|
||||||
X->InitMCCodeGenInfo(RM, CM, OL);
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
static MCInstPrinter *createAMDGPUMCInstPrinter(const Target &T,
|
|
||||||
unsigned SyntaxVariant,
|
|
||||||
const MCAsmInfo &MAI,
|
|
||||||
const MCInstrInfo &MII,
|
|
||||||
const MCRegisterInfo &MRI,
|
|
||||||
const MCSubtargetInfo &STI) {
|
|
||||||
return new AMDGPUInstPrinter(MAI, MII, MRI);
|
|
||||||
}
|
|
||||||
|
|
||||||
static MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
|
|
||||||
const MCSubtargetInfo &STI,
|
|
||||||
MCContext &Ctx) {
|
|
||||||
if (STI.getFeatureBits() & AMDGPU::Feature64BitPtr) {
|
|
||||||
return createSIMCCodeEmitter(MCII, STI, Ctx);
|
|
||||||
} else {
|
|
||||||
return createR600MCCodeEmitter(MCII, STI, Ctx);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
|
|
||||||
MCContext &Ctx, MCAsmBackend &MAB,
|
|
||||||
raw_ostream &_OS,
|
|
||||||
MCCodeEmitter *_Emitter,
|
|
||||||
bool RelaxAll,
|
|
||||||
bool NoExecStack) {
|
|
||||||
return createPureStreamer(Ctx, MAB, _OS, _Emitter);
|
|
||||||
}
|
|
||||||
|
|
||||||
extern "C" void LLVMInitializeAMDGPUTargetMC() {
|
|
||||||
|
|
||||||
RegisterMCAsmInfo<AMDGPUMCAsmInfo> Y(TheAMDGPUTarget);
|
|
||||||
|
|
||||||
TargetRegistry::RegisterMCCodeGenInfo(TheAMDGPUTarget, createAMDGPUMCCodeGenInfo);
|
|
||||||
|
|
||||||
TargetRegistry::RegisterMCInstrInfo(TheAMDGPUTarget, createAMDGPUMCInstrInfo);
|
|
||||||
|
|
||||||
TargetRegistry::RegisterMCRegInfo(TheAMDGPUTarget, createAMDGPUMCRegisterInfo);
|
|
||||||
|
|
||||||
TargetRegistry::RegisterMCSubtargetInfo(TheAMDGPUTarget, createAMDGPUMCSubtargetInfo);
|
|
||||||
|
|
||||||
TargetRegistry::RegisterMCInstPrinter(TheAMDGPUTarget, createAMDGPUMCInstPrinter);
|
|
||||||
|
|
||||||
TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget, createAMDGPUMCCodeEmitter);
|
|
||||||
|
|
||||||
TargetRegistry::RegisterMCAsmBackend(TheAMDGPUTarget, createAMDGPUAsmBackend);
|
|
||||||
|
|
||||||
TargetRegistry::RegisterMCObjectStreamer(TheAMDGPUTarget, createMCStreamer);
|
|
||||||
}
|
|
||||||
|
|
@ -1,51 +0,0 @@
|
||||||
//===-- AMDGPUMCTargetDesc.h - AMDGPU Target Descriptions -----*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file provides AMDGPU specific target descriptions.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
|
|
||||||
#ifndef AMDGPUMCTARGETDESC_H
|
|
||||||
#define AMDGPUMCTARGETDESC_H
|
|
||||||
|
|
||||||
#include "llvm/ADT/StringRef.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
class MCAsmBackend;
|
|
||||||
class MCCodeEmitter;
|
|
||||||
class MCContext;
|
|
||||||
class MCInstrInfo;
|
|
||||||
class MCRegisterInfo;
|
|
||||||
class MCSubtargetInfo;
|
|
||||||
class Target;
|
|
||||||
|
|
||||||
extern Target TheAMDGPUTarget;
|
|
||||||
|
|
||||||
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
|
|
||||||
const MCSubtargetInfo &STI,
|
|
||||||
MCContext &Ctx);
|
|
||||||
|
|
||||||
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
|
|
||||||
const MCSubtargetInfo &STI,
|
|
||||||
MCContext &Ctx);
|
|
||||||
|
|
||||||
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, StringRef TT);
|
|
||||||
} // End llvm namespace
|
|
||||||
|
|
||||||
#define GET_REGINFO_ENUM
|
|
||||||
#include "AMDGPUGenRegisterInfo.inc"
|
|
||||||
|
|
||||||
#define GET_INSTRINFO_ENUM
|
|
||||||
#include "AMDGPUGenInstrInfo.inc"
|
|
||||||
|
|
||||||
#define GET_SUBTARGETINFO_ENUM
|
|
||||||
#include "AMDGPUGenSubtargetInfo.inc"
|
|
||||||
|
|
||||||
#endif // AMDGPUMCTARGETDESC_H
|
|
||||||
|
|
@ -1,727 +0,0 @@
|
||||||
//===- R600MCCodeEmitter.cpp - Code Emitter for R600->Cayman GPU families -===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This code emitters outputs bytecode that is understood by the r600g driver
|
|
||||||
// in the Mesa [1] project. The bytecode is very similar to the hardware's ISA,
|
|
||||||
// except that the size of the instruction fields are rounded up to the
|
|
||||||
// nearest byte.
|
|
||||||
//
|
|
||||||
// [1] http://www.mesa3d.org/
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "R600Defines.h"
|
|
||||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
||||||
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
|
|
||||||
#include "llvm/MC/MCCodeEmitter.h"
|
|
||||||
#include "llvm/MC/MCContext.h"
|
|
||||||
#include "llvm/MC/MCInst.h"
|
|
||||||
#include "llvm/MC/MCInstrInfo.h"
|
|
||||||
#include "llvm/MC/MCRegisterInfo.h"
|
|
||||||
#include "llvm/MC/MCSubtargetInfo.h"
|
|
||||||
#include "llvm/Support/raw_ostream.h"
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#define SRC_BYTE_COUNT 11
|
|
||||||
#define DST_BYTE_COUNT 5
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
|
|
||||||
R600MCCodeEmitter(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
|
|
||||||
void operator=(const R600MCCodeEmitter &); // DO NOT IMPLEMENT
|
|
||||||
const MCInstrInfo &MCII;
|
|
||||||
const MCSubtargetInfo &STI;
|
|
||||||
MCContext &Ctx;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
|
|
||||||
MCContext &ctx)
|
|
||||||
: MCII(mcii), STI(sti), Ctx(ctx) { }
|
|
||||||
|
|
||||||
/// EncodeInstruction - Encode the instruction and write it to the OS.
|
|
||||||
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
|
||||||
|
|
||||||
/// getMachineOpValue - Reutrn the encoding for an MCOperand.
|
|
||||||
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
|
||||||
private:
|
|
||||||
|
|
||||||
void EmitALUInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
|
|
||||||
raw_ostream &OS) const;
|
|
||||||
void EmitSrc(const MCInst &MI, unsigned OpIdx, raw_ostream &OS) const;
|
|
||||||
void EmitSrcISA(const MCInst &MI, unsigned OpIdx, uint64_t &Value,
|
|
||||||
raw_ostream &OS) const;
|
|
||||||
void EmitDst(const MCInst &MI, raw_ostream &OS) const;
|
|
||||||
void EmitTexInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
|
|
||||||
raw_ostream &OS) const;
|
|
||||||
void EmitFCInstr(const MCInst &MI, raw_ostream &OS) const;
|
|
||||||
|
|
||||||
void EmitNullBytes(unsigned int byteCount, raw_ostream &OS) const;
|
|
||||||
|
|
||||||
void EmitByte(unsigned int byte, raw_ostream &OS) const;
|
|
||||||
|
|
||||||
void EmitTwoBytes(uint32_t bytes, raw_ostream &OS) const;
|
|
||||||
|
|
||||||
void Emit(uint32_t value, raw_ostream &OS) const;
|
|
||||||
void Emit(uint64_t value, raw_ostream &OS) const;
|
|
||||||
|
|
||||||
unsigned getHWRegIndex(unsigned reg) const;
|
|
||||||
unsigned getHWRegChan(unsigned reg) const;
|
|
||||||
unsigned getHWReg(unsigned regNo) const;
|
|
||||||
|
|
||||||
bool isFCOp(unsigned opcode) const;
|
|
||||||
bool isTexOp(unsigned opcode) const;
|
|
||||||
bool isFlagSet(const MCInst &MI, unsigned Operand, unsigned Flag) const;
|
|
||||||
|
|
||||||
/// getHWRegIndexGen - Get the register's hardware index. Implemented in
|
|
||||||
/// R600HwRegInfo.include.
|
|
||||||
unsigned getHWRegIndexGen(unsigned int Reg) const;
|
|
||||||
|
|
||||||
/// getHWRegChanGen - Get the register's channel. Implemented in
|
|
||||||
/// R600HwRegInfo.include.
|
|
||||||
unsigned getHWRegChanGen(unsigned int Reg) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End anonymous namespace
|
|
||||||
|
|
||||||
enum RegElement {
|
|
||||||
ELEMENT_X = 0,
|
|
||||||
ELEMENT_Y,
|
|
||||||
ELEMENT_Z,
|
|
||||||
ELEMENT_W
|
|
||||||
};
|
|
||||||
|
|
||||||
enum InstrTypes {
|
|
||||||
INSTR_ALU = 0,
|
|
||||||
INSTR_TEX,
|
|
||||||
INSTR_FC,
|
|
||||||
INSTR_NATIVE,
|
|
||||||
INSTR_VTX
|
|
||||||
};
|
|
||||||
|
|
||||||
enum FCInstr {
|
|
||||||
FC_IF = 0,
|
|
||||||
FC_IF_INT,
|
|
||||||
FC_ELSE,
|
|
||||||
FC_ENDIF,
|
|
||||||
FC_BGNLOOP,
|
|
||||||
FC_ENDLOOP,
|
|
||||||
FC_BREAK,
|
|
||||||
FC_BREAK_NZ_INT,
|
|
||||||
FC_CONTINUE,
|
|
||||||
FC_BREAK_Z_INT,
|
|
||||||
FC_BREAK_NZ
|
|
||||||
};
|
|
||||||
|
|
||||||
enum TextureTypes {
|
|
||||||
TEXTURE_1D = 1,
|
|
||||||
TEXTURE_2D,
|
|
||||||
TEXTURE_3D,
|
|
||||||
TEXTURE_CUBE,
|
|
||||||
TEXTURE_RECT,
|
|
||||||
TEXTURE_SHADOW1D,
|
|
||||||
TEXTURE_SHADOW2D,
|
|
||||||
TEXTURE_SHADOWRECT,
|
|
||||||
TEXTURE_1D_ARRAY,
|
|
||||||
TEXTURE_2D_ARRAY,
|
|
||||||
TEXTURE_SHADOW1D_ARRAY,
|
|
||||||
TEXTURE_SHADOW2D_ARRAY
|
|
||||||
};
|
|
||||||
|
|
||||||
MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
|
|
||||||
const MCSubtargetInfo &STI,
|
|
||||||
MCContext &Ctx) {
|
|
||||||
return new R600MCCodeEmitter(MCII, STI, Ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
|
||||||
if (isTexOp(MI.getOpcode())) {
|
|
||||||
EmitTexInstr(MI, Fixups, OS);
|
|
||||||
} else if (isFCOp(MI.getOpcode())){
|
|
||||||
EmitFCInstr(MI, OS);
|
|
||||||
} else if (MI.getOpcode() == AMDGPU::RETURN ||
|
|
||||||
MI.getOpcode() == AMDGPU::BUNDLE ||
|
|
||||||
MI.getOpcode() == AMDGPU::KILL) {
|
|
||||||
return;
|
|
||||||
} else {
|
|
||||||
switch(MI.getOpcode()) {
|
|
||||||
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
|
|
||||||
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
|
|
||||||
{
|
|
||||||
uint64_t inst = getBinaryCodeForInstr(MI, Fixups);
|
|
||||||
EmitByte(INSTR_NATIVE, OS);
|
|
||||||
Emit(inst, OS);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case AMDGPU::CONSTANT_LOAD_eg:
|
|
||||||
case AMDGPU::VTX_READ_PARAM_i32_eg:
|
|
||||||
case AMDGPU::VTX_READ_PARAM_f32_eg:
|
|
||||||
case AMDGPU::VTX_READ_GLOBAL_i8_eg:
|
|
||||||
case AMDGPU::VTX_READ_GLOBAL_i32_eg:
|
|
||||||
case AMDGPU::VTX_READ_GLOBAL_f32_eg:
|
|
||||||
case AMDGPU::VTX_READ_GLOBAL_v4i32_eg:
|
|
||||||
case AMDGPU::VTX_READ_GLOBAL_v4f32_eg:
|
|
||||||
{
|
|
||||||
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
|
|
||||||
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
|
|
||||||
|
|
||||||
EmitByte(INSTR_VTX, OS);
|
|
||||||
Emit(InstWord01, OS);
|
|
||||||
Emit(InstWord2, OS);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
default:
|
|
||||||
EmitALUInstr(MI, Fixups, OS);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::EmitALUInstr(const MCInst &MI,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups,
|
|
||||||
raw_ostream &OS) const {
|
|
||||||
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
|
|
||||||
unsigned NumOperands = MI.getNumOperands();
|
|
||||||
|
|
||||||
if(MCDesc.findFirstPredOperandIdx() > -1)
|
|
||||||
NumOperands--;
|
|
||||||
|
|
||||||
if (GET_FLAG_OPERAND_IDX(MCDesc.TSFlags) != 0)
|
|
||||||
NumOperands--;
|
|
||||||
|
|
||||||
if(MI.getOpcode() == AMDGPU::PRED_X)
|
|
||||||
NumOperands = 2;
|
|
||||||
|
|
||||||
// XXX Check if instruction writes a result
|
|
||||||
if (NumOperands < 1) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit instruction type
|
|
||||||
EmitByte(INSTR_ALU, OS);
|
|
||||||
|
|
||||||
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups);
|
|
||||||
|
|
||||||
//older alu have different encoding for instructions with one or two src
|
|
||||||
//parameters.
|
|
||||||
if ((STI.getFeatureBits() & AMDGPU::FeatureR600ALUInst) &&
|
|
||||||
!(MCDesc.TSFlags & R600_InstFlag::OP3)) {
|
|
||||||
uint64_t ISAOpCode = InstWord01 & (0x3FFULL << 39);
|
|
||||||
InstWord01 &= ~(0x3FFULL << 39);
|
|
||||||
InstWord01 |= ISAOpCode << 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned int OpIndex;
|
|
||||||
for (OpIndex = 1; OpIndex < NumOperands; OpIndex++) {
|
|
||||||
// Literal constants are always stored as the last operand.
|
|
||||||
if (MI.getOperand(OpIndex).isImm() || MI.getOperand(OpIndex).isFPImm()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
EmitSrcISA(MI, OpIndex, InstWord01, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit zeros for unused sources
|
|
||||||
for ( ; OpIndex < 4; OpIndex++) {
|
|
||||||
EmitNullBytes(SRC_BYTE_COUNT - 6, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit destination register
|
|
||||||
const MCOperand &dstOp = MI.getOperand(0);
|
|
||||||
if (dstOp.isReg() && dstOp.getReg() != AMDGPU::PREDICATE_BIT) {
|
|
||||||
//element of destination register
|
|
||||||
InstWord01 |= uint64_t(getHWRegChan(dstOp.getReg())) << 61;
|
|
||||||
|
|
||||||
// isClamped
|
|
||||||
if (isFlagSet(MI, 0, MO_FLAG_CLAMP)) {
|
|
||||||
InstWord01 |= 1ULL << 63;
|
|
||||||
}
|
|
||||||
|
|
||||||
// write mask
|
|
||||||
if (!isFlagSet(MI, 0, MO_FLAG_MASK) && NumOperands < 4) {
|
|
||||||
InstWord01 |= 1ULL << 36;
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Emit relative addressing mode
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit ALU
|
|
||||||
|
|
||||||
// Emit IsLast (for this instruction group) (1 byte)
|
|
||||||
if (!isFlagSet(MI, 0, MO_FLAG_NOT_LAST)) {
|
|
||||||
InstWord01 |= 1ULL << 31;
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Emit push modifier
|
|
||||||
if(isFlagSet(MI, 1, MO_FLAG_PUSH)) {
|
|
||||||
InstWord01 |= 1ULL << 34;
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Emit predicate (1 byte)
|
|
||||||
int PredIdx = MCDesc.findFirstPredOperandIdx();
|
|
||||||
if (PredIdx != -1) {
|
|
||||||
switch(MI.getOperand(PredIdx).getReg()) {
|
|
||||||
case AMDGPU::PRED_SEL_ZERO:
|
|
||||||
InstWord01 |= 2ULL << 29;
|
|
||||||
break;
|
|
||||||
case AMDGPU::PRED_SEL_ONE:
|
|
||||||
InstWord01 |= 3ULL << 29;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//XXX: predicate
|
|
||||||
//XXX: bank swizzle
|
|
||||||
//XXX: OMOD
|
|
||||||
//XXX: index mode
|
|
||||||
|
|
||||||
Emit(InstWord01, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::EmitSrc(const MCInst &MI, unsigned OpIdx,
|
|
||||||
raw_ostream &OS) const {
|
|
||||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
|
||||||
union {
|
|
||||||
float f;
|
|
||||||
uint32_t i;
|
|
||||||
} Value;
|
|
||||||
Value.i = 0;
|
|
||||||
// Emit the source select (2 bytes). For GPRs, this is the register index.
|
|
||||||
// For other potential instruction operands, (e.g. constant registers) the
|
|
||||||
// value of the source select is defined in the r600isa docs.
|
|
||||||
if (MO.isReg()) {
|
|
||||||
unsigned reg = MO.getReg();
|
|
||||||
EmitTwoBytes(getHWReg(reg), OS);
|
|
||||||
if (reg == AMDGPU::ALU_LITERAL_X) {
|
|
||||||
unsigned ImmOpIndex = MI.getNumOperands() - 1;
|
|
||||||
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
|
|
||||||
if (ImmOp.isFPImm()) {
|
|
||||||
Value.f = ImmOp.getFPImm();
|
|
||||||
} else {
|
|
||||||
assert(ImmOp.isImm());
|
|
||||||
Value.i = ImmOp.getImm();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// XXX: Handle other operand types.
|
|
||||||
EmitTwoBytes(0, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit the source channel (1 byte)
|
|
||||||
if (MO.isReg()) {
|
|
||||||
EmitByte(getHWRegChan(MO.getReg()), OS);
|
|
||||||
} else {
|
|
||||||
EmitByte(0, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Emit isNegated (1 byte)
|
|
||||||
if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
|
|
||||||
&& (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
|
|
||||||
(MO.isReg() &&
|
|
||||||
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
|
|
||||||
EmitByte(1, OS);
|
|
||||||
} else {
|
|
||||||
EmitByte(0, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit isAbsolute (1 byte)
|
|
||||||
if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
|
|
||||||
EmitByte(1, OS);
|
|
||||||
} else {
|
|
||||||
EmitByte(0, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Emit relative addressing mode (1 byte)
|
|
||||||
EmitByte(0, OS);
|
|
||||||
|
|
||||||
// Emit kc_bank, This will be adjusted later by r600_asm
|
|
||||||
EmitByte(0, OS);
|
|
||||||
|
|
||||||
// Emit the literal value, if applicable (4 bytes).
|
|
||||||
Emit(Value.i, OS);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::EmitSrcISA(const MCInst &MI, unsigned OpIdx,
|
|
||||||
uint64_t &Value, raw_ostream &OS) const {
|
|
||||||
const MCOperand &MO = MI.getOperand(OpIdx);
|
|
||||||
union {
|
|
||||||
float f;
|
|
||||||
uint32_t i;
|
|
||||||
} InlineConstant;
|
|
||||||
InlineConstant.i = 0;
|
|
||||||
// Emit the source select (2 bytes). For GPRs, this is the register index.
|
|
||||||
// For other potential instruction operands, (e.g. constant registers) the
|
|
||||||
// value of the source select is defined in the r600isa docs.
|
|
||||||
if (MO.isReg()) {
|
|
||||||
unsigned Reg = MO.getReg();
|
|
||||||
if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(Reg)) {
|
|
||||||
EmitByte(1, OS);
|
|
||||||
} else {
|
|
||||||
EmitByte(0, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Reg == AMDGPU::ALU_LITERAL_X) {
|
|
||||||
unsigned ImmOpIndex = MI.getNumOperands() - 1;
|
|
||||||
MCOperand ImmOp = MI.getOperand(ImmOpIndex);
|
|
||||||
if (ImmOp.isFPImm()) {
|
|
||||||
InlineConstant.f = ImmOp.getFPImm();
|
|
||||||
} else {
|
|
||||||
assert(ImmOp.isImm());
|
|
||||||
InlineConstant.i = ImmOp.getImm();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// XXX: Handle other operand types.
|
|
||||||
EmitTwoBytes(0, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// source channel
|
|
||||||
uint64_t sourceChannelValue = getHWRegChan(MO.getReg());
|
|
||||||
if (OpIdx == 1)
|
|
||||||
Value |= sourceChannelValue << 10;
|
|
||||||
if (OpIdx == 2)
|
|
||||||
Value |= sourceChannelValue << 23;
|
|
||||||
if (OpIdx == 3)
|
|
||||||
Value |= sourceChannelValue << 42;
|
|
||||||
|
|
||||||
// isNegated
|
|
||||||
if ((!(isFlagSet(MI, OpIdx, MO_FLAG_ABS)))
|
|
||||||
&& (isFlagSet(MI, OpIdx, MO_FLAG_NEG) ||
|
|
||||||
(MO.isReg() &&
|
|
||||||
(MO.getReg() == AMDGPU::NEG_ONE || MO.getReg() == AMDGPU::NEG_HALF)))){
|
|
||||||
if (OpIdx == 1)
|
|
||||||
Value |= 1ULL << 12;
|
|
||||||
else if (OpIdx == 2)
|
|
||||||
Value |= 1ULL << 25;
|
|
||||||
else if (OpIdx == 3)
|
|
||||||
Value |= 1ULL << 44;
|
|
||||||
}
|
|
||||||
|
|
||||||
// isAbsolute
|
|
||||||
if (isFlagSet(MI, OpIdx, MO_FLAG_ABS)) {
|
|
||||||
assert(OpIdx < 3);
|
|
||||||
Value |= 1ULL << (32+OpIdx-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: relative addressing mode
|
|
||||||
// XXX: kc_bank
|
|
||||||
|
|
||||||
// Emit the literal value, if applicable (4 bytes).
|
|
||||||
Emit(InlineConstant.i, OS);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::EmitTexInstr(const MCInst &MI,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups,
|
|
||||||
raw_ostream &OS) const {
|
|
||||||
|
|
||||||
unsigned opcode = MI.getOpcode();
|
|
||||||
bool hasOffsets = (opcode == AMDGPU::TEX_LD);
|
|
||||||
unsigned op_offset = hasOffsets ? 3 : 0;
|
|
||||||
int64_t sampler = MI.getOperand(op_offset+2).getImm();
|
|
||||||
int64_t textureType = MI.getOperand(op_offset+3).getImm();
|
|
||||||
unsigned srcSelect[4] = {0, 1, 2, 3};
|
|
||||||
|
|
||||||
// Emit instruction type
|
|
||||||
EmitByte(1, OS);
|
|
||||||
|
|
||||||
// Emit instruction
|
|
||||||
EmitByte(getBinaryCodeForInstr(MI, Fixups), OS);
|
|
||||||
|
|
||||||
// XXX: Emit resource id r600_shader.c uses sampler + 1. Why?
|
|
||||||
EmitByte(sampler + 1 + 1, OS);
|
|
||||||
|
|
||||||
// Emit source register
|
|
||||||
EmitByte(getHWReg(MI.getOperand(1).getReg()), OS);
|
|
||||||
|
|
||||||
// XXX: Emit src isRelativeAddress
|
|
||||||
EmitByte(0, OS);
|
|
||||||
|
|
||||||
// Emit destination register
|
|
||||||
EmitByte(getHWReg(MI.getOperand(0).getReg()), OS);
|
|
||||||
|
|
||||||
// XXX: Emit dst isRealtiveAddress
|
|
||||||
EmitByte(0, OS);
|
|
||||||
|
|
||||||
// XXX: Emit dst select
|
|
||||||
EmitByte(0, OS); // X
|
|
||||||
EmitByte(1, OS); // Y
|
|
||||||
EmitByte(2, OS); // Z
|
|
||||||
EmitByte(3, OS); // W
|
|
||||||
|
|
||||||
// XXX: Emit lod bias
|
|
||||||
EmitByte(0, OS);
|
|
||||||
|
|
||||||
// XXX: Emit coord types
|
|
||||||
unsigned coordType[4] = {1, 1, 1, 1};
|
|
||||||
|
|
||||||
if (textureType == TEXTURE_RECT
|
|
||||||
|| textureType == TEXTURE_SHADOWRECT) {
|
|
||||||
coordType[ELEMENT_X] = 0;
|
|
||||||
coordType[ELEMENT_Y] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (textureType == TEXTURE_1D_ARRAY
|
|
||||||
|| textureType == TEXTURE_SHADOW1D_ARRAY) {
|
|
||||||
if (opcode == AMDGPU::TEX_SAMPLE_C_L || opcode == AMDGPU::TEX_SAMPLE_C_LB) {
|
|
||||||
coordType[ELEMENT_Y] = 0;
|
|
||||||
} else {
|
|
||||||
coordType[ELEMENT_Z] = 0;
|
|
||||||
srcSelect[ELEMENT_Z] = ELEMENT_Y;
|
|
||||||
}
|
|
||||||
} else if (textureType == TEXTURE_2D_ARRAY
|
|
||||||
|| textureType == TEXTURE_SHADOW2D_ARRAY) {
|
|
||||||
coordType[ELEMENT_Z] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4; i++) {
|
|
||||||
EmitByte(coordType[i], OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX: Emit offsets
|
|
||||||
if (hasOffsets)
|
|
||||||
for (unsigned i = 2; i < 5; i++)
|
|
||||||
EmitByte(MI.getOperand(i).getImm()<<1, OS);
|
|
||||||
else
|
|
||||||
EmitNullBytes(3, OS);
|
|
||||||
|
|
||||||
// Emit sampler id
|
|
||||||
EmitByte(sampler, OS);
|
|
||||||
|
|
||||||
// XXX:Emit source select
|
|
||||||
if ((textureType == TEXTURE_SHADOW1D
|
|
||||||
|| textureType == TEXTURE_SHADOW2D
|
|
||||||
|| textureType == TEXTURE_SHADOWRECT
|
|
||||||
|| textureType == TEXTURE_SHADOW1D_ARRAY)
|
|
||||||
&& opcode != AMDGPU::TEX_SAMPLE_C_L
|
|
||||||
&& opcode != AMDGPU::TEX_SAMPLE_C_LB) {
|
|
||||||
srcSelect[ELEMENT_W] = ELEMENT_Z;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4; i++) {
|
|
||||||
EmitByte(srcSelect[i], OS);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::EmitFCInstr(const MCInst &MI, raw_ostream &OS) const {
|
|
||||||
|
|
||||||
// Emit instruction type
|
|
||||||
EmitByte(INSTR_FC, OS);
|
|
||||||
|
|
||||||
// Emit SRC
|
|
||||||
unsigned NumOperands = MI.getNumOperands();
|
|
||||||
if (NumOperands > 0) {
|
|
||||||
assert(NumOperands == 1);
|
|
||||||
EmitSrc(MI, 0, OS);
|
|
||||||
} else {
|
|
||||||
EmitNullBytes(SRC_BYTE_COUNT, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Emit FC Instruction
|
|
||||||
enum FCInstr instr;
|
|
||||||
switch (MI.getOpcode()) {
|
|
||||||
case AMDGPU::BREAK_LOGICALZ_f32:
|
|
||||||
instr = FC_BREAK;
|
|
||||||
break;
|
|
||||||
case AMDGPU::BREAK_LOGICALNZ_f32:
|
|
||||||
instr = FC_BREAK_NZ;
|
|
||||||
break;
|
|
||||||
case AMDGPU::BREAK_LOGICALNZ_i32:
|
|
||||||
instr = FC_BREAK_NZ_INT;
|
|
||||||
break;
|
|
||||||
case AMDGPU::BREAK_LOGICALZ_i32:
|
|
||||||
instr = FC_BREAK_Z_INT;
|
|
||||||
break;
|
|
||||||
case AMDGPU::CONTINUE_LOGICALNZ_f32:
|
|
||||||
case AMDGPU::CONTINUE_LOGICALNZ_i32:
|
|
||||||
instr = FC_CONTINUE;
|
|
||||||
break;
|
|
||||||
case AMDGPU::IF_LOGICALNZ_f32:
|
|
||||||
instr = FC_IF;
|
|
||||||
case AMDGPU::IF_LOGICALNZ_i32:
|
|
||||||
instr = FC_IF_INT;
|
|
||||||
break;
|
|
||||||
case AMDGPU::IF_LOGICALZ_f32:
|
|
||||||
abort();
|
|
||||||
break;
|
|
||||||
case AMDGPU::ELSE:
|
|
||||||
instr = FC_ELSE;
|
|
||||||
break;
|
|
||||||
case AMDGPU::ENDIF:
|
|
||||||
instr = FC_ENDIF;
|
|
||||||
break;
|
|
||||||
case AMDGPU::ENDLOOP:
|
|
||||||
instr = FC_ENDLOOP;
|
|
||||||
break;
|
|
||||||
case AMDGPU::WHILELOOP:
|
|
||||||
instr = FC_BGNLOOP;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
abort();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
EmitByte(instr, OS);
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::EmitNullBytes(unsigned int ByteCount,
|
|
||||||
raw_ostream &OS) const {
|
|
||||||
|
|
||||||
for (unsigned int i = 0; i < ByteCount; i++) {
|
|
||||||
EmitByte(0, OS);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::EmitByte(unsigned int Byte, raw_ostream &OS) const {
|
|
||||||
OS.write((uint8_t) Byte & 0xff);
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::EmitTwoBytes(unsigned int Bytes,
|
|
||||||
raw_ostream &OS) const {
|
|
||||||
OS.write((uint8_t) (Bytes & 0xff));
|
|
||||||
OS.write((uint8_t) ((Bytes >> 8) & 0xff));
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
|
|
||||||
for (unsigned i = 0; i < 4; i++) {
|
|
||||||
OS.write((uint8_t) ((Value >> (8 * i)) & 0xff));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
|
|
||||||
for (unsigned i = 0; i < 8; i++) {
|
|
||||||
EmitByte((Value >> (8 * i)) & 0xff, OS);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned R600MCCodeEmitter::getHWRegIndex(unsigned reg) const {
|
|
||||||
switch(reg) {
|
|
||||||
case AMDGPU::ZERO: return 248;
|
|
||||||
case AMDGPU::ONE:
|
|
||||||
case AMDGPU::NEG_ONE: return 249;
|
|
||||||
case AMDGPU::ONE_INT: return 250;
|
|
||||||
case AMDGPU::HALF:
|
|
||||||
case AMDGPU::NEG_HALF: return 252;
|
|
||||||
case AMDGPU::ALU_LITERAL_X: return 253;
|
|
||||||
case AMDGPU::PREDICATE_BIT:
|
|
||||||
case AMDGPU::PRED_SEL_OFF:
|
|
||||||
case AMDGPU::PRED_SEL_ZERO:
|
|
||||||
case AMDGPU::PRED_SEL_ONE:
|
|
||||||
return 0;
|
|
||||||
default: return getHWRegIndexGen(reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned R600MCCodeEmitter::getHWRegChan(unsigned reg) const {
|
|
||||||
switch(reg) {
|
|
||||||
case AMDGPU::ZERO:
|
|
||||||
case AMDGPU::ONE:
|
|
||||||
case AMDGPU::ONE_INT:
|
|
||||||
case AMDGPU::NEG_ONE:
|
|
||||||
case AMDGPU::HALF:
|
|
||||||
case AMDGPU::NEG_HALF:
|
|
||||||
case AMDGPU::ALU_LITERAL_X:
|
|
||||||
case AMDGPU::PREDICATE_BIT:
|
|
||||||
case AMDGPU::PRED_SEL_OFF:
|
|
||||||
case AMDGPU::PRED_SEL_ZERO:
|
|
||||||
case AMDGPU::PRED_SEL_ONE:
|
|
||||||
return 0;
|
|
||||||
default: return getHWRegChanGen(reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
|
|
||||||
unsigned HWReg;
|
|
||||||
|
|
||||||
HWReg = getHWRegIndex(RegNo);
|
|
||||||
if (AMDGPUMCRegisterClasses[AMDGPU::R600_CReg32RegClassID].contains(RegNo)) {
|
|
||||||
HWReg += 512;
|
|
||||||
}
|
|
||||||
return HWReg;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
|
|
||||||
const MCOperand &MO,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixup) const {
|
|
||||||
if (MO.isReg()) {
|
|
||||||
return getHWRegIndex(MO.getReg());
|
|
||||||
} else if (MO.isImm()) {
|
|
||||||
return MO.getImm();
|
|
||||||
} else {
|
|
||||||
assert(0);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Encoding helper functions
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
bool R600MCCodeEmitter::isFCOp(unsigned opcode) const {
|
|
||||||
switch(opcode) {
|
|
||||||
default: return false;
|
|
||||||
case AMDGPU::BREAK_LOGICALZ_f32:
|
|
||||||
case AMDGPU::BREAK_LOGICALNZ_i32:
|
|
||||||
case AMDGPU::BREAK_LOGICALZ_i32:
|
|
||||||
case AMDGPU::BREAK_LOGICALNZ_f32:
|
|
||||||
case AMDGPU::CONTINUE_LOGICALNZ_f32:
|
|
||||||
case AMDGPU::IF_LOGICALNZ_i32:
|
|
||||||
case AMDGPU::IF_LOGICALZ_f32:
|
|
||||||
case AMDGPU::ELSE:
|
|
||||||
case AMDGPU::ENDIF:
|
|
||||||
case AMDGPU::ENDLOOP:
|
|
||||||
case AMDGPU::IF_LOGICALNZ_f32:
|
|
||||||
case AMDGPU::WHILELOOP:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600MCCodeEmitter::isTexOp(unsigned opcode) const {
|
|
||||||
switch(opcode) {
|
|
||||||
default: return false;
|
|
||||||
case AMDGPU::TEX_LD:
|
|
||||||
case AMDGPU::TEX_GET_TEXTURE_RESINFO:
|
|
||||||
case AMDGPU::TEX_SAMPLE:
|
|
||||||
case AMDGPU::TEX_SAMPLE_C:
|
|
||||||
case AMDGPU::TEX_SAMPLE_L:
|
|
||||||
case AMDGPU::TEX_SAMPLE_C_L:
|
|
||||||
case AMDGPU::TEX_SAMPLE_LB:
|
|
||||||
case AMDGPU::TEX_SAMPLE_C_LB:
|
|
||||||
case AMDGPU::TEX_SAMPLE_G:
|
|
||||||
case AMDGPU::TEX_SAMPLE_C_G:
|
|
||||||
case AMDGPU::TEX_GET_GRADIENTS_H:
|
|
||||||
case AMDGPU::TEX_GET_GRADIENTS_V:
|
|
||||||
case AMDGPU::TEX_SET_GRADIENTS_H:
|
|
||||||
case AMDGPU::TEX_SET_GRADIENTS_V:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600MCCodeEmitter::isFlagSet(const MCInst &MI, unsigned Operand,
|
|
||||||
unsigned Flag) const {
|
|
||||||
const MCInstrDesc &MCDesc = MCII.get(MI.getOpcode());
|
|
||||||
unsigned FlagIndex = GET_FLAG_OPERAND_IDX(MCDesc.TSFlags);
|
|
||||||
if (FlagIndex == 0) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
assert(MI.getOperand(FlagIndex).isImm());
|
|
||||||
return !!((MI.getOperand(FlagIndex).getImm() >>
|
|
||||||
(NUM_MO_FLAGS * Operand)) & Flag);
|
|
||||||
}
|
|
||||||
#define R600RegisterInfo R600MCCodeEmitter
|
|
||||||
#include "R600HwRegInfo.include"
|
|
||||||
#undef R600RegisterInfo
|
|
||||||
|
|
||||||
#include "AMDGPUGenMCCodeEmitter.inc"
|
|
||||||
|
|
@ -1,296 +0,0 @@
|
||||||
//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// The SI code emitter produces machine code that can be executed directly on
|
|
||||||
// the GPU device.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
||||||
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
|
|
||||||
#include "llvm/MC/MCCodeEmitter.h"
|
|
||||||
#include "llvm/MC/MCContext.h"
|
|
||||||
#include "llvm/MC/MCInst.h"
|
|
||||||
#include "llvm/MC/MCInstrInfo.h"
|
|
||||||
#include "llvm/MC/MCRegisterInfo.h"
|
|
||||||
#include "llvm/MC/MCSubtargetInfo.h"
|
|
||||||
#include "llvm/Support/raw_ostream.h"
|
|
||||||
|
|
||||||
#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
|
|
||||||
#define SI_INSTR_FLAGS_ENCODING_MASK 0xf
|
|
||||||
|
|
||||||
// These must be kept in sync with SIInstructions.td and also the
|
|
||||||
// InstrEncodingInfo array in SIInstrInfo.cpp.
|
|
||||||
//
|
|
||||||
// NOTE: This enum is only used to identify the encoding type within LLVM,
|
|
||||||
// the actual encoding type that is part of the instruction format is different
|
|
||||||
namespace SIInstrEncodingType {
|
|
||||||
enum Encoding {
|
|
||||||
EXP = 0,
|
|
||||||
LDS = 1,
|
|
||||||
MIMG = 2,
|
|
||||||
MTBUF = 3,
|
|
||||||
MUBUF = 4,
|
|
||||||
SMRD = 5,
|
|
||||||
SOP1 = 6,
|
|
||||||
SOP2 = 7,
|
|
||||||
SOPC = 8,
|
|
||||||
SOPK = 9,
|
|
||||||
SOPP = 10,
|
|
||||||
VINTRP = 11,
|
|
||||||
VOP1 = 12,
|
|
||||||
VOP2 = 13,
|
|
||||||
VOP3 = 14,
|
|
||||||
VOPC = 15
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
|
|
||||||
SIMCCodeEmitter(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
|
|
||||||
void operator=(const SIMCCodeEmitter &); // DO NOT IMPLEMENT
|
|
||||||
const MCInstrInfo &MCII;
|
|
||||||
const MCSubtargetInfo &STI;
|
|
||||||
MCContext &Ctx;
|
|
||||||
|
|
||||||
public:
|
|
||||||
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
|
|
||||||
MCContext &ctx)
|
|
||||||
: MCII(mcii), STI(sti), Ctx(ctx) { }
|
|
||||||
|
|
||||||
~SIMCCodeEmitter() { }
|
|
||||||
|
|
||||||
/// EncodeInstruction - Encode the instruction and write it to the OS.
|
|
||||||
virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
|
||||||
|
|
||||||
/// getMachineOpValue - Reutrn the encoding for an MCOperand.
|
|
||||||
virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
/// GPRAlign - Encode a sequence of registers with the correct alignment.
|
|
||||||
unsigned GPRAlign(const MCInst &MI, unsigned OpNo, unsigned shift) const;
|
|
||||||
|
|
||||||
/// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
|
|
||||||
virtual unsigned GPR2AlignEncode(const MCInst &MI, unsigned OpNo,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixup) const;
|
|
||||||
|
|
||||||
/// GPR4AlignEncode - Encoding for when 4 consectuive registers are used
|
|
||||||
virtual unsigned GPR4AlignEncode(const MCInst &MI, unsigned OpNo,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixup) const;
|
|
||||||
|
|
||||||
/// SMRDmemriEncode - Encoding for SMRD indexed loads
|
|
||||||
virtual uint32_t SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixup) const;
|
|
||||||
|
|
||||||
/// VOPPostEncode - Post-Encoder method for VOP instructions
|
|
||||||
virtual uint64_t VOPPostEncode(const MCInst &MI, uint64_t Value) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
///getEncodingType = Return this SIInstrEncodingType for this instruction.
|
|
||||||
unsigned getEncodingType(const MCInst &MI) const;
|
|
||||||
|
|
||||||
///getEncodingBytes - Get then size in bytes of this instructions encoding.
|
|
||||||
unsigned getEncodingBytes(const MCInst &MI) const;
|
|
||||||
|
|
||||||
/// getRegBinaryCode - Returns the hardware encoding for a register
|
|
||||||
unsigned getRegBinaryCode(unsigned reg) const;
|
|
||||||
|
|
||||||
/// getHWRegNum - Generated function that returns the hardware encoding for
|
|
||||||
/// a register
|
|
||||||
unsigned getHWRegNum(unsigned reg) const;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End anonymous namespace
|
|
||||||
|
|
||||||
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
|
|
||||||
const MCSubtargetInfo &STI,
|
|
||||||
MCContext &Ctx) {
|
|
||||||
return new SIMCCodeEmitter(MCII, STI, Ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SIMCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
|
||||||
uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups);
|
|
||||||
unsigned bytes = getEncodingBytes(MI);
|
|
||||||
for (unsigned i = 0; i < bytes; i++) {
|
|
||||||
OS.write((uint8_t) ((Encoding >> (8 * i)) & 0xff));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
|
|
||||||
const MCOperand &MO,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixups) const {
|
|
||||||
if (MO.isReg()) {
|
|
||||||
return getRegBinaryCode(MO.getReg());
|
|
||||||
} else if (MO.isImm()) {
|
|
||||||
return MO.getImm();
|
|
||||||
} else if (MO.isFPImm()) {
|
|
||||||
// XXX: Not all instructions can use inline literals
|
|
||||||
// XXX: We should make sure this is a 32-bit constant
|
|
||||||
union {
|
|
||||||
float F;
|
|
||||||
uint32_t I;
|
|
||||||
} Imm;
|
|
||||||
Imm.F = MO.getFPImm();
|
|
||||||
return Imm.I;
|
|
||||||
} else{
|
|
||||||
llvm_unreachable("Encoding of this operand type is not supported yet.");
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Custom Operand Encodings
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
unsigned SIMCCodeEmitter::GPRAlign(const MCInst &MI, unsigned OpNo,
|
|
||||||
unsigned shift) const {
|
|
||||||
unsigned regCode = getRegBinaryCode(MI.getOperand(OpNo).getReg());
|
|
||||||
return regCode >> shift;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
unsigned SIMCCodeEmitter::GPR2AlignEncode(const MCInst &MI,
|
|
||||||
unsigned OpNo ,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixup) const {
|
|
||||||
return GPRAlign(MI, OpNo, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned SIMCCodeEmitter::GPR4AlignEncode(const MCInst &MI,
|
|
||||||
unsigned OpNo,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixup) const {
|
|
||||||
return GPRAlign(MI, OpNo, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SMRD_OFFSET_MASK 0xff
|
|
||||||
#define SMRD_IMM_SHIFT 8
|
|
||||||
#define SMRD_SBASE_MASK 0x3f
|
|
||||||
#define SMRD_SBASE_SHIFT 9
|
|
||||||
/// SMRDmemriEncode - This function is responsibe for encoding the offset
|
|
||||||
/// and the base ptr for SMRD instructions it should return a bit string in
|
|
||||||
/// this format:
|
|
||||||
///
|
|
||||||
/// OFFSET = bits{7-0}
|
|
||||||
/// IMM = bits{8}
|
|
||||||
/// SBASE = bits{14-9}
|
|
||||||
///
|
|
||||||
uint32_t SIMCCodeEmitter::SMRDmemriEncode(const MCInst &MI, unsigned OpNo,
|
|
||||||
SmallVectorImpl<MCFixup> &Fixup) const {
|
|
||||||
uint32_t Encoding;
|
|
||||||
|
|
||||||
const MCOperand &OffsetOp = MI.getOperand(OpNo + 1);
|
|
||||||
|
|
||||||
//XXX: Use this function for SMRD loads with register offsets
|
|
||||||
assert(OffsetOp.isImm());
|
|
||||||
|
|
||||||
Encoding =
|
|
||||||
(getMachineOpValue(MI, OffsetOp, Fixup) & SMRD_OFFSET_MASK)
|
|
||||||
| (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
|
|
||||||
| ((GPR2AlignEncode(MI, OpNo, Fixup) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
|
|
||||||
;
|
|
||||||
|
|
||||||
return Encoding;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Post Encoder Callbacks
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
uint64_t SIMCCodeEmitter::VOPPostEncode(const MCInst &MI, uint64_t Value) const{
|
|
||||||
unsigned encodingType = getEncodingType(MI);
|
|
||||||
unsigned numSrcOps;
|
|
||||||
unsigned vgprBitOffset;
|
|
||||||
|
|
||||||
if (encodingType == SIInstrEncodingType::VOP3) {
|
|
||||||
numSrcOps = 3;
|
|
||||||
vgprBitOffset = 32;
|
|
||||||
} else {
|
|
||||||
numSrcOps = 1;
|
|
||||||
vgprBitOffset = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add one to skip over the destination reg operand.
|
|
||||||
for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
|
|
||||||
const MCOperand &MO = MI.getOperand(opIdx);
|
|
||||||
if (MO.isReg()) {
|
|
||||||
unsigned reg = MI.getOperand(opIdx).getReg();
|
|
||||||
if (AMDGPUMCRegisterClasses[AMDGPU::VReg_32RegClassID].contains(reg) ||
|
|
||||||
AMDGPUMCRegisterClasses[AMDGPU::VReg_64RegClassID].contains(reg)) {
|
|
||||||
Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
|
|
||||||
}
|
|
||||||
} else if (MO.isFPImm()) {
|
|
||||||
union {
|
|
||||||
float f;
|
|
||||||
uint32_t i;
|
|
||||||
} Imm;
|
|
||||||
// XXX: Not all instructions can use inline literals
|
|
||||||
// XXX: We should make sure this is a 32-bit constant
|
|
||||||
Imm.f = MO.getFPImm();
|
|
||||||
Value |= ((uint64_t)Imm.i) << 32;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Value;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Encoding helper functions
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
unsigned SIMCCodeEmitter::getEncodingType(const MCInst &MI) const {
|
|
||||||
return MCII.get(MI.getOpcode()).TSFlags & SI_INSTR_FLAGS_ENCODING_MASK;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned SIMCCodeEmitter::getEncodingBytes(const MCInst &MI) const {
|
|
||||||
|
|
||||||
// These instructions aren't real instructions with an encoding type, so
|
|
||||||
// we need to manually specify their size.
|
|
||||||
switch (MI.getOpcode()) {
|
|
||||||
default: break;
|
|
||||||
case AMDGPU::SI_LOAD_LITERAL_I32:
|
|
||||||
case AMDGPU::SI_LOAD_LITERAL_F32:
|
|
||||||
return 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned encoding_type = getEncodingType(MI);
|
|
||||||
switch (encoding_type) {
|
|
||||||
case SIInstrEncodingType::EXP:
|
|
||||||
case SIInstrEncodingType::LDS:
|
|
||||||
case SIInstrEncodingType::MUBUF:
|
|
||||||
case SIInstrEncodingType::MTBUF:
|
|
||||||
case SIInstrEncodingType::MIMG:
|
|
||||||
case SIInstrEncodingType::VOP3:
|
|
||||||
return 8;
|
|
||||||
default:
|
|
||||||
return 4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
unsigned SIMCCodeEmitter::getRegBinaryCode(unsigned reg) const {
|
|
||||||
switch (reg) {
|
|
||||||
case AMDGPU::VCC: return 106;
|
|
||||||
case AMDGPU::M0: return 124;
|
|
||||||
case AMDGPU::EXEC: return 126;
|
|
||||||
case AMDGPU::EXEC_LO: return 126;
|
|
||||||
case AMDGPU::EXEC_HI: return 127;
|
|
||||||
case AMDGPU::SREG_LIT_0: return 128;
|
|
||||||
case AMDGPU::SI_LITERAL_CONSTANT: return 255;
|
|
||||||
default: return getHWRegNum(reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#define SIRegisterInfo SIMCCodeEmitter
|
|
||||||
#include "SIRegisterGetHWRegNum.inc"
|
|
||||||
#undef SIRegisterInfo
|
|
||||||
|
|
@ -8,74 +8,8 @@ LIBNAME = radeon
|
||||||
|
|
||||||
LIBRARY_INCLUDES = -I$(TOP)/include
|
LIBRARY_INCLUDES = -I$(TOP)/include
|
||||||
|
|
||||||
TBLGEN = $(LLVM_BINDIR)/llvm-tblgen
|
|
||||||
|
|
||||||
CXXFLAGS+= $(LLVM_CXXFLAGS)
|
CXXFLAGS+= $(LLVM_CXXFLAGS)
|
||||||
|
|
||||||
ifeq ($(LLVM_VERSION),3.1)
|
|
||||||
CPP_SOURCES += $(LLVM_CPP_SOURCES)
|
|
||||||
GENERATED_SOURCES = $(LLVM_GENERATED_SOURCES)
|
|
||||||
else
|
|
||||||
CXXFLAGS+= -DEXTERNAL_LLVM
|
|
||||||
endif
|
|
||||||
|
|
||||||
include ../../Makefile.template
|
include ../../Makefile.template
|
||||||
|
|
||||||
CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS))
|
CXXFLAGS := $(filter-out -DDEBUG, $(CXXFLAGS))
|
||||||
|
|
||||||
tablegen = $(TBLGEN) -I $(LLVM_INCLUDEDIR) $1 $2 -o $3
|
|
||||||
|
|
||||||
HAVE_LLVM_INTRINSICS = $(shell grep IntrinsicsR600.td $(LLVM_INCLUDEDIR)/llvm/Intrinsics.td)
|
|
||||||
|
|
||||||
SIRegisterInfo.td: SIGenRegisterInfo.pl
|
|
||||||
$(PERL) $^ > $@
|
|
||||||
|
|
||||||
SIRegisterGetHWRegNum.inc: SIGenRegisterInfo.pl
|
|
||||||
$(PERL) $^ $@ > /dev/null
|
|
||||||
|
|
||||||
R600Intrinsics.td: R600IntrinsicsNoOpenCL.td R600IntrinsicsOpenCL.td
|
|
||||||
ifeq ($(HAVE_LLVM_INTRINSICS),)
|
|
||||||
cp R600IntrinsicsNoOpenCL.td R600Intrinsics.td
|
|
||||||
else
|
|
||||||
cp R600IntrinsicsOpenCL.td R600Intrinsics.td
|
|
||||||
endif
|
|
||||||
|
|
||||||
R600RegisterInfo.td: R600GenRegisterInfo.pl
|
|
||||||
$(PERL) $^ > $@
|
|
||||||
|
|
||||||
AMDGPUGenRegisterInfo.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-register-info, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenInstrInfo.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-instr-info, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenAsmWriter.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-asm-writer, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenDAGISel.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-dag-isel, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenCallingConv.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-callingconv, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenSubtargetInfo.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-subtarget, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenEDInfo.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-enhanced-disassembly-info, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenIntrinsics.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-tgt-intrinsic, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenCodeEmitter.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-emitter, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenMCCodeEmitter.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -mc-emitter -gen-emitter, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
AMDGPUGenDFAPacketizer.inc: $(TD_FILES)
|
|
||||||
$(call tablegen, -gen-dfa-packetizer, AMDGPU.td, $@)
|
|
||||||
|
|
||||||
LOADER_LIBS=$(shell llvm-config --libs bitreader asmparser)
|
|
||||||
loader: loader.o libradeon.a
|
|
||||||
gcc -o loader $(LLVM_LDFLAGS) -L/usr/local/lib $(LDFLAGS) loader.o libradeon.a $(LLVM_LIBS) $(LOADER_LIBS) -lpthread -ldl -lstdc++ -lm
|
|
||||||
|
|
|
||||||
|
|
@ -1,86 +1,3 @@
|
||||||
|
|
||||||
TD_FILES := \
|
|
||||||
AMDGPU.td \
|
|
||||||
AMDGPUInstrInfo.td \
|
|
||||||
AMDGPUInstructions.td \
|
|
||||||
AMDGPUIntrinsics.td \
|
|
||||||
AMDGPURegisterInfo.td \
|
|
||||||
AMDILBase.td \
|
|
||||||
AMDILInstrInfo.td \
|
|
||||||
AMDILIntrinsics.td \
|
|
||||||
AMDILRegisterInfo.td \
|
|
||||||
Processors.td \
|
|
||||||
R600Instructions.td \
|
|
||||||
R600Intrinsics.td \
|
|
||||||
R600IntrinsicsNoOpenCL.td \
|
|
||||||
R600IntrinsicsOpenCL.td \
|
|
||||||
R600RegisterInfo.td \
|
|
||||||
R600Schedule.td \
|
|
||||||
SIInstrFormats.td \
|
|
||||||
SIInstrInfo.td \
|
|
||||||
SIInstructions.td \
|
|
||||||
SIIntrinsics.td \
|
|
||||||
SIRegisterInfo.td \
|
|
||||||
SISchedule.td
|
|
||||||
|
|
||||||
LLVM_GENERATED_SOURCES := \
|
|
||||||
R600Intrinsics.td \
|
|
||||||
R600RegisterInfo.td \
|
|
||||||
SIRegisterInfo.td \
|
|
||||||
SIRegisterGetHWRegNum.inc \
|
|
||||||
AMDGPUGenRegisterInfo.inc \
|
|
||||||
AMDGPUGenInstrInfo.inc \
|
|
||||||
AMDGPUGenAsmWriter.inc \
|
|
||||||
AMDGPUGenDAGISel.inc \
|
|
||||||
AMDGPUGenCallingConv.inc \
|
|
||||||
AMDGPUGenSubtargetInfo.inc \
|
|
||||||
AMDGPUGenEDInfo.inc \
|
|
||||||
AMDGPUGenIntrinsics.inc \
|
|
||||||
AMDGPUGenCodeEmitter.inc \
|
|
||||||
AMDGPUGenMCCodeEmitter.inc \
|
|
||||||
AMDGPUGenDFAPacketizer.inc
|
|
||||||
|
|
||||||
LLVM_CPP_SOURCES := \
|
|
||||||
AMDIL7XXDevice.cpp \
|
|
||||||
AMDILCFGStructurizer.cpp \
|
|
||||||
AMDILDevice.cpp \
|
|
||||||
AMDILDeviceInfo.cpp \
|
|
||||||
AMDILEvergreenDevice.cpp \
|
|
||||||
AMDILFrameLowering.cpp \
|
|
||||||
AMDILIntrinsicInfo.cpp \
|
|
||||||
AMDILISelDAGToDAG.cpp \
|
|
||||||
AMDILISelLowering.cpp \
|
|
||||||
AMDILNIDevice.cpp \
|
|
||||||
AMDILPeepholeOptimizer.cpp \
|
|
||||||
AMDILSIDevice.cpp \
|
|
||||||
AMDGPUAsmPrinter.cpp \
|
|
||||||
AMDGPUMCInstLower.cpp \
|
|
||||||
AMDGPUSubtarget.cpp \
|
|
||||||
AMDGPUTargetMachine.cpp \
|
|
||||||
AMDGPUISelLowering.cpp \
|
|
||||||
AMDGPUConvertToISA.cpp \
|
|
||||||
AMDGPUInstrInfo.cpp \
|
|
||||||
AMDGPURegisterInfo.cpp \
|
|
||||||
R600ExpandSpecialInstrs.cpp \
|
|
||||||
R600ISelLowering.cpp \
|
|
||||||
R600InstrInfo.cpp \
|
|
||||||
R600MachineFunctionInfo.cpp \
|
|
||||||
R600RegisterInfo.cpp \
|
|
||||||
SIAssignInterpRegs.cpp \
|
|
||||||
SIInstrInfo.cpp \
|
|
||||||
SIISelLowering.cpp \
|
|
||||||
SILowerLiteralConstants.cpp \
|
|
||||||
SILowerFlowControl.cpp \
|
|
||||||
SIMachineFunctionInfo.cpp \
|
|
||||||
SIRegisterInfo.cpp \
|
|
||||||
InstPrinter/AMDGPUInstPrinter.cpp \
|
|
||||||
MCTargetDesc/AMDGPUMCAsmInfo.cpp \
|
|
||||||
MCTargetDesc/AMDGPUAsmBackend.cpp \
|
|
||||||
MCTargetDesc/AMDGPUMCTargetDesc.cpp \
|
|
||||||
MCTargetDesc/SIMCCodeEmitter.cpp \
|
|
||||||
MCTargetDesc/R600MCCodeEmitter.cpp \
|
|
||||||
TargetInfo/AMDGPUTargetInfo.cpp \
|
|
||||||
|
|
||||||
CPP_SOURCES := \
|
CPP_SOURCES := \
|
||||||
radeon_llvm_emit.cpp
|
radeon_llvm_emit.cpp
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,29 +0,0 @@
|
||||||
//===-- Processors.td - TODO: Add brief description -------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// AMDIL processors supported.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
class Proc<string Name, ProcessorItineraries itin, list<SubtargetFeature> Features>
|
|
||||||
: Processor<Name, itin, Features>;
|
|
||||||
def : Proc<"r600", R600_EG_Itin, [FeatureR600ALUInst]>;
|
|
||||||
def : Proc<"rv710", R600_EG_Itin, []>;
|
|
||||||
def : Proc<"rv730", R600_EG_Itin, []>;
|
|
||||||
def : Proc<"rv770", R600_EG_Itin, [FeatureFP64]>;
|
|
||||||
def : Proc<"cedar", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
|
||||||
def : Proc<"redwood", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
|
||||||
def : Proc<"juniper", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
|
||||||
def : Proc<"cypress", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
|
||||||
def : Proc<"barts", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
|
||||||
def : Proc<"turks", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
|
||||||
def : Proc<"caicos", R600_EG_Itin, [FeatureByteAddress, FeatureImages]>;
|
|
||||||
def : Proc<"cayman", R600_EG_Itin, [FeatureByteAddress, FeatureImages, FeatureFP64]>;
|
|
||||||
def : Proc<"SI", SI_Itin, [Feature64BitPtr]>;
|
|
||||||
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
//===-- R600Defines.h - R600 Helper Macros ----------------------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
// Operand Flags
|
|
||||||
#define MO_FLAG_CLAMP (1 << 0)
|
|
||||||
#define MO_FLAG_NEG (1 << 1)
|
|
||||||
#define MO_FLAG_ABS (1 << 2)
|
|
||||||
#define MO_FLAG_MASK (1 << 3)
|
|
||||||
#define MO_FLAG_PUSH (1 << 4)
|
|
||||||
#define MO_FLAG_NOT_LAST (1 << 5)
|
|
||||||
#define NUM_MO_FLAGS 6
|
|
||||||
|
|
||||||
// Helper for finding getting the operand index for the instruction flags
|
|
||||||
// operand.
|
|
||||||
#define GET_FLAG_OPERAND_IDX(Flags) (((Flags) >> 7) & 0x3)
|
|
||||||
|
|
||||||
namespace R600_InstFlag {
|
|
||||||
enum TIF {
|
|
||||||
TRANS_ONLY = (1 << 0),
|
|
||||||
TEX = (1 << 1),
|
|
||||||
REDUCTION = (1 << 2),
|
|
||||||
FC = (1 << 3),
|
|
||||||
TRIG = (1 << 4),
|
|
||||||
OP3 = (1 << 5),
|
|
||||||
VECTOR = (1 << 6)
|
|
||||||
//FlagOperand bits 7, 8
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
@ -1,292 +0,0 @@
|
||||||
//===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Vector, Reduction, and Cube instructions need to fill the entire instruction
|
|
||||||
// group to work correctly. This pass expands these individual instructions
|
|
||||||
// into several instructions that will completely fill the instruction group.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPU.h"
|
|
||||||
#include "R600Defines.h"
|
|
||||||
#include "R600InstrInfo.h"
|
|
||||||
#include "R600RegisterInfo.h"
|
|
||||||
#include "R600MachineFunctionInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
|
|
||||||
|
|
||||||
private:
|
|
||||||
static char ID;
|
|
||||||
const R600InstrInfo *TII;
|
|
||||||
|
|
||||||
bool ExpandInputPerspective(MachineInstr& MI);
|
|
||||||
bool ExpandInputConstant(MachineInstr& MI);
|
|
||||||
|
|
||||||
public:
|
|
||||||
R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
|
|
||||||
TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
|
|
||||||
|
|
||||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
|
||||||
|
|
||||||
const char *getPassName() const {
|
|
||||||
return "R600 Expand special instructions pass";
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End anonymous namespace
|
|
||||||
|
|
||||||
char R600ExpandSpecialInstrsPass::ID = 0;
|
|
||||||
|
|
||||||
FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
|
|
||||||
return new R600ExpandSpecialInstrsPass(TM);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
|
|
||||||
{
|
|
||||||
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
|
||||||
if (MI.getOpcode() != AMDGPU::input_perspective)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
MachineBasicBlock::iterator I = &MI;
|
|
||||||
unsigned DstReg = MI.getOperand(0).getReg();
|
|
||||||
R600MachineFunctionInfo *MFI = MI.getParent()->getParent()
|
|
||||||
->getInfo<R600MachineFunctionInfo>();
|
|
||||||
unsigned IJIndexBase;
|
|
||||||
|
|
||||||
// In Evergreen ISA doc section 8.3.2 :
|
|
||||||
// We need to interpolate XY and ZW in two different instruction groups.
|
|
||||||
// An INTERP_* must occupy all 4 slots of an instruction group.
|
|
||||||
// Output of INTERP_XY is written in X,Y slots
|
|
||||||
// Output of INTERP_ZW is written in Z,W slots
|
|
||||||
//
|
|
||||||
// Thus interpolation requires the following sequences :
|
|
||||||
//
|
|
||||||
// AnyGPR.x = INTERP_ZW; (Write Masked Out)
|
|
||||||
// AnyGPR.y = INTERP_ZW; (Write Masked Out)
|
|
||||||
// DstGPR.z = INTERP_ZW;
|
|
||||||
// DstGPR.w = INTERP_ZW; (End of first IG)
|
|
||||||
// DstGPR.x = INTERP_XY;
|
|
||||||
// DstGPR.y = INTERP_XY;
|
|
||||||
// AnyGPR.z = INTERP_XY; (Write Masked Out)
|
|
||||||
// AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
|
|
||||||
//
|
|
||||||
switch (MI.getOperand(1).getImm()) {
|
|
||||||
case 0:
|
|
||||||
IJIndexBase = MFI->GetIJPerspectiveIndex();
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
IJIndexBase = MFI->GetIJLinearIndex();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(0 && "Unknow ij index");
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 8; i++) {
|
|
||||||
unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
|
|
||||||
2 * IJIndexBase + ((i + 1) % 2));
|
|
||||||
unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
|
|
||||||
4 * MI.getOperand(2).getImm());
|
|
||||||
|
|
||||||
unsigned Sel;
|
|
||||||
switch (i % 4) {
|
|
||||||
case 0:Sel = AMDGPU::sel_x;break;
|
|
||||||
case 1:Sel = AMDGPU::sel_y;break;
|
|
||||||
case 2:Sel = AMDGPU::sel_z;break;
|
|
||||||
case 3:Sel = AMDGPU::sel_w;break;
|
|
||||||
default:break;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned Res = TRI.getSubReg(DstReg, Sel);
|
|
||||||
|
|
||||||
const MCInstrDesc &Opcode = (i < 4)?
|
|
||||||
TII->get(AMDGPU::INTERP_ZW):
|
|
||||||
TII->get(AMDGPU::INTERP_XY);
|
|
||||||
|
|
||||||
MachineInstr *NewMI = BuildMI(*(MI.getParent()),
|
|
||||||
I, MI.getParent()->findDebugLoc(I),
|
|
||||||
Opcode, Res)
|
|
||||||
.addReg(IJIndex)
|
|
||||||
.addReg(ReadReg)
|
|
||||||
.addImm(0);
|
|
||||||
|
|
||||||
if (!(i> 1 && i < 6)) {
|
|
||||||
TII->addFlag(NewMI, 0, MO_FLAG_MASK);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i % 4 != 3)
|
|
||||||
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
|
|
||||||
}
|
|
||||||
|
|
||||||
MI.eraseFromParent();
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
|
|
||||||
{
|
|
||||||
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
|
||||||
if (MI.getOpcode() != AMDGPU::input_constant)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
MachineBasicBlock::iterator I = &MI;
|
|
||||||
unsigned DstReg = MI.getOperand(0).getReg();
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4; i++) {
|
|
||||||
unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
|
|
||||||
4 * MI.getOperand(1).getImm() + i);
|
|
||||||
|
|
||||||
unsigned Sel;
|
|
||||||
switch (i % 4) {
|
|
||||||
case 0:Sel = AMDGPU::sel_x;break;
|
|
||||||
case 1:Sel = AMDGPU::sel_y;break;
|
|
||||||
case 2:Sel = AMDGPU::sel_z;break;
|
|
||||||
case 3:Sel = AMDGPU::sel_w;break;
|
|
||||||
default:break;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned Res = TRI.getSubReg(DstReg, Sel);
|
|
||||||
|
|
||||||
MachineInstr *NewMI = BuildMI(*(MI.getParent()),
|
|
||||||
I, MI.getParent()->findDebugLoc(I),
|
|
||||||
TII->get(AMDGPU::INTERP_LOAD_P0), Res)
|
|
||||||
.addReg(ReadReg)
|
|
||||||
.addImm(0);
|
|
||||||
|
|
||||||
if (i % 4 != 3)
|
|
||||||
TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
|
|
||||||
}
|
|
||||||
|
|
||||||
MI.eraseFromParent();
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
|
|
||||||
|
|
||||||
const R600RegisterInfo &TRI = TII->getRegisterInfo();
|
|
||||||
|
|
||||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
|
||||||
BB != BB_E; ++BB) {
|
|
||||||
MachineBasicBlock &MBB = *BB;
|
|
||||||
MachineBasicBlock::iterator I = MBB.begin();
|
|
||||||
while (I != MBB.end()) {
|
|
||||||
MachineInstr &MI = *I;
|
|
||||||
I = llvm::next(I);
|
|
||||||
|
|
||||||
if (ExpandInputPerspective(MI))
|
|
||||||
continue;
|
|
||||||
if (ExpandInputConstant(MI))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
bool IsReduction = TII->isReductionOp(MI.getOpcode());
|
|
||||||
bool IsVector = TII->isVector(MI);
|
|
||||||
bool IsCube = TII->isCubeOp(MI.getOpcode());
|
|
||||||
if (!IsReduction && !IsVector && !IsCube) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Expand the instruction
|
|
||||||
//
|
|
||||||
// Reduction instructions:
|
|
||||||
// T0_X = DP4 T1_XYZW, T2_XYZW
|
|
||||||
// becomes:
|
|
||||||
// TO_X = DP4 T1_X, T2_X
|
|
||||||
// TO_Y (write masked) = DP4 T1_Y, T2_Y
|
|
||||||
// TO_Z (write masked) = DP4 T1_Z, T2_Z
|
|
||||||
// TO_W (write masked) = DP4 T1_W, T2_W
|
|
||||||
//
|
|
||||||
// Vector instructions:
|
|
||||||
// T0_X = MULLO_INT T1_X, T2_X
|
|
||||||
// becomes:
|
|
||||||
// T0_X = MULLO_INT T1_X, T2_X
|
|
||||||
// T0_Y (write masked) = MULLO_INT T1_X, T2_X
|
|
||||||
// T0_Z (write masked) = MULLO_INT T1_X, T2_X
|
|
||||||
// T0_W (write masked) = MULLO_INT T1_X, T2_X
|
|
||||||
//
|
|
||||||
// Cube instructions:
|
|
||||||
// T0_XYZW = CUBE T1_XYZW
|
|
||||||
// becomes:
|
|
||||||
// TO_X = CUBE T1_Z, T1_Y
|
|
||||||
// T0_Y = CUBE T1_Z, T1_X
|
|
||||||
// T0_Z = CUBE T1_X, T1_Z
|
|
||||||
// T0_W = CUBE T1_Y, T1_Z
|
|
||||||
for (unsigned Chan = 0; Chan < 4; Chan++) {
|
|
||||||
unsigned DstReg = MI.getOperand(0).getReg();
|
|
||||||
unsigned Src0 = MI.getOperand(1).getReg();
|
|
||||||
unsigned Src1 = 0;
|
|
||||||
|
|
||||||
// Determine the correct source registers
|
|
||||||
if (!IsCube) {
|
|
||||||
Src1 = MI.getOperand(2).getReg();
|
|
||||||
}
|
|
||||||
if (IsReduction) {
|
|
||||||
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
|
|
||||||
Src0 = TRI.getSubReg(Src0, SubRegIndex);
|
|
||||||
Src1 = TRI.getSubReg(Src1, SubRegIndex);
|
|
||||||
} else if (IsCube) {
|
|
||||||
static const int CubeSrcSwz[] = {2, 2, 0, 1};
|
|
||||||
unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
|
|
||||||
unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
|
|
||||||
Src1 = TRI.getSubReg(Src0, SubRegIndex1);
|
|
||||||
Src0 = TRI.getSubReg(Src0, SubRegIndex0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Determine the correct destination registers;
|
|
||||||
unsigned Flags = 0;
|
|
||||||
if (IsCube) {
|
|
||||||
unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
|
|
||||||
DstReg = TRI.getSubReg(DstReg, SubRegIndex);
|
|
||||||
} else {
|
|
||||||
// Mask the write if the original instruction does not write to
|
|
||||||
// the current Channel.
|
|
||||||
Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
|
|
||||||
unsigned DstBase = TRI.getHWRegIndex(DstReg);
|
|
||||||
DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set the IsLast bit
|
|
||||||
Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
|
|
||||||
|
|
||||||
// Add the new instruction
|
|
||||||
unsigned Opcode;
|
|
||||||
if (IsCube) {
|
|
||||||
switch (MI.getOpcode()) {
|
|
||||||
case AMDGPU::CUBE_r600_pseudo:
|
|
||||||
Opcode = AMDGPU::CUBE_r600_real;
|
|
||||||
break;
|
|
||||||
case AMDGPU::CUBE_eg_pseudo:
|
|
||||||
Opcode = AMDGPU::CUBE_eg_real;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(!"Unknown CUBE instruction");
|
|
||||||
Opcode = 0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Opcode = MI.getOpcode();
|
|
||||||
}
|
|
||||||
MachineInstr *NewMI =
|
|
||||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
|
|
||||||
.addReg(Src0)
|
|
||||||
.addReg(Src1)
|
|
||||||
.addImm(0); // Flag
|
|
||||||
|
|
||||||
NewMI->setIsInsideBundle(Chan != 0);
|
|
||||||
TII->addFlag(NewMI, 0, Flags);
|
|
||||||
}
|
|
||||||
MI.eraseFromParent();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
@ -1,206 +0,0 @@
|
||||||
#===-- R600GenRegisterInfo.pl - Script for generating register info files --===#
|
|
||||||
#
|
|
||||||
# The LLVM Compiler Infrastructure
|
|
||||||
#
|
|
||||||
# This file is distributed under the University of Illinois Open Source
|
|
||||||
# License. See LICENSE.TXT for details.
|
|
||||||
#
|
|
||||||
#===------------------------------------------------------------------------===#
|
|
||||||
#
|
|
||||||
# This perl script prints to stdout .td code to be used as R600RegisterInfo.td
|
|
||||||
# it also generates a file called R600HwRegInfo.include, which contains helper
|
|
||||||
# functions for determining the hw encoding of registers.
|
|
||||||
#
|
|
||||||
#===------------------------------------------------------------------------===#
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use warnings;
|
|
||||||
|
|
||||||
use constant CONST_REG_COUNT => 512;
|
|
||||||
use constant TEMP_REG_COUNT => 128;
|
|
||||||
|
|
||||||
my $CREG_MAX = CONST_REG_COUNT - 1;
|
|
||||||
my $TREG_MAX = TEMP_REG_COUNT - 1;
|
|
||||||
|
|
||||||
print <<STRING;
|
|
||||||
|
|
||||||
class R600Reg <string name> : Register<name> {
|
|
||||||
let Namespace = "AMDGPU";
|
|
||||||
}
|
|
||||||
|
|
||||||
class R600Reg_128<string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
|
|
||||||
let Namespace = "AMDGPU";
|
|
||||||
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
|
|
||||||
}
|
|
||||||
|
|
||||||
STRING
|
|
||||||
|
|
||||||
my $i;
|
|
||||||
|
|
||||||
### REG DEFS ###
|
|
||||||
|
|
||||||
my @creg_list = print_reg_defs(CONST_REG_COUNT * 4, "C");
|
|
||||||
my @treg_list = print_reg_defs(TEMP_REG_COUNT * 4, "T");
|
|
||||||
|
|
||||||
my @t128reg;
|
|
||||||
my @treg_x;
|
|
||||||
for (my $i = 0; $i < TEMP_REG_COUNT; $i++) {
|
|
||||||
my $name = "T$i\_XYZW";
|
|
||||||
print qq{def $name : R600Reg_128 <"T$i.XYZW", [T$i\_X, T$i\_Y, T$i\_Z, T$i\_W] >;\n};
|
|
||||||
$t128reg[$i] = $name;
|
|
||||||
$treg_x[$i] = "T$i\_X";
|
|
||||||
}
|
|
||||||
|
|
||||||
my $treg_string = join(",", @treg_list);
|
|
||||||
my $creg_list = join(",", @creg_list);
|
|
||||||
my $t128_string = join(",", @t128reg);
|
|
||||||
my $treg_x_string = join(",", @treg_x);
|
|
||||||
print <<STRING;
|
|
||||||
|
|
||||||
class RegSet <dag s> {
|
|
||||||
dag set = s;
|
|
||||||
}
|
|
||||||
|
|
||||||
def ZERO : R600Reg<"0.0">;
|
|
||||||
def HALF : R600Reg<"0.5">;
|
|
||||||
def ONE : R600Reg<"1.0">;
|
|
||||||
def ONE_INT : R600Reg<"1">;
|
|
||||||
def NEG_HALF : R600Reg<"-0.5">;
|
|
||||||
def NEG_ONE : R600Reg<"-1.0">;
|
|
||||||
def PV_X : R600Reg<"pv.x">;
|
|
||||||
def ALU_LITERAL_X : R600Reg<"literal.x">;
|
|
||||||
def PREDICATE_BIT : R600Reg<"PredicateBit">;
|
|
||||||
def PRED_SEL_OFF: R600Reg<"Pred_sel_off">;
|
|
||||||
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero">;
|
|
||||||
def PRED_SEL_ONE : R600Reg<"Pred_sel_one">;
|
|
||||||
|
|
||||||
def R600_CReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
|
||||||
$creg_list)>;
|
|
||||||
|
|
||||||
def R600_TReg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
|
||||||
$treg_string)>;
|
|
||||||
|
|
||||||
def R600_TReg32_X : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
|
||||||
$treg_x_string)>;
|
|
||||||
|
|
||||||
def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add
|
|
||||||
R600_TReg32,
|
|
||||||
R600_CReg32,
|
|
||||||
ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF)>;
|
|
||||||
|
|
||||||
def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add
|
|
||||||
PRED_SEL_OFF, PRED_SEL_ZERO, PRED_SEL_ONE)>;
|
|
||||||
|
|
||||||
def R600_Predicate_Bit: RegisterClass <"AMDGPU", [i32], 32, (add
|
|
||||||
PREDICATE_BIT)>;
|
|
||||||
|
|
||||||
def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add
|
|
||||||
$t128_string)>
|
|
||||||
{
|
|
||||||
let SubRegClasses = [(R600_TReg32 sel_x, sel_y, sel_z, sel_w)];
|
|
||||||
let CopyCost = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
STRING
|
|
||||||
|
|
||||||
my %index_map;
|
|
||||||
my %chan_map;
|
|
||||||
|
|
||||||
for ($i = 0; $i <= $#creg_list; $i++) {
|
|
||||||
push(@{$index_map{get_hw_index($i)}}, $creg_list[$i]);
|
|
||||||
push(@{$chan_map{get_chan_str($i)}}, $creg_list[$i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for ($i = 0; $i <= $#treg_list; $i++) {
|
|
||||||
push(@{$index_map{get_hw_index($i)}}, $treg_list[$i]);
|
|
||||||
push(@{$chan_map{get_chan_str($i)}}, $treg_list[$i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for ($i = 0; $i <= $#t128reg; $i++) {
|
|
||||||
push(@{$index_map{$i}}, $t128reg[$i]);
|
|
||||||
push(@{$chan_map{'X'}}, $t128reg[$i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
open(OUTFILE, ">", "R600HwRegInfo.include");
|
|
||||||
|
|
||||||
print OUTFILE <<STRING;
|
|
||||||
|
|
||||||
unsigned R600RegisterInfo::getHWRegIndexGen(unsigned reg) const
|
|
||||||
{
|
|
||||||
switch(reg) {
|
|
||||||
default: assert(!"Unknown register"); return 0;
|
|
||||||
STRING
|
|
||||||
foreach my $key (keys(%index_map)) {
|
|
||||||
foreach my $reg (@{$index_map{$key}}) {
|
|
||||||
print OUTFILE " case AMDGPU::$reg:\n";
|
|
||||||
}
|
|
||||||
print OUTFILE " return $key;\n\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
print OUTFILE " }\n}\n\n";
|
|
||||||
|
|
||||||
print OUTFILE <<STRING;
|
|
||||||
|
|
||||||
unsigned R600RegisterInfo::getHWRegChanGen(unsigned reg) const
|
|
||||||
{
|
|
||||||
switch(reg) {
|
|
||||||
default: assert(!"Unknown register"); return 0;
|
|
||||||
STRING
|
|
||||||
|
|
||||||
foreach my $key (keys(%chan_map)) {
|
|
||||||
foreach my $reg (@{$chan_map{$key}}) {
|
|
||||||
print OUTFILE " case AMDGPU::$reg:\n";
|
|
||||||
}
|
|
||||||
my $val;
|
|
||||||
if ($key eq 'X') {
|
|
||||||
$val = 0;
|
|
||||||
} elsif ($key eq 'Y') {
|
|
||||||
$val = 1;
|
|
||||||
} elsif ($key eq 'Z') {
|
|
||||||
$val = 2;
|
|
||||||
} elsif ($key eq 'W') {
|
|
||||||
$val = 3;
|
|
||||||
} else {
|
|
||||||
die("Unknown chan value; $key");
|
|
||||||
}
|
|
||||||
print OUTFILE " return $val;\n\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
print OUTFILE " }\n}\n\n";
|
|
||||||
|
|
||||||
sub print_reg_defs {
|
|
||||||
my ($count, $prefix) = @_;
|
|
||||||
|
|
||||||
my @reg_list;
|
|
||||||
|
|
||||||
for ($i = 0; $i < $count; $i++) {
|
|
||||||
my $hw_index = get_hw_index($i);
|
|
||||||
my $chan= get_chan_str($i);
|
|
||||||
my $name = "$prefix$hw_index\_$chan";
|
|
||||||
print qq{def $name : R600Reg <"$prefix$hw_index.$chan">;\n};
|
|
||||||
$reg_list[$i] = $name;
|
|
||||||
}
|
|
||||||
return @reg_list;
|
|
||||||
}
|
|
||||||
|
|
||||||
#Helper functions
|
|
||||||
sub get_hw_index {
|
|
||||||
my ($index) = @_;
|
|
||||||
return int($index / 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
sub get_chan_str {
|
|
||||||
my ($index) = @_;
|
|
||||||
my $chan = $index % 4;
|
|
||||||
if ($chan == 0 ) {
|
|
||||||
return 'X';
|
|
||||||
} elsif ($chan == 1) {
|
|
||||||
return 'Y';
|
|
||||||
} elsif ($chan == 2) {
|
|
||||||
return 'Z';
|
|
||||||
} elsif ($chan == 3) {
|
|
||||||
return 'W';
|
|
||||||
} else {
|
|
||||||
die("Unknown chan value: $chan");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,740 +0,0 @@
|
||||||
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file
|
|
||||||
// is mostly EmitInstrWithCustomInserter().
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "R600ISelLowering.h"
|
|
||||||
#include "R600Defines.h"
|
|
||||||
#include "R600InstrInfo.h"
|
|
||||||
#include "R600MachineFunctionInfo.h"
|
|
||||||
#include "llvm/Argument.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
#include "llvm/CodeGen/SelectionDAG.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
|
|
||||||
AMDGPUTargetLowering(TM),
|
|
||||||
TII(static_cast<const R600InstrInfo*>(TM.getInstrInfo()))
|
|
||||||
{
|
|
||||||
setOperationAction(ISD::MUL, MVT::i64, Expand);
|
|
||||||
addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
|
|
||||||
addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
|
|
||||||
addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
|
|
||||||
addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
|
|
||||||
computeRegisterProperties();
|
|
||||||
|
|
||||||
setOperationAction(ISD::FADD, MVT::v4f32, Expand);
|
|
||||||
setOperationAction(ISD::FMUL, MVT::v4f32, Expand);
|
|
||||||
|
|
||||||
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
|
|
||||||
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
|
|
||||||
|
|
||||||
setOperationAction(ISD::FSUB, MVT::f32, Expand);
|
|
||||||
|
|
||||||
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
|
||||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
|
||||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
|
|
||||||
|
|
||||||
setOperationAction(ISD::ROTL, MVT::i32, Custom);
|
|
||||||
|
|
||||||
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
|
||||||
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
|
||||||
|
|
||||||
setOperationAction(ISD::SETCC, MVT::i32, Custom);
|
|
||||||
setOperationAction(ISD::SETCC, MVT::f32, Custom);
|
|
||||||
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
|
|
||||||
|
|
||||||
setTargetDAGCombine(ISD::FP_ROUND);
|
|
||||||
|
|
||||||
setSchedulingPreference(Sched::VLIW);
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
|
|
||||||
MachineInstr * MI, MachineBasicBlock * BB) const
|
|
||||||
{
|
|
||||||
MachineFunction * MF = BB->getParent();
|
|
||||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
|
||||||
MachineBasicBlock::iterator I = *MI;
|
|
||||||
|
|
||||||
switch (MI->getOpcode()) {
|
|
||||||
default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
|
||||||
case AMDGPU::SHADER_TYPE: break;
|
|
||||||
case AMDGPU::CLAMP_R600:
|
|
||||||
{
|
|
||||||
MachineInstr *NewMI =
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0) // Flags
|
|
||||||
.addReg(AMDGPU::PRED_SEL_OFF);
|
|
||||||
TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case AMDGPU::FABS_R600:
|
|
||||||
{
|
|
||||||
MachineInstr *NewMI =
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0) // Flags
|
|
||||||
.addReg(AMDGPU::PRED_SEL_OFF);
|
|
||||||
TII->addFlag(NewMI, 1, MO_FLAG_ABS);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case AMDGPU::FNEG_R600:
|
|
||||||
{
|
|
||||||
MachineInstr *NewMI =
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0) // Flags
|
|
||||||
.addReg(AMDGPU::PRED_SEL_OFF);
|
|
||||||
TII->addFlag(NewMI, 1, MO_FLAG_NEG);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case AMDGPU::R600_LOAD_CONST:
|
|
||||||
{
|
|
||||||
int64_t RegIndex = MI->getOperand(1).getImm();
|
|
||||||
unsigned ConstantReg = AMDGPU::R600_CReg32RegClass.getRegister(RegIndex);
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::COPY))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addReg(ConstantReg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case AMDGPU::MASK_WRITE:
|
|
||||||
{
|
|
||||||
unsigned maskedRegister = MI->getOperand(0).getReg();
|
|
||||||
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
|
|
||||||
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
|
|
||||||
TII->addFlag(defInstr, 0, MO_FLAG_MASK);
|
|
||||||
// Return early so the instruction is not erased
|
|
||||||
return BB;
|
|
||||||
}
|
|
||||||
|
|
||||||
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
|
|
||||||
case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
|
|
||||||
{
|
|
||||||
// Convert to DWORD address
|
|
||||||
unsigned NewAddr = MRI.createVirtualRegister(
|
|
||||||
&AMDGPU::R600_TReg32_XRegClass);
|
|
||||||
unsigned ShiftValue = MRI.createVirtualRegister(
|
|
||||||
&AMDGPU::R600_TReg32RegClass);
|
|
||||||
unsigned EOP = (llvm::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
|
|
||||||
|
|
||||||
// XXX In theory, we should be able to pass ShiftValue directly to
|
|
||||||
// the LSHR_eg instruction as an inline literal, but I tried doing it
|
|
||||||
// this way and it didn't produce the correct results.
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::MOV_IMM_I32),
|
|
||||||
ShiftValue)
|
|
||||||
.addReg(AMDGPU::ALU_LITERAL_X)
|
|
||||||
.addReg(AMDGPU::PRED_SEL_OFF)
|
|
||||||
.addImm(2);
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::LSHR_eg), NewAddr)
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addReg(ShiftValue)
|
|
||||||
.addReg(AMDGPU::PRED_SEL_OFF);
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addReg(NewAddr)
|
|
||||||
.addImm(EOP); // Set End of program bit
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case AMDGPU::RESERVE_REG:
|
|
||||||
{
|
|
||||||
R600MachineFunctionInfo * MFI = MF->getInfo<R600MachineFunctionInfo>();
|
|
||||||
int64_t ReservedIndex = MI->getOperand(0).getImm();
|
|
||||||
unsigned ReservedReg =
|
|
||||||
AMDGPU::R600_TReg32RegClass.getRegister(ReservedIndex);
|
|
||||||
MFI->ReservedRegs.push_back(ReservedReg);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
case AMDGPU::TXD:
|
|
||||||
{
|
|
||||||
unsigned t0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
|
||||||
unsigned t1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
|
|
||||||
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
|
|
||||||
.addOperand(MI->getOperand(3))
|
|
||||||
.addOperand(MI->getOperand(4))
|
|
||||||
.addOperand(MI->getOperand(5));
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
|
|
||||||
.addOperand(MI->getOperand(2))
|
|
||||||
.addOperand(MI->getOperand(4))
|
|
||||||
.addOperand(MI->getOperand(5));
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addOperand(MI->getOperand(4))
|
|
||||||
.addOperand(MI->getOperand(5))
|
|
||||||
.addReg(t0, RegState::Implicit)
|
|
||||||
.addReg(t1, RegState::Implicit);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case AMDGPU::TXD_SHADOW:
|
|
||||||
{
|
|
||||||
unsigned t0 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
|
|
||||||
unsigned t1 = MRI.createVirtualRegister(AMDGPU::R600_Reg128RegisterClass);
|
|
||||||
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), t0)
|
|
||||||
.addOperand(MI->getOperand(3))
|
|
||||||
.addOperand(MI->getOperand(4))
|
|
||||||
.addOperand(MI->getOperand(5));
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), t1)
|
|
||||||
.addOperand(MI->getOperand(2))
|
|
||||||
.addOperand(MI->getOperand(4))
|
|
||||||
.addOperand(MI->getOperand(5));
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addOperand(MI->getOperand(4))
|
|
||||||
.addOperand(MI->getOperand(5))
|
|
||||||
.addReg(t0, RegState::Implicit)
|
|
||||||
.addReg(t1, RegState::Implicit);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case AMDGPU::BRANCH:
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addReg(0);
|
|
||||||
break;
|
|
||||||
case AMDGPU::BRANCH_COND_f32:
|
|
||||||
{
|
|
||||||
MachineInstr *NewMI =
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
|
|
||||||
.addReg(AMDGPU::PREDICATE_BIT)
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(OPCODE_IS_NOT_ZERO)
|
|
||||||
.addImm(0); // Flags
|
|
||||||
TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case AMDGPU::BRANCH_COND_i32:
|
|
||||||
{
|
|
||||||
MachineInstr *NewMI =
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X))
|
|
||||||
.addReg(AMDGPU::PREDICATE_BIT)
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(OPCODE_IS_NOT_ZERO_INT)
|
|
||||||
.addImm(0); // Flags
|
|
||||||
TII->addFlag(NewMI, 1, MO_FLAG_PUSH);
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case AMDGPU::input_perspective:
|
|
||||||
{
|
|
||||||
R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
|
|
||||||
|
|
||||||
// XXX Be more fine about register reservation
|
|
||||||
for (unsigned i = 0; i < 4; i ++) {
|
|
||||||
unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
|
|
||||||
MFI->ReservedRegs.push_back(ReservedReg);
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (MI->getOperand(1).getImm()) {
|
|
||||||
case 0:// Perspective
|
|
||||||
MFI->HasPerspectiveInterpolation = true;
|
|
||||||
break;
|
|
||||||
case 1:// Linear
|
|
||||||
MFI->HasLinearInterpolation = true;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
assert(0 && "Unknow ij index");
|
|
||||||
}
|
|
||||||
|
|
||||||
return BB;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MI->eraseFromParent();
|
|
||||||
return BB;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Custom DAG Lowering Operations
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
using namespace llvm::Intrinsic;
|
|
||||||
using namespace llvm::AMDGPUIntrinsic;
|
|
||||||
|
|
||||||
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
switch (Op.getOpcode()) {
|
|
||||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
|
||||||
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
|
|
||||||
case ISD::ROTL: return LowerROTL(Op, DAG);
|
|
||||||
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
|
||||||
case ISD::SETCC: return LowerSETCC(Op, DAG);
|
|
||||||
case ISD::INTRINSIC_VOID: {
|
|
||||||
SDValue Chain = Op.getOperand(0);
|
|
||||||
unsigned IntrinsicID =
|
|
||||||
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
|
||||||
switch (IntrinsicID) {
|
|
||||||
case AMDGPUIntrinsic::AMDGPU_store_output: {
|
|
||||||
MachineFunction &MF = DAG.getMachineFunction();
|
|
||||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
||||||
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
|
|
||||||
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
|
|
||||||
if (!MRI.isLiveOut(Reg)) {
|
|
||||||
MRI.addLiveOut(Reg);
|
|
||||||
}
|
|
||||||
return DAG.getCopyToReg(Chain, Op.getDebugLoc(), Reg, Op.getOperand(2));
|
|
||||||
}
|
|
||||||
// default for switch(IntrinsicID)
|
|
||||||
default: break;
|
|
||||||
}
|
|
||||||
// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case ISD::INTRINSIC_WO_CHAIN: {
|
|
||||||
unsigned IntrinsicID =
|
|
||||||
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
|
||||||
EVT VT = Op.getValueType();
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
switch(IntrinsicID) {
|
|
||||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
|
||||||
case AMDGPUIntrinsic::R600_load_input: {
|
|
||||||
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
|
||||||
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
|
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
|
|
||||||
}
|
|
||||||
case AMDGPUIntrinsic::R600_load_input_perspective: {
|
|
||||||
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
|
||||||
SDValue FullVector = DAG.getNode(
|
|
||||||
AMDGPUISD::INTERP,
|
|
||||||
DL, MVT::v4f32,
|
|
||||||
DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
|
|
||||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
|
|
||||||
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
|
|
||||||
}
|
|
||||||
case AMDGPUIntrinsic::R600_load_input_linear: {
|
|
||||||
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
|
||||||
SDValue FullVector = DAG.getNode(
|
|
||||||
AMDGPUISD::INTERP,
|
|
||||||
DL, MVT::v4f32,
|
|
||||||
DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
|
|
||||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
|
|
||||||
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
|
|
||||||
}
|
|
||||||
case AMDGPUIntrinsic::R600_load_input_constant: {
|
|
||||||
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
|
||||||
SDValue FullVector = DAG.getNode(
|
|
||||||
AMDGPUISD::INTERP_P0,
|
|
||||||
DL, MVT::v4f32,
|
|
||||||
DAG.getConstant(slot / 4 , MVT::i32));
|
|
||||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
|
|
||||||
DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
|
|
||||||
}
|
|
||||||
case AMDGPUIntrinsic::R600_load_input_position: {
|
|
||||||
unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
|
||||||
unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
|
|
||||||
SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
|
||||||
RegIndex, MVT::f32);
|
|
||||||
if ((slot % 4) == 3) {
|
|
||||||
return DAG.getNode(ISD::FDIV,
|
|
||||||
DL, VT,
|
|
||||||
DAG.getConstantFP(1.0f, MVT::f32),
|
|
||||||
Reg);
|
|
||||||
} else {
|
|
||||||
return Reg;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
case r600_read_ngroups_x:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 0);
|
|
||||||
case r600_read_ngroups_y:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 1);
|
|
||||||
case r600_read_ngroups_z:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 2);
|
|
||||||
case r600_read_global_size_x:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 3);
|
|
||||||
case r600_read_global_size_y:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 4);
|
|
||||||
case r600_read_global_size_z:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 5);
|
|
||||||
case r600_read_local_size_x:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 6);
|
|
||||||
case r600_read_local_size_y:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 7);
|
|
||||||
case r600_read_local_size_z:
|
|
||||||
return LowerImplicitParameter(DAG, VT, DL, 8);
|
|
||||||
|
|
||||||
case r600_read_tgid_x:
|
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
|
||||||
AMDGPU::T1_X, VT);
|
|
||||||
case r600_read_tgid_y:
|
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
|
||||||
AMDGPU::T1_Y, VT);
|
|
||||||
case r600_read_tgid_z:
|
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
|
||||||
AMDGPU::T1_Z, VT);
|
|
||||||
case r600_read_tidig_x:
|
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
|
||||||
AMDGPU::T0_X, VT);
|
|
||||||
case r600_read_tidig_y:
|
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
|
||||||
AMDGPU::T0_Y, VT);
|
|
||||||
case r600_read_tidig_z:
|
|
||||||
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
|
||||||
AMDGPU::T0_Z, VT);
|
|
||||||
}
|
|
||||||
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} // end switch(Op.getOpcode())
|
|
||||||
return SDValue();
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
|
|
||||||
SmallVectorImpl<SDValue> &Results,
|
|
||||||
SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
switch (N->getOpcode()) {
|
|
||||||
default: return;
|
|
||||||
case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
|
|
||||||
case ISD::INTRINSIC_WO_CHAIN:
|
|
||||||
{
|
|
||||||
unsigned IntrinsicID =
|
|
||||||
cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
|
|
||||||
if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face) {
|
|
||||||
Results.push_back(LowerInputFace(N, DAG));
|
|
||||||
} else {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
unsigned slot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
|
|
||||||
unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
|
|
||||||
SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
|
|
||||||
RegIndex, MVT::f32);
|
|
||||||
return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1,
|
|
||||||
Reg, DAG.getConstantFP(0.0f, MVT::f32),
|
|
||||||
DAG.getCondCode(ISD::SETUGT));
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
return DAG.getNode(
|
|
||||||
ISD::SETCC,
|
|
||||||
Op.getDebugLoc(),
|
|
||||||
MVT::i1,
|
|
||||||
Op, DAG.getConstantFP(0.0f, MVT::f32),
|
|
||||||
DAG.getCondCode(ISD::SETNE)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue R600TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
SDValue Chain = Op.getOperand(0);
|
|
||||||
SDValue CC = Op.getOperand(1);
|
|
||||||
SDValue LHS = Op.getOperand(2);
|
|
||||||
SDValue RHS = Op.getOperand(3);
|
|
||||||
SDValue JumpT = Op.getOperand(4);
|
|
||||||
SDValue CmpValue;
|
|
||||||
SDValue Result;
|
|
||||||
|
|
||||||
if (LHS.getValueType() == MVT::i32) {
|
|
||||||
CmpValue = DAG.getNode(
|
|
||||||
ISD::SELECT_CC,
|
|
||||||
Op.getDebugLoc(),
|
|
||||||
MVT::i32,
|
|
||||||
LHS, RHS,
|
|
||||||
DAG.getConstant(-1, MVT::i32),
|
|
||||||
DAG.getConstant(0, MVT::i32),
|
|
||||||
CC);
|
|
||||||
} else if (LHS.getValueType() == MVT::f32) {
|
|
||||||
CmpValue = DAG.getNode(
|
|
||||||
ISD::SELECT_CC,
|
|
||||||
Op.getDebugLoc(),
|
|
||||||
MVT::f32,
|
|
||||||
LHS, RHS,
|
|
||||||
DAG.getConstantFP(1.0f, MVT::f32),
|
|
||||||
DAG.getConstantFP(0.0f, MVT::f32),
|
|
||||||
CC);
|
|
||||||
} else {
|
|
||||||
assert(0 && "Not valid type for br_cc");
|
|
||||||
}
|
|
||||||
Result = DAG.getNode(
|
|
||||||
AMDGPUISD::BRANCH_COND,
|
|
||||||
CmpValue.getDebugLoc(),
|
|
||||||
MVT::Other, Chain,
|
|
||||||
JumpT, CmpValue);
|
|
||||||
return Result;
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
|
|
||||||
DebugLoc DL,
|
|
||||||
unsigned DwordOffset) const
|
|
||||||
{
|
|
||||||
unsigned ByteOffset = DwordOffset * 4;
|
|
||||||
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
|
||||||
AMDGPUAS::PARAM_I_ADDRESS);
|
|
||||||
|
|
||||||
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
|
|
||||||
assert(isInt<16>(ByteOffset));
|
|
||||||
|
|
||||||
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
|
|
||||||
DAG.getConstant(ByteOffset, MVT::i32), // PTR
|
|
||||||
MachinePointerInfo(ConstantPointerNull::get(PtrType)),
|
|
||||||
false, false, false, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
EVT VT = Op.getValueType();
|
|
||||||
|
|
||||||
return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
|
|
||||||
Op.getOperand(0),
|
|
||||||
Op.getOperand(0),
|
|
||||||
DAG.getNode(ISD::SUB, DL, VT,
|
|
||||||
DAG.getConstant(32, MVT::i32),
|
|
||||||
Op.getOperand(1)));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600TargetLowering::isZero(SDValue Op) const
|
|
||||||
{
|
|
||||||
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
|
|
||||||
return Cst->isNullValue();
|
|
||||||
} else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
|
|
||||||
return CstFP->isZero();
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
EVT VT = Op.getValueType();
|
|
||||||
|
|
||||||
SDValue LHS = Op.getOperand(0);
|
|
||||||
SDValue RHS = Op.getOperand(1);
|
|
||||||
SDValue True = Op.getOperand(2);
|
|
||||||
SDValue False = Op.getOperand(3);
|
|
||||||
SDValue CC = Op.getOperand(4);
|
|
||||||
SDValue Temp;
|
|
||||||
|
|
||||||
// LHS and RHS are guaranteed to be the same value type
|
|
||||||
EVT CompareVT = LHS.getValueType();
|
|
||||||
|
|
||||||
// We need all the operands of SELECT_CC to have the same value type, so if
|
|
||||||
// necessary we need to convert LHS and RHS to be the same type True and
|
|
||||||
// False. True and False are guaranteed to have the same type as this
|
|
||||||
// SELECT_CC node.
|
|
||||||
|
|
||||||
if (isHWTrueValue(True) && isHWFalseValue(False)) {
|
|
||||||
if (CompareVT != VT) {
|
|
||||||
if (VT == MVT::f32 && CompareVT == MVT::i32) {
|
|
||||||
SDValue Boolean = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
|
|
||||||
LHS, RHS,
|
|
||||||
DAG.getConstant(-1, MVT::i32),
|
|
||||||
DAG.getConstant(0, MVT::i32),
|
|
||||||
CC);
|
|
||||||
return DAG.getNode(ISD::UINT_TO_FP, DL, VT, Boolean);
|
|
||||||
} else if (VT == MVT::i32 && CompareVT == MVT::f32) {
|
|
||||||
SDValue BoolAsFlt = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
|
|
||||||
LHS, RHS,
|
|
||||||
DAG.getConstantFP(1.0f, MVT::f32),
|
|
||||||
DAG.getConstantFP(0.0f, MVT::f32),
|
|
||||||
CC);
|
|
||||||
return DAG.getNode(ISD::FP_TO_UINT, DL, VT, BoolAsFlt);
|
|
||||||
} else {
|
|
||||||
// I don't think there will be any other type pairings.
|
|
||||||
assert(!"Unhandled operand type parings in SELECT_CC");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// XXX If True is a hardware TRUE value and False is a hardware FALSE value,
|
|
||||||
// we can handle this with a native instruction, but we need to swap true
|
|
||||||
// and false and change the conditional.
|
|
||||||
if (isHWTrueValue(False) && isHWFalseValue(True)) {
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if we can lower this to a native operation.
|
|
||||||
// CND* instructions requires all operands to have the same type,
|
|
||||||
// and RHS to be zero.
|
|
||||||
|
|
||||||
if (isZero(LHS) || isZero(RHS)) {
|
|
||||||
SDValue Cond = (isZero(LHS) ? RHS : LHS);
|
|
||||||
SDValue Zero = (isZero(LHS) ? LHS : RHS);
|
|
||||||
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
|
|
||||||
if (CompareVT != VT) {
|
|
||||||
True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
|
|
||||||
False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
|
|
||||||
}
|
|
||||||
if (isZero(LHS)) {
|
|
||||||
CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (CCOpcode) {
|
|
||||||
case ISD::SETONE:
|
|
||||||
case ISD::SETUNE:
|
|
||||||
case ISD::SETNE:
|
|
||||||
case ISD::SETULE:
|
|
||||||
case ISD::SETULT:
|
|
||||||
case ISD::SETOLE:
|
|
||||||
case ISD::SETOLT:
|
|
||||||
case ISD::SETLE:
|
|
||||||
case ISD::SETLT:
|
|
||||||
CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
|
|
||||||
Temp = True;
|
|
||||||
True = False;
|
|
||||||
False = Temp;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
|
|
||||||
Cond, Zero,
|
|
||||||
True, False,
|
|
||||||
DAG.getCondCode(CCOpcode));
|
|
||||||
return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// If we make it this for it means we have no native instructions to handle
|
|
||||||
// this SELECT_CC, so we must lower it.
|
|
||||||
SDValue HWTrue, HWFalse;
|
|
||||||
|
|
||||||
if (CompareVT == MVT::f32) {
|
|
||||||
HWTrue = DAG.getConstantFP(1.0f, CompareVT);
|
|
||||||
HWFalse = DAG.getConstantFP(0.0f, CompareVT);
|
|
||||||
} else if (CompareVT == MVT::i32) {
|
|
||||||
HWTrue = DAG.getConstant(-1, CompareVT);
|
|
||||||
HWFalse = DAG.getConstant(0, CompareVT);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
assert(!"Unhandled value type in LowerSELECT_CC");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lower this unsupported SELECT_CC into a combination of two supported
|
|
||||||
// SELECT_CC operations.
|
|
||||||
SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
|
|
||||||
|
|
||||||
return DAG.getNode(ISD::SELECT_CC, DL, VT,
|
|
||||||
Cond, HWFalse,
|
|
||||||
True, False,
|
|
||||||
DAG.getCondCode(ISD::SETNE));
|
|
||||||
}
|
|
||||||
|
|
||||||
SDValue R600TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
|
|
||||||
{
|
|
||||||
SDValue Cond;
|
|
||||||
SDValue LHS = Op.getOperand(0);
|
|
||||||
SDValue RHS = Op.getOperand(1);
|
|
||||||
SDValue CC = Op.getOperand(2);
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
assert(Op.getValueType() == MVT::i32);
|
|
||||||
if (LHS.getValueType() == MVT::i32) {
|
|
||||||
Cond = DAG.getNode(
|
|
||||||
ISD::SELECT_CC,
|
|
||||||
Op.getDebugLoc(),
|
|
||||||
MVT::i32,
|
|
||||||
LHS, RHS,
|
|
||||||
DAG.getConstant(-1, MVT::i32),
|
|
||||||
DAG.getConstant(0, MVT::i32),
|
|
||||||
CC);
|
|
||||||
} else if (LHS.getValueType() == MVT::f32) {
|
|
||||||
Cond = DAG.getNode(
|
|
||||||
ISD::SELECT_CC,
|
|
||||||
Op.getDebugLoc(),
|
|
||||||
MVT::f32,
|
|
||||||
LHS, RHS,
|
|
||||||
DAG.getConstantFP(1.0f, MVT::f32),
|
|
||||||
DAG.getConstantFP(0.0f, MVT::f32),
|
|
||||||
CC);
|
|
||||||
Cond = DAG.getNode(
|
|
||||||
ISD::FP_TO_SINT,
|
|
||||||
DL,
|
|
||||||
MVT::i32,
|
|
||||||
Cond);
|
|
||||||
} else {
|
|
||||||
assert(0 && "Not valid type for set_cc");
|
|
||||||
}
|
|
||||||
Cond = DAG.getNode(
|
|
||||||
ISD::AND,
|
|
||||||
DL,
|
|
||||||
MVT::i32,
|
|
||||||
DAG.getConstant(1, MVT::i32),
|
|
||||||
Cond);
|
|
||||||
return Cond;
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX Only kernel functions are supporte, so we can assume for now that
|
|
||||||
// every function is a kernel function, but in the future we should use
|
|
||||||
// separate calling conventions for kernel and non-kernel functions.
|
|
||||||
// Only kernel functions are supported, so we can assume for now
|
|
||||||
SDValue R600TargetLowering::LowerFormalArguments(
|
|
||||||
SDValue Chain,
|
|
||||||
CallingConv::ID CallConv,
|
|
||||||
bool isVarArg,
|
|
||||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
||||||
DebugLoc DL, SelectionDAG &DAG,
|
|
||||||
SmallVectorImpl<SDValue> &InVals) const
|
|
||||||
{
|
|
||||||
unsigned ParamOffsetBytes = 36;
|
|
||||||
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
|
|
||||||
EVT VT = Ins[i].VT;
|
|
||||||
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
|
||||||
AMDGPUAS::PARAM_I_ADDRESS);
|
|
||||||
SDValue Arg = DAG.getLoad(VT, DL, DAG.getRoot(),
|
|
||||||
DAG.getConstant(ParamOffsetBytes, MVT::i32),
|
|
||||||
MachinePointerInfo(new Argument(PtrTy)),
|
|
||||||
false, false, false, 4);
|
|
||||||
InVals.push_back(Arg);
|
|
||||||
ParamOffsetBytes += (VT.getStoreSize());
|
|
||||||
}
|
|
||||||
return Chain;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Custom DAG Optimizations
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
|
|
||||||
DAGCombinerInfo &DCI) const
|
|
||||||
{
|
|
||||||
SelectionDAG &DAG = DCI.DAG;
|
|
||||||
|
|
||||||
switch (N->getOpcode()) {
|
|
||||||
// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
|
|
||||||
case ISD::FP_ROUND: {
|
|
||||||
SDValue Arg = N->getOperand(0);
|
|
||||||
if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
|
|
||||||
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), N->getValueType(0),
|
|
||||||
Arg.getOperand(0));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return SDValue();
|
|
||||||
}
|
|
||||||
|
|
@ -1,69 +0,0 @@
|
||||||
//===-- R600ISelLowering.h - R600 DAG Lowering Interface -*- C++ -*--------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// R600 DAG Lowering interface definition
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef R600ISELLOWERING_H
|
|
||||||
#define R600ISELLOWERING_H
|
|
||||||
|
|
||||||
#include "AMDGPUISelLowering.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class R600InstrInfo;
|
|
||||||
|
|
||||||
class R600TargetLowering : public AMDGPUTargetLowering
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
R600TargetLowering(TargetMachine &TM);
|
|
||||||
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
|
|
||||||
MachineBasicBlock * BB) const;
|
|
||||||
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
|
||||||
void ReplaceNodeResults(SDNode * N,
|
|
||||||
SmallVectorImpl<SDValue> &Results,
|
|
||||||
SelectionDAG &DAG) const;
|
|
||||||
virtual SDValue LowerFormalArguments(
|
|
||||||
SDValue Chain,
|
|
||||||
CallingConv::ID CallConv,
|
|
||||||
bool isVarArg,
|
|
||||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
|
||||||
DebugLoc DL, SelectionDAG &DAG,
|
|
||||||
SmallVectorImpl<SDValue> &InVals) const;
|
|
||||||
private:
|
|
||||||
const R600InstrInfo * TII;
|
|
||||||
|
|
||||||
/// lowerImplicitParameter - Each OpenCL kernel has nine implicit parameters
|
|
||||||
/// that are stored in the first nine dwords of a Vertex Buffer. These
|
|
||||||
/// implicit parameters are lowered to load instructions which retreive the
|
|
||||||
/// values from the Vertex Buffer.
|
|
||||||
SDValue LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
|
|
||||||
DebugLoc DL, unsigned DwordOffset) const;
|
|
||||||
|
|
||||||
void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB,
|
|
||||||
MachineRegisterInfo & MRI, unsigned dword_offset) const;
|
|
||||||
|
|
||||||
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
|
|
||||||
/// LowerROTL - Lower ROTL opcode to BITALIGN
|
|
||||||
SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
|
|
||||||
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
|
|
||||||
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
|
|
||||||
|
|
||||||
bool isZero(SDValue Op) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm;
|
|
||||||
|
|
||||||
#endif // R600ISELLOWERING_H
|
|
||||||
|
|
@ -1,512 +0,0 @@
|
||||||
//===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// R600 Implementation of TargetInstrInfo.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "R600InstrInfo.h"
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
|
||||||
#include "AMDGPUSubtarget.h"
|
|
||||||
#include "R600Defines.h"
|
|
||||||
#include "R600RegisterInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "AMDILUtilityFunctions.h"
|
|
||||||
|
|
||||||
#define GET_INSTRINFO_CTOR
|
|
||||||
#include "AMDGPUGenDFAPacketizer.inc"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
|
|
||||||
: AMDGPUInstrInfo(tm),
|
|
||||||
RI(tm, *this),
|
|
||||||
TM(tm)
|
|
||||||
{ }
|
|
||||||
|
|
||||||
const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
|
|
||||||
{
|
|
||||||
return RI;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600InstrInfo::isTrig(const MachineInstr &MI) const
|
|
||||||
{
|
|
||||||
return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600InstrInfo::isVector(const MachineInstr &MI) const
|
|
||||||
{
|
|
||||||
return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
|
||||||
unsigned DestReg, unsigned SrcReg,
|
|
||||||
bool KillSrc) const
|
|
||||||
{
|
|
||||||
if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
|
|
||||||
&& AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
|
|
||||||
for (unsigned I = 0; I < 4; I++) {
|
|
||||||
unsigned SubRegIndex = RI.getSubRegFromChannel(I);
|
|
||||||
BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
|
|
||||||
.addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
|
|
||||||
.addReg(RI.getSubReg(SrcReg, SubRegIndex))
|
|
||||||
.addImm(0) // Flag
|
|
||||||
.addReg(0) // PREDICATE_BIT
|
|
||||||
.addReg(DestReg, RegState::Define | RegState::Implicit);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
|
|
||||||
/* We can't copy vec4 registers */
|
|
||||||
assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
|
|
||||||
&& !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
|
|
||||||
|
|
||||||
BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
|
|
||||||
.addReg(SrcReg, getKillRegState(KillSrc))
|
|
||||||
.addImm(0) // Flag
|
|
||||||
.addReg(0); // PREDICATE_BIT
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
|
|
||||||
unsigned DstReg, int64_t Imm) const
|
|
||||||
{
|
|
||||||
MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
|
|
||||||
MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
|
|
||||||
MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
|
|
||||||
MachineInstrBuilder(MI).addImm(Imm);
|
|
||||||
MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
|
|
||||||
|
|
||||||
return MI;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned R600InstrInfo::getIEQOpcode() const
|
|
||||||
{
|
|
||||||
return AMDGPU::SETE_INT;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600InstrInfo::isMov(unsigned Opcode) const
|
|
||||||
{
|
|
||||||
|
|
||||||
|
|
||||||
switch(Opcode) {
|
|
||||||
default: return false;
|
|
||||||
case AMDGPU::MOV:
|
|
||||||
case AMDGPU::MOV_IMM_F32:
|
|
||||||
case AMDGPU::MOV_IMM_I32:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Some instructions act as place holders to emulate operations that the GPU
|
|
||||||
// hardware does automatically. This function can be used to check if
|
|
||||||
// an opcode falls into this category.
|
|
||||||
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
|
|
||||||
{
|
|
||||||
switch (Opcode) {
|
|
||||||
default: return false;
|
|
||||||
case AMDGPU::RETURN:
|
|
||||||
case AMDGPU::MASK_WRITE:
|
|
||||||
case AMDGPU::RESERVE_REG:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600InstrInfo::isReductionOp(unsigned Opcode) const
|
|
||||||
{
|
|
||||||
switch(Opcode) {
|
|
||||||
default: return false;
|
|
||||||
case AMDGPU::DOT4_r600:
|
|
||||||
case AMDGPU::DOT4_eg:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool R600InstrInfo::isCubeOp(unsigned Opcode) const
|
|
||||||
{
|
|
||||||
switch(Opcode) {
|
|
||||||
default: return false;
|
|
||||||
case AMDGPU::CUBE_r600_pseudo:
|
|
||||||
case AMDGPU::CUBE_r600_real:
|
|
||||||
case AMDGPU::CUBE_eg_pseudo:
|
|
||||||
case AMDGPU::CUBE_eg_real:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
|
|
||||||
const ScheduleDAG *DAG) const
|
|
||||||
{
|
|
||||||
const InstrItineraryData *II = TM->getInstrItineraryData();
|
|
||||||
return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
|
||||||
isPredicateSetter(unsigned Opcode)
|
|
||||||
{
|
|
||||||
switch (Opcode) {
|
|
||||||
case AMDGPU::PRED_X:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static MachineInstr *
|
|
||||||
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator I)
|
|
||||||
{
|
|
||||||
while (I != MBB.begin()) {
|
|
||||||
--I;
|
|
||||||
MachineInstr *MI = I;
|
|
||||||
if (isPredicateSetter(MI->getOpcode()))
|
|
||||||
return MI;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock *&TBB,
|
|
||||||
MachineBasicBlock *&FBB,
|
|
||||||
SmallVectorImpl<MachineOperand> &Cond,
|
|
||||||
bool AllowModify) const
|
|
||||||
{
|
|
||||||
// Most of the following comes from the ARM implementation of AnalyzeBranch
|
|
||||||
|
|
||||||
// If the block has no terminators, it just falls into the block after it.
|
|
||||||
MachineBasicBlock::iterator I = MBB.end();
|
|
||||||
if (I == MBB.begin())
|
|
||||||
return false;
|
|
||||||
--I;
|
|
||||||
while (I->isDebugValue()) {
|
|
||||||
if (I == MBB.begin())
|
|
||||||
return false;
|
|
||||||
--I;
|
|
||||||
}
|
|
||||||
if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the last instruction in the block.
|
|
||||||
MachineInstr *LastInst = I;
|
|
||||||
|
|
||||||
// If there is only one terminator instruction, process it.
|
|
||||||
unsigned LastOpc = LastInst->getOpcode();
|
|
||||||
if (I == MBB.begin() ||
|
|
||||||
static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
|
|
||||||
if (LastOpc == AMDGPU::JUMP) {
|
|
||||||
if(!isPredicated(LastInst)) {
|
|
||||||
TBB = LastInst->getOperand(0).getMBB();
|
|
||||||
return false;
|
|
||||||
} else {
|
|
||||||
MachineInstr *predSet = I;
|
|
||||||
while (!isPredicateSetter(predSet->getOpcode())) {
|
|
||||||
predSet = --I;
|
|
||||||
}
|
|
||||||
TBB = LastInst->getOperand(0).getMBB();
|
|
||||||
Cond.push_back(predSet->getOperand(1));
|
|
||||||
Cond.push_back(predSet->getOperand(2));
|
|
||||||
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true; // Can't handle indirect branch.
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the instruction before it if it is a terminator.
|
|
||||||
MachineInstr *SecondLastInst = I;
|
|
||||||
unsigned SecondLastOpc = SecondLastInst->getOpcode();
|
|
||||||
|
|
||||||
// If the block ends with a B and a Bcc, handle it.
|
|
||||||
if (SecondLastOpc == AMDGPU::JUMP &&
|
|
||||||
isPredicated(SecondLastInst) &&
|
|
||||||
LastOpc == AMDGPU::JUMP &&
|
|
||||||
!isPredicated(LastInst)) {
|
|
||||||
MachineInstr *predSet = --I;
|
|
||||||
while (!isPredicateSetter(predSet->getOpcode())) {
|
|
||||||
predSet = --I;
|
|
||||||
}
|
|
||||||
TBB = SecondLastInst->getOperand(0).getMBB();
|
|
||||||
FBB = LastInst->getOperand(0).getMBB();
|
|
||||||
Cond.push_back(predSet->getOperand(1));
|
|
||||||
Cond.push_back(predSet->getOperand(2));
|
|
||||||
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Otherwise, can't handle this.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
|
|
||||||
const MachineInstr *MI = op.getParent();
|
|
||||||
|
|
||||||
switch (MI->getDesc().OpInfo->RegClass) {
|
|
||||||
default: // FIXME: fallthrough??
|
|
||||||
case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
|
|
||||||
case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned
|
|
||||||
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock *TBB,
|
|
||||||
MachineBasicBlock *FBB,
|
|
||||||
const SmallVectorImpl<MachineOperand> &Cond,
|
|
||||||
DebugLoc DL) const
|
|
||||||
{
|
|
||||||
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
|
|
||||||
|
|
||||||
if (FBB == 0) {
|
|
||||||
if (Cond.empty()) {
|
|
||||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
|
|
||||||
return 1;
|
|
||||||
} else {
|
|
||||||
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
|
|
||||||
assert(PredSet && "No previous predicate !");
|
|
||||||
addFlag(PredSet, 1, MO_FLAG_PUSH);
|
|
||||||
PredSet->getOperand(2).setImm(Cond[1].getImm());
|
|
||||||
|
|
||||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
|
|
||||||
.addMBB(TBB)
|
|
||||||
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
|
|
||||||
assert(PredSet && "No previous predicate !");
|
|
||||||
addFlag(PredSet, 1, MO_FLAG_PUSH);
|
|
||||||
PredSet->getOperand(2).setImm(Cond[1].getImm());
|
|
||||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
|
|
||||||
.addMBB(TBB)
|
|
||||||
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
|
|
||||||
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned
|
|
||||||
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
|
|
||||||
{
|
|
||||||
|
|
||||||
// Note : we leave PRED* instructions there.
|
|
||||||
// They may be needed when predicating instructions.
|
|
||||||
|
|
||||||
MachineBasicBlock::iterator I = MBB.end();
|
|
||||||
|
|
||||||
if (I == MBB.begin()) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
--I;
|
|
||||||
switch (I->getOpcode()) {
|
|
||||||
default:
|
|
||||||
return 0;
|
|
||||||
case AMDGPU::JUMP:
|
|
||||||
if (isPredicated(I)) {
|
|
||||||
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
|
|
||||||
clearFlag(predSet, 1, MO_FLAG_PUSH);
|
|
||||||
}
|
|
||||||
I->eraseFromParent();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
I = MBB.end();
|
|
||||||
|
|
||||||
if (I == MBB.begin()) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
--I;
|
|
||||||
switch (I->getOpcode()) {
|
|
||||||
// FIXME: only one case??
|
|
||||||
default:
|
|
||||||
return 1;
|
|
||||||
case AMDGPU::JUMP:
|
|
||||||
if (isPredicated(I)) {
|
|
||||||
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
|
|
||||||
clearFlag(predSet, 1, MO_FLAG_PUSH);
|
|
||||||
}
|
|
||||||
I->eraseFromParent();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::isPredicated(const MachineInstr *MI) const
|
|
||||||
{
|
|
||||||
int idx = MI->findFirstPredOperandIdx();
|
|
||||||
if (idx < 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
unsigned Reg = MI->getOperand(idx).getReg();
|
|
||||||
switch (Reg) {
|
|
||||||
default: return false;
|
|
||||||
case AMDGPU::PRED_SEL_ONE:
|
|
||||||
case AMDGPU::PRED_SEL_ZERO:
|
|
||||||
case AMDGPU::PREDICATE_BIT:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::isPredicable(MachineInstr *MI) const
|
|
||||||
{
|
|
||||||
return AMDGPUInstrInfo::isPredicable(MI);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
|
|
||||||
unsigned NumCyles,
|
|
||||||
unsigned ExtraPredCycles,
|
|
||||||
const BranchProbability &Probability) const{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
|
||||||
unsigned NumTCycles,
|
|
||||||
unsigned ExtraTCycles,
|
|
||||||
MachineBasicBlock &FMBB,
|
|
||||||
unsigned NumFCycles,
|
|
||||||
unsigned ExtraFCycles,
|
|
||||||
const BranchProbability &Probability) const
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
|
|
||||||
unsigned NumCyles,
|
|
||||||
const BranchProbability &Probability)
|
|
||||||
const
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
|
|
||||||
MachineBasicBlock &FMBB) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
|
|
||||||
{
|
|
||||||
MachineOperand &MO = Cond[1];
|
|
||||||
switch (MO.getImm()) {
|
|
||||||
case OPCODE_IS_ZERO_INT:
|
|
||||||
MO.setImm(OPCODE_IS_NOT_ZERO_INT);
|
|
||||||
break;
|
|
||||||
case OPCODE_IS_NOT_ZERO_INT:
|
|
||||||
MO.setImm(OPCODE_IS_ZERO_INT);
|
|
||||||
break;
|
|
||||||
case OPCODE_IS_ZERO:
|
|
||||||
MO.setImm(OPCODE_IS_NOT_ZERO);
|
|
||||||
break;
|
|
||||||
case OPCODE_IS_NOT_ZERO:
|
|
||||||
MO.setImm(OPCODE_IS_ZERO);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineOperand &MO2 = Cond[2];
|
|
||||||
switch (MO2.getReg()) {
|
|
||||||
case AMDGPU::PRED_SEL_ZERO:
|
|
||||||
MO2.setReg(AMDGPU::PRED_SEL_ONE);
|
|
||||||
break;
|
|
||||||
case AMDGPU::PRED_SEL_ONE:
|
|
||||||
MO2.setReg(AMDGPU::PRED_SEL_ZERO);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
|
|
||||||
std::vector<MachineOperand> &Pred) const
|
|
||||||
{
|
|
||||||
return isPredicateSetter(MI->getOpcode());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
|
||||||
const SmallVectorImpl<MachineOperand> &Pred2) const
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
|
|
||||||
const SmallVectorImpl<MachineOperand> &Pred) const
|
|
||||||
{
|
|
||||||
int PIdx = MI->findFirstPredOperandIdx();
|
|
||||||
|
|
||||||
if (PIdx != -1) {
|
|
||||||
MachineOperand &PMO = MI->getOperand(PIdx);
|
|
||||||
PMO.setReg(Pred[2].getReg());
|
|
||||||
MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
|
||||||
const MachineInstr *MI,
|
|
||||||
unsigned *PredCost) const
|
|
||||||
{
|
|
||||||
if (PredCost)
|
|
||||||
*PredCost = 2;
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Instruction flag getters/setters
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
|
|
||||||
{
|
|
||||||
return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
|
|
||||||
{
|
|
||||||
unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
|
|
||||||
assert(FlagIndex != 0 &&
|
|
||||||
"Instruction flags not supported for this instruction");
|
|
||||||
MachineOperand &FlagOp = MI->getOperand(FlagIndex);
|
|
||||||
assert(FlagOp.isImm());
|
|
||||||
return FlagOp;
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
|
|
||||||
unsigned Flag) const
|
|
||||||
{
|
|
||||||
MachineOperand &FlagOp = getFlagOp(MI);
|
|
||||||
FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
|
|
||||||
}
|
|
||||||
|
|
||||||
void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
|
|
||||||
unsigned Flag) const
|
|
||||||
{
|
|
||||||
MachineOperand &FlagOp = getFlagOp(MI);
|
|
||||||
unsigned InstFlags = FlagOp.getImm();
|
|
||||||
InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
|
|
||||||
FlagOp.setImm(InstFlags);
|
|
||||||
}
|
|
||||||
|
|
@ -1,132 +0,0 @@
|
||||||
//===-- R600InstrInfo.h - R600 Instruction Info Interface -------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface definition for R600InstrInfo
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef R600INSTRUCTIONINFO_H_
|
|
||||||
#define R600INSTRUCTIONINFO_H_
|
|
||||||
|
|
||||||
#include "AMDIL.h"
|
|
||||||
#include "AMDGPUInstrInfo.h"
|
|
||||||
#include "R600RegisterInfo.h"
|
|
||||||
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class AMDGPUTargetMachine;
|
|
||||||
class DFAPacketizer;
|
|
||||||
class ScheduleDAG;
|
|
||||||
class MachineFunction;
|
|
||||||
class MachineInstr;
|
|
||||||
class MachineInstrBuilder;
|
|
||||||
|
|
||||||
class R600InstrInfo : public AMDGPUInstrInfo {
|
|
||||||
private:
|
|
||||||
const R600RegisterInfo RI;
|
|
||||||
AMDGPUTargetMachine &TM;
|
|
||||||
|
|
||||||
int getBranchInstr(const MachineOperand &op) const;
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
|
|
||||||
|
|
||||||
const R600RegisterInfo &getRegisterInfo() const;
|
|
||||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
|
||||||
unsigned DestReg, unsigned SrcReg,
|
|
||||||
bool KillSrc) const;
|
|
||||||
|
|
||||||
bool isTrig(const MachineInstr &MI) const;
|
|
||||||
bool isPlaceHolderOpcode(unsigned opcode) const;
|
|
||||||
bool isReductionOp(unsigned opcode) const;
|
|
||||||
bool isCubeOp(unsigned opcode) const;
|
|
||||||
|
|
||||||
/// isVector - Vector instructions are instructions that must fill all
|
|
||||||
/// instruction slots within an instruction group.
|
|
||||||
bool isVector(const MachineInstr &MI) const;
|
|
||||||
|
|
||||||
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
|
||||||
int64_t Imm) const;
|
|
||||||
|
|
||||||
virtual unsigned getIEQOpcode() const;
|
|
||||||
virtual bool isMov(unsigned Opcode) const;
|
|
||||||
|
|
||||||
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
|
|
||||||
const ScheduleDAG *DAG) const;
|
|
||||||
|
|
||||||
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
|
|
||||||
|
|
||||||
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
|
|
||||||
SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
|
|
||||||
|
|
||||||
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
|
|
||||||
|
|
||||||
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
|
|
||||||
|
|
||||||
bool isPredicated(const MachineInstr *MI) const;
|
|
||||||
|
|
||||||
bool isPredicable(MachineInstr *MI) const;
|
|
||||||
|
|
||||||
bool
|
|
||||||
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
|
|
||||||
const BranchProbability &Probability) const;
|
|
||||||
|
|
||||||
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
|
|
||||||
unsigned ExtraPredCycles,
|
|
||||||
const BranchProbability &Probability) const ;
|
|
||||||
|
|
||||||
bool
|
|
||||||
isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
|
||||||
unsigned NumTCycles, unsigned ExtraTCycles,
|
|
||||||
MachineBasicBlock &FMBB,
|
|
||||||
unsigned NumFCycles, unsigned ExtraFCycles,
|
|
||||||
const BranchProbability &Probability) const;
|
|
||||||
|
|
||||||
bool DefinesPredicate(MachineInstr *MI,
|
|
||||||
std::vector<MachineOperand> &Pred) const;
|
|
||||||
|
|
||||||
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
|
||||||
const SmallVectorImpl<MachineOperand> &Pred2) const;
|
|
||||||
|
|
||||||
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
|
|
||||||
MachineBasicBlock &FMBB) const;
|
|
||||||
|
|
||||||
bool PredicateInstruction(MachineInstr *MI,
|
|
||||||
const SmallVectorImpl<MachineOperand> &Pred) const;
|
|
||||||
|
|
||||||
int getInstrLatency(const InstrItineraryData *ItinData,
|
|
||||||
const MachineInstr *MI,
|
|
||||||
unsigned *PredCost = 0) const;
|
|
||||||
|
|
||||||
virtual int getInstrLatency(const InstrItineraryData *ItinData,
|
|
||||||
SDNode *Node) const { return 1;}
|
|
||||||
|
|
||||||
///hasFlagOperand - Returns true if this instruction has an operand for
|
|
||||||
/// storing target flags.
|
|
||||||
bool hasFlagOperand(const MachineInstr &MI) const;
|
|
||||||
|
|
||||||
///addFlag - Add one of the MO_FLAG* flags to the specified Operand.
|
|
||||||
void addFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
|
|
||||||
|
|
||||||
///isFlagSet - Determine if the specified flag is set on this Operand.
|
|
||||||
bool isFlagSet(const MachineInstr &MI, unsigned Operand, unsigned Flag) const;
|
|
||||||
|
|
||||||
///getFlagOp - Return the operand containing the flags for this instruction.
|
|
||||||
MachineOperand &getFlagOp(MachineInstr *MI) const;
|
|
||||||
|
|
||||||
///clearFlag - Clear the specified flag on the instruction.
|
|
||||||
void clearFlag(MachineInstr *MI, unsigned Operand, unsigned Flag) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End llvm namespace
|
|
||||||
|
|
||||||
#endif // R600INSTRINFO_H_
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,50 +0,0 @@
|
||||||
//===-- R600Intrinsics.td - R600 Instrinsic defs -------*- tablegen -*-----===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// R600 Intrinsic Definitions
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
let TargetPrefix = "R600", isTarget = 1 in {
|
|
||||||
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_R600_load_input_perspective :
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
def int_R600_load_input_constant :
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
def int_R600_load_input_linear :
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
def int_R600_load_input_position :
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
def int_R600_load_input_face :
|
|
||||||
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
}
|
|
||||||
|
|
||||||
let TargetPrefix = "r600", isTarget = 1 in {
|
|
||||||
|
|
||||||
class R600ReadPreloadRegisterIntrinsic<string name>
|
|
||||||
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
|
||||||
GCCBuiltin<name>;
|
|
||||||
|
|
||||||
multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
|
|
||||||
def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
|
|
||||||
def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
|
|
||||||
def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
|
|
||||||
}
|
|
||||||
|
|
||||||
defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
|
|
||||||
"__builtin_r600_read_global_size">;
|
|
||||||
defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
|
|
||||||
"__builtin_r600_read_local_size">;
|
|
||||||
defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
|
|
||||||
"__builtin_r600_read_ngroups">;
|
|
||||||
defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
|
|
||||||
"__builtin_r600_read_tgid">;
|
|
||||||
defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
|
|
||||||
"__builtin_r600_read_tidig">;
|
|
||||||
} // End TargetPrefix = "r600"
|
|
||||||
|
|
@ -1,26 +0,0 @@
|
||||||
//===-- R600Intrinsics.td - TODO: Add brief description -------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// TODO: Add full description
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
let TargetPrefix = "R600", isTarget = 1 in {
|
|
||||||
def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
def int_R600_load_input_perspective :
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
def int_R600_load_input_constant :
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
def int_R600_load_input_linear :
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
def int_R600_load_input_position :
|
|
||||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
def int_R600_load_input_face :
|
|
||||||
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
}
|
|
||||||
|
|
@ -1,33 +0,0 @@
|
||||||
//===-- R600MachineFunctionInfo.cpp - R600 Machine Function Info-*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "R600MachineFunctionInfo.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
|
|
||||||
: MachineFunctionInfo(),
|
|
||||||
HasLinearInterpolation(false),
|
|
||||||
HasPerspectiveInterpolation(false)
|
|
||||||
{ }
|
|
||||||
|
|
||||||
unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
|
|
||||||
{
|
|
||||||
assert(HasPerspectiveInterpolation);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned R600MachineFunctionInfo::GetIJLinearIndex() const
|
|
||||||
{
|
|
||||||
assert(HasLinearInterpolation);
|
|
||||||
if (HasPerspectiveInterpolation)
|
|
||||||
return 1;
|
|
||||||
else
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
@ -1,38 +0,0 @@
|
||||||
//===-- R600MachineFunctionInfo.h - R600 Machine Function Info ----*- C++ -*-=//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// R600MachineFunctionInfo is used for keeping track of which registers have
|
|
||||||
// been reserved by the llvm.AMDGPU.reserve.reg intrinsic.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef R600MACHINEFUNCTIONINFO_H
|
|
||||||
#define R600MACHINEFUNCTIONINFO_H
|
|
||||||
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class R600MachineFunctionInfo : public MachineFunctionInfo {
|
|
||||||
|
|
||||||
public:
|
|
||||||
R600MachineFunctionInfo(const MachineFunction &MF);
|
|
||||||
std::vector<unsigned> ReservedRegs;
|
|
||||||
bool HasLinearInterpolation;
|
|
||||||
bool HasPerspectiveInterpolation;
|
|
||||||
|
|
||||||
unsigned GetIJLinearIndex() const;
|
|
||||||
unsigned GetIJPerspectiveIndex() const;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End llvm namespace
|
|
||||||
|
|
||||||
#endif //R600MACHINEFUNCTIONINFO_H
|
|
||||||
|
|
@ -1,128 +0,0 @@
|
||||||
//===-- R600RegisterInfo.cpp - R600 Register Information ------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// The file contains the R600 implementation of the TargetRegisterInfo class.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "R600RegisterInfo.h"
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
|
||||||
#include "R600MachineFunctionInfo.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm,
|
|
||||||
const TargetInstrInfo &tii)
|
|
||||||
: AMDGPURegisterInfo(tm, tii),
|
|
||||||
TM(tm),
|
|
||||||
TII(tii)
|
|
||||||
{ }
|
|
||||||
|
|
||||||
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const
|
|
||||||
{
|
|
||||||
BitVector Reserved(getNumRegs());
|
|
||||||
const R600MachineFunctionInfo * MFI = MF.getInfo<R600MachineFunctionInfo>();
|
|
||||||
|
|
||||||
Reserved.set(AMDGPU::ZERO);
|
|
||||||
Reserved.set(AMDGPU::HALF);
|
|
||||||
Reserved.set(AMDGPU::ONE);
|
|
||||||
Reserved.set(AMDGPU::ONE_INT);
|
|
||||||
Reserved.set(AMDGPU::NEG_HALF);
|
|
||||||
Reserved.set(AMDGPU::NEG_ONE);
|
|
||||||
Reserved.set(AMDGPU::PV_X);
|
|
||||||
Reserved.set(AMDGPU::ALU_LITERAL_X);
|
|
||||||
Reserved.set(AMDGPU::PREDICATE_BIT);
|
|
||||||
Reserved.set(AMDGPU::PRED_SEL_OFF);
|
|
||||||
Reserved.set(AMDGPU::PRED_SEL_ZERO);
|
|
||||||
Reserved.set(AMDGPU::PRED_SEL_ONE);
|
|
||||||
|
|
||||||
for (TargetRegisterClass::iterator I = AMDGPU::R600_CReg32RegClass.begin(),
|
|
||||||
E = AMDGPU::R600_CReg32RegClass.end(); I != E; ++I) {
|
|
||||||
Reserved.set(*I);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (std::vector<unsigned>::const_iterator I = MFI->ReservedRegs.begin(),
|
|
||||||
E = MFI->ReservedRegs.end(); I != E; ++I) {
|
|
||||||
Reserved.set(*I);
|
|
||||||
Reserved.set(*(getSuperRegisters(*I)));
|
|
||||||
}
|
|
||||||
|
|
||||||
return Reserved;
|
|
||||||
}
|
|
||||||
|
|
||||||
const TargetRegisterClass *
|
|
||||||
R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
|
|
||||||
{
|
|
||||||
switch (rc->getID()) {
|
|
||||||
case AMDGPU::GPRF32RegClassID:
|
|
||||||
case AMDGPU::GPRI32RegClassID:
|
|
||||||
return &AMDGPU::R600_Reg32RegClass;
|
|
||||||
default: return rc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned R600RegisterInfo::getHWRegIndex(unsigned reg) const
|
|
||||||
{
|
|
||||||
switch(reg) {
|
|
||||||
case AMDGPU::ZERO: return 248;
|
|
||||||
case AMDGPU::ONE:
|
|
||||||
case AMDGPU::NEG_ONE: return 249;
|
|
||||||
case AMDGPU::ONE_INT: return 250;
|
|
||||||
case AMDGPU::HALF:
|
|
||||||
case AMDGPU::NEG_HALF: return 252;
|
|
||||||
case AMDGPU::ALU_LITERAL_X: return 253;
|
|
||||||
case AMDGPU::PREDICATE_BIT:
|
|
||||||
case AMDGPU::PRED_SEL_OFF:
|
|
||||||
case AMDGPU::PRED_SEL_ZERO:
|
|
||||||
case AMDGPU::PRED_SEL_ONE:
|
|
||||||
return 0;
|
|
||||||
default: return getHWRegIndexGen(reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const
|
|
||||||
{
|
|
||||||
switch(reg) {
|
|
||||||
case AMDGPU::ZERO:
|
|
||||||
case AMDGPU::ONE:
|
|
||||||
case AMDGPU::ONE_INT:
|
|
||||||
case AMDGPU::NEG_ONE:
|
|
||||||
case AMDGPU::HALF:
|
|
||||||
case AMDGPU::NEG_HALF:
|
|
||||||
case AMDGPU::ALU_LITERAL_X:
|
|
||||||
case AMDGPU::PREDICATE_BIT:
|
|
||||||
case AMDGPU::PRED_SEL_OFF:
|
|
||||||
case AMDGPU::PRED_SEL_ZERO:
|
|
||||||
case AMDGPU::PRED_SEL_ONE:
|
|
||||||
return 0;
|
|
||||||
default: return getHWRegChanGen(reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const TargetRegisterClass * R600RegisterInfo::getCFGStructurizerRegClass(
|
|
||||||
MVT VT) const
|
|
||||||
{
|
|
||||||
switch(VT.SimpleTy) {
|
|
||||||
default:
|
|
||||||
case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) const
|
|
||||||
{
|
|
||||||
switch (Channel) {
|
|
||||||
default: assert(!"Invalid channel index"); return 0;
|
|
||||||
case 0: return AMDGPU::sel_x;
|
|
||||||
case 1: return AMDGPU::sel_y;
|
|
||||||
case 2: return AMDGPU::sel_z;
|
|
||||||
case 3: return AMDGPU::sel_w;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "R600HwRegInfo.include"
|
|
||||||
|
|
@ -1,63 +0,0 @@
|
||||||
//===-- R600RegisterInfo.h - R600 Register Info Interface ------*- C++ -*--===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface definition for R600RegisterInfo
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef R600REGISTERINFO_H_
|
|
||||||
#define R600REGISTERINFO_H_
|
|
||||||
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
|
||||||
#include "AMDGPURegisterInfo.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class R600TargetMachine;
|
|
||||||
class TargetInstrInfo;
|
|
||||||
|
|
||||||
struct R600RegisterInfo : public AMDGPURegisterInfo
|
|
||||||
{
|
|
||||||
AMDGPUTargetMachine &TM;
|
|
||||||
const TargetInstrInfo &TII;
|
|
||||||
|
|
||||||
R600RegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
|
|
||||||
|
|
||||||
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
|
|
||||||
|
|
||||||
/// getISARegClass - rc is an AMDIL reg class. This function returns the
|
|
||||||
/// R600 reg class that is equivalent to the given AMDIL reg class.
|
|
||||||
virtual const TargetRegisterClass * getISARegClass(
|
|
||||||
const TargetRegisterClass * rc) const;
|
|
||||||
|
|
||||||
/// getHWRegIndex - get the HW encoding for a register.
|
|
||||||
unsigned getHWRegIndex(unsigned reg) const;
|
|
||||||
|
|
||||||
/// getHWRegChan - get the HW encoding for a register's channel.
|
|
||||||
unsigned getHWRegChan(unsigned reg) const;
|
|
||||||
|
|
||||||
/// getCFGStructurizerRegClass - get the register class of the specified
|
|
||||||
/// type to use in the CFGStructurizer
|
|
||||||
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
|
|
||||||
|
|
||||||
/// getSubRegFromChannel - Return the sub reg enum value for the given
|
|
||||||
/// Channel (e.g. getSubRegFromChannel(0) -> AMDGPU::sel_x)
|
|
||||||
unsigned getSubRegFromChannel(unsigned Channel) const;
|
|
||||||
|
|
||||||
private:
|
|
||||||
/// getHWRegIndexGen - Generated function returns a register's encoding
|
|
||||||
unsigned getHWRegIndexGen(unsigned reg) const;
|
|
||||||
/// getHWRegChanGen - Generated function returns a register's channel
|
|
||||||
/// encoding.
|
|
||||||
unsigned getHWRegChanGen(unsigned reg) const;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif // AMDIDSAREGISTERINFO_H_
|
|
||||||
|
|
@ -1,36 +0,0 @@
|
||||||
//===-- R600Schedule.td - R600 Scheduling definitions ------*- tablegen -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// R600 has a VLIW architecture. On pre-cayman cards there are 5 instruction
|
|
||||||
// slots ALU.X, ALU.Y, ALU.Z, ALU.W, and TRANS. For cayman cards, the TRANS
|
|
||||||
// slot has been removed.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
def ALU_X : FuncUnit;
|
|
||||||
def ALU_Y : FuncUnit;
|
|
||||||
def ALU_Z : FuncUnit;
|
|
||||||
def ALU_W : FuncUnit;
|
|
||||||
def TRANS : FuncUnit;
|
|
||||||
|
|
||||||
def AnyALU : InstrItinClass;
|
|
||||||
def VecALU : InstrItinClass;
|
|
||||||
def TransALU : InstrItinClass;
|
|
||||||
|
|
||||||
def R600_EG_Itin : ProcessorItineraries <
|
|
||||||
[ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS, ALU_NULL],
|
|
||||||
[],
|
|
||||||
[
|
|
||||||
InstrItinData<AnyALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_Z, ALU_W, TRANS]>]>,
|
|
||||||
InstrItinData<VecALU, [InstrStage<1, [ALU_X, ALU_Y, ALU_X, ALU_W]>]>,
|
|
||||||
InstrItinData<TransALU, [InstrStage<1, [TRANS]>]>,
|
|
||||||
InstrItinData<NullALU, [InstrStage<1, [ALU_NULL]>]>
|
|
||||||
]
|
|
||||||
>;
|
|
||||||
|
|
@ -1,151 +0,0 @@
|
||||||
//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This pass maps the pseudo interpolation registers to the correct physical
|
|
||||||
// registers. Prior to executing a fragment shader, the GPU loads interpolation
|
|
||||||
// parameters into physical registers. The specific physical register that each
|
|
||||||
// interpolation parameter ends up in depends on the type of the interpolation
|
|
||||||
// parameter as well as how many interpolation parameters are used by the
|
|
||||||
// shader.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "AMDGPU.h"
|
|
||||||
#include "AMDIL.h"
|
|
||||||
#include "SIMachineFunctionInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
class SIAssignInterpRegsPass : public MachineFunctionPass {
|
|
||||||
|
|
||||||
private:
|
|
||||||
static char ID;
|
|
||||||
TargetMachine &TM;
|
|
||||||
|
|
||||||
void addLiveIn(MachineFunction * MF, MachineRegisterInfo & MRI,
|
|
||||||
unsigned physReg, unsigned virtReg);
|
|
||||||
|
|
||||||
public:
|
|
||||||
SIAssignInterpRegsPass(TargetMachine &tm) :
|
|
||||||
MachineFunctionPass(ID), TM(tm) { }
|
|
||||||
|
|
||||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
|
||||||
|
|
||||||
const char *getPassName() const { return "SI Assign intrpolation registers"; }
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End anonymous namespace
|
|
||||||
|
|
||||||
char SIAssignInterpRegsPass::ID = 0;
|
|
||||||
|
|
||||||
#define INTERP_VALUES 16
|
|
||||||
#define REQUIRED_VALUE_MAX_INDEX 7
|
|
||||||
|
|
||||||
struct InterpInfo {
|
|
||||||
bool Enabled;
|
|
||||||
unsigned Regs[3];
|
|
||||||
unsigned RegCount;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
|
|
||||||
return new SIAssignInterpRegsPass(tm);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF)
|
|
||||||
{
|
|
||||||
|
|
||||||
struct InterpInfo InterpUse[INTERP_VALUES] = {
|
|
||||||
{false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
|
|
||||||
{false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
|
|
||||||
{false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
|
|
||||||
{false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
|
|
||||||
{false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
|
|
||||||
{false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
|
|
||||||
{false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
|
|
||||||
{false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
|
|
||||||
{false, {AMDGPU::POS_X_FLOAT}, 1},
|
|
||||||
{false, {AMDGPU::POS_Y_FLOAT}, 1},
|
|
||||||
{false, {AMDGPU::POS_Z_FLOAT}, 1},
|
|
||||||
{false, {AMDGPU::POS_W_FLOAT}, 1},
|
|
||||||
{false, {AMDGPU::FRONT_FACE}, 1},
|
|
||||||
{false, {AMDGPU::ANCILLARY}, 1},
|
|
||||||
{false, {AMDGPU::SAMPLE_COVERAGE}, 1},
|
|
||||||
{false, {AMDGPU::POS_FIXED_PT}, 1}
|
|
||||||
};
|
|
||||||
|
|
||||||
SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
|
|
||||||
// This pass is only needed for pixel shaders.
|
|
||||||
if (MFI->ShaderType != ShaderType::PIXEL) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
|
||||||
bool ForceEnable = true;
|
|
||||||
|
|
||||||
// First pass, mark the interpolation values that are used.
|
|
||||||
for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
|
|
||||||
for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
|
|
||||||
RegIdx++) {
|
|
||||||
InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
|
|
||||||
!MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
|
|
||||||
if (InterpUse[InterpIdx].Enabled &&
|
|
||||||
InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
|
|
||||||
ForceEnable = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// At least one interpolation mode must be enabled or else the GPU will hang.
|
|
||||||
if (ForceEnable) {
|
|
||||||
InterpUse[0].Enabled = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned UsedVgprs = 0;
|
|
||||||
|
|
||||||
// Second pass, replace with VGPRs.
|
|
||||||
for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
|
|
||||||
if (!InterpUse[InterpIdx].Enabled) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
MFI->SPIPSInputAddr |= (1 << InterpIdx);
|
|
||||||
|
|
||||||
for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
|
|
||||||
RegIdx++, UsedVgprs++) {
|
|
||||||
unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
|
|
||||||
unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
|
|
||||||
MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
|
|
||||||
addLiveIn(&MF, MRI, NewReg, VirtReg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
|
|
||||||
MachineRegisterInfo & MRI,
|
|
||||||
unsigned physReg, unsigned virtReg)
|
|
||||||
{
|
|
||||||
const TargetInstrInfo * TII = TM.getInstrInfo();
|
|
||||||
if (!MRI.isLiveIn(physReg)) {
|
|
||||||
MRI.addLiveIn(physReg, virtReg);
|
|
||||||
MF->front().addLiveIn(physReg);
|
|
||||||
BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
|
|
||||||
TII->get(TargetOpcode::COPY), virtReg)
|
|
||||||
.addReg(physReg);
|
|
||||||
} else {
|
|
||||||
MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,291 +0,0 @@
|
||||||
#===-- SIGenRegisterInfo.pl - Script for generating register info files ----===#
|
|
||||||
#
|
|
||||||
# The LLVM Compiler Infrastructure
|
|
||||||
#
|
|
||||||
# This file is distributed under the University of Illinois Open Source
|
|
||||||
# License. See LICENSE.TXT for details.
|
|
||||||
#
|
|
||||||
#===------------------------------------------------------------------------===#
|
|
||||||
#
|
|
||||||
# This perl script prints to stdout .td code to be used as SIRegisterInfo.td
|
|
||||||
# it also generates a file called SIHwRegInfo.include, which contains helper
|
|
||||||
# functions for determining the hw encoding of registers.
|
|
||||||
#
|
|
||||||
#===------------------------------------------------------------------------===#
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
use warnings;
|
|
||||||
|
|
||||||
my $SGPR_COUNT = 104;
|
|
||||||
my $VGPR_COUNT = 256;
|
|
||||||
|
|
||||||
my $SGPR_MAX_IDX = $SGPR_COUNT - 1;
|
|
||||||
my $VGPR_MAX_IDX = $VGPR_COUNT - 1;
|
|
||||||
|
|
||||||
my $INDEX_FILE = defined($ARGV[0]) ? $ARGV[0] : '';
|
|
||||||
|
|
||||||
print <<STRING;
|
|
||||||
|
|
||||||
let Namespace = "AMDGPU" in {
|
|
||||||
def low : SubRegIndex;
|
|
||||||
def high : SubRegIndex;
|
|
||||||
|
|
||||||
def sub0 : SubRegIndex;
|
|
||||||
def sub1 : SubRegIndex;
|
|
||||||
def sub2 : SubRegIndex;
|
|
||||||
def sub3 : SubRegIndex;
|
|
||||||
def sub4 : SubRegIndex;
|
|
||||||
def sub5 : SubRegIndex;
|
|
||||||
def sub6 : SubRegIndex;
|
|
||||||
def sub7 : SubRegIndex;
|
|
||||||
}
|
|
||||||
|
|
||||||
class SIReg <string n> : Register<n> {
|
|
||||||
let Namespace = "AMDGPU";
|
|
||||||
}
|
|
||||||
|
|
||||||
class SI_64 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
|
|
||||||
let Namespace = "AMDGPU";
|
|
||||||
let SubRegIndices = [low, high];
|
|
||||||
}
|
|
||||||
|
|
||||||
class SI_128 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
|
|
||||||
let Namespace = "AMDGPU";
|
|
||||||
let SubRegIndices = [sel_x, sel_y, sel_z, sel_w];
|
|
||||||
}
|
|
||||||
|
|
||||||
class SI_256 <string n, list<Register> subregs> : RegisterWithSubRegs<n, subregs> {
|
|
||||||
let Namespace = "AMDGPU";
|
|
||||||
let SubRegIndices = [sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7];
|
|
||||||
}
|
|
||||||
|
|
||||||
class SGPR_32 <bits<8> num, string name> : SIReg<name> {
|
|
||||||
field bits<8> Num;
|
|
||||||
|
|
||||||
let Num = num;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class VGPR_32 <bits<9> num, string name> : SIReg<name> {
|
|
||||||
field bits<9> Num;
|
|
||||||
|
|
||||||
let Num = num;
|
|
||||||
}
|
|
||||||
|
|
||||||
class SGPR_64 <bits<8> num, string name, list<Register> subregs> :
|
|
||||||
SI_64 <name, subregs>;
|
|
||||||
|
|
||||||
class VGPR_64 <bits<9> num, string name, list<Register> subregs> :
|
|
||||||
SI_64 <name, subregs>;
|
|
||||||
|
|
||||||
class SGPR_128 <bits<8> num, string name, list<Register> subregs> :
|
|
||||||
SI_128 <name, subregs>;
|
|
||||||
|
|
||||||
class VGPR_128 <bits<9> num, string name, list<Register> subregs> :
|
|
||||||
SI_128 <name, subregs>;
|
|
||||||
|
|
||||||
class SGPR_256 <bits<8> num, string name, list<Register> subregs> :
|
|
||||||
SI_256 <name, subregs>;
|
|
||||||
|
|
||||||
def VCC : SIReg<"VCC">;
|
|
||||||
def EXEC_LO : SIReg<"EXEC LO">;
|
|
||||||
def EXEC_HI : SIReg<"EXEC HI">;
|
|
||||||
def EXEC : SI_64<"EXEC", [EXEC_LO,EXEC_HI]>;
|
|
||||||
def SCC : SIReg<"SCC">;
|
|
||||||
def SREG_LIT_0 : SIReg <"S LIT 0">;
|
|
||||||
def SI_LITERAL_CONSTANT : SIReg<"LITERAL CONSTANT">;
|
|
||||||
|
|
||||||
def M0 : SIReg <"M0">;
|
|
||||||
|
|
||||||
//Interpolation registers
|
|
||||||
|
|
||||||
def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
|
|
||||||
def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
|
|
||||||
def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
|
|
||||||
def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
|
|
||||||
def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
|
|
||||||
def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">;
|
|
||||||
def PERSP_I_W : SIReg <"PERSP_I_W">;
|
|
||||||
def PERSP_J_W : SIReg <"PERSP_J_W">;
|
|
||||||
def PERSP_1_W : SIReg <"PERSP_1_W">;
|
|
||||||
def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
|
|
||||||
def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
|
|
||||||
def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
|
|
||||||
def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
|
|
||||||
def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
|
|
||||||
def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
|
|
||||||
def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
|
|
||||||
def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
|
|
||||||
def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
|
|
||||||
def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
|
|
||||||
def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
|
|
||||||
def FRONT_FACE : SIReg <"FRONT_FACE">;
|
|
||||||
def ANCILLARY : SIReg <"ANCILLARY">;
|
|
||||||
def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
|
|
||||||
def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
|
|
||||||
|
|
||||||
STRING
|
|
||||||
|
|
||||||
#32 bit register
|
|
||||||
|
|
||||||
my @SGPR;
|
|
||||||
for (my $i = 0; $i < $SGPR_COUNT; $i++) {
|
|
||||||
print "def SGPR$i : SGPR_32 <$i, \"SGPR$i\">;\n";
|
|
||||||
$SGPR[$i] = "SGPR$i";
|
|
||||||
}
|
|
||||||
|
|
||||||
my @VGPR;
|
|
||||||
for (my $i = 0; $i < $VGPR_COUNT; $i++) {
|
|
||||||
print "def VGPR$i : VGPR_32 <$i, \"VGPR$i\">;\n";
|
|
||||||
$VGPR[$i] = "VGPR$i";
|
|
||||||
}
|
|
||||||
|
|
||||||
print <<STRING;
|
|
||||||
|
|
||||||
def SReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
|
||||||
(add (sequence "SGPR%u", 0, $SGPR_MAX_IDX), SREG_LIT_0, M0, EXEC_LO, EXEC_HI)
|
|
||||||
>;
|
|
||||||
|
|
||||||
def VReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
|
||||||
(add (sequence "VGPR%u", 0, $VGPR_MAX_IDX),
|
|
||||||
PERSP_SAMPLE_I, PERSP_SAMPLE_J,
|
|
||||||
PERSP_CENTER_I, PERSP_CENTER_J,
|
|
||||||
PERSP_CENTROID_I, PERSP_CENTROID_J,
|
|
||||||
PERSP_I_W, PERSP_J_W, PERSP_1_W,
|
|
||||||
LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
|
|
||||||
LINEAR_CENTER_I, LINEAR_CENTER_J,
|
|
||||||
LINEAR_CENTROID_I, LINEAR_CENTROID_J,
|
|
||||||
LINE_STIPPLE_TEX_COORD,
|
|
||||||
POS_X_FLOAT,
|
|
||||||
POS_Y_FLOAT,
|
|
||||||
POS_Z_FLOAT,
|
|
||||||
POS_W_FLOAT,
|
|
||||||
FRONT_FACE,
|
|
||||||
ANCILLARY,
|
|
||||||
SAMPLE_COVERAGE,
|
|
||||||
POS_FIXED_PT
|
|
||||||
)
|
|
||||||
>;
|
|
||||||
|
|
||||||
def AllReg_32 : RegisterClass<"AMDGPU", [f32, i32], 32,
|
|
||||||
(add VReg_32, SReg_32)
|
|
||||||
>;
|
|
||||||
|
|
||||||
def SCCReg : RegisterClass<"AMDGPU", [i1], 1, (add SCC)>;
|
|
||||||
def VCCReg : RegisterClass<"AMDGPU", [i1], 1, (add VCC)>;
|
|
||||||
def EXECReg : RegisterClass<"AMDGPU", [i1], 1, (add EXEC)>;
|
|
||||||
def M0Reg : RegisterClass<"AMDGPU", [i32], 32, (add M0)>;
|
|
||||||
|
|
||||||
|
|
||||||
STRING
|
|
||||||
|
|
||||||
my @subregs_64 = ('low', 'high');
|
|
||||||
my @subregs_128 = ('sel_x', 'sel_y', 'sel_z', 'sel_w');
|
|
||||||
my @subregs_256 = ('sub0', 'sub1', 'sub2', 'sub3', 'sub4', 'sub5', 'sub6', 'sub7');
|
|
||||||
|
|
||||||
my @SGPR64 = print_sgpr_class(64, \@subregs_64, ('i64'));
|
|
||||||
my @SGPR128 = print_sgpr_class(128, \@subregs_128, ('v4f32', 'v4i32'));
|
|
||||||
my @SGPR256 = print_sgpr_class(256, \@subregs_256, ('v8i32'));
|
|
||||||
|
|
||||||
my @VGPR64 = print_vgpr_class(64, \@subregs_64, ('i64'));
|
|
||||||
my @VGPR128 = print_vgpr_class(128, \@subregs_128, ('v4f32'));
|
|
||||||
|
|
||||||
|
|
||||||
my $sgpr64_list = join(',', @SGPR64);
|
|
||||||
my $vgpr64_list = join(',', @VGPR64);
|
|
||||||
print <<STRING;
|
|
||||||
|
|
||||||
def AllReg_64 : RegisterClass<"AMDGPU", [f64, i64], 64,
|
|
||||||
(add $sgpr64_list, $vgpr64_list)
|
|
||||||
>;
|
|
||||||
|
|
||||||
STRING
|
|
||||||
|
|
||||||
if ($INDEX_FILE ne '') {
|
|
||||||
open(my $fh, ">", $INDEX_FILE);
|
|
||||||
my %hw_values;
|
|
||||||
|
|
||||||
for (my $i = 0; $i <= $#SGPR; $i++) {
|
|
||||||
push (@{$hw_values{$i}}, $SGPR[$i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (my $i = 0; $i <= $#SGPR64; $i++) {
|
|
||||||
push (@{$hw_values{$i * 2}}, $SGPR64[$i])
|
|
||||||
}
|
|
||||||
|
|
||||||
for (my $i = 0; $i <= $#SGPR128; $i++) {
|
|
||||||
push (@{$hw_values{$i * 4}}, $SGPR128[$i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (my $i = 0; $i <= $#SGPR256; $i++) {
|
|
||||||
push (@{$hw_values{$i * 8}}, $SGPR256[$i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (my $i = 0; $i <= $#VGPR; $i++) {
|
|
||||||
push (@{$hw_values{$i}}, $VGPR[$i]);
|
|
||||||
}
|
|
||||||
for (my $i = 0; $i <= $#VGPR64; $i++) {
|
|
||||||
push (@{$hw_values{$i * 2}}, $VGPR64[$i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (my $i = 0; $i <= $#VGPR128; $i++) {
|
|
||||||
push (@{$hw_values{$i * 4}}, $VGPR128[$i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
print $fh "unsigned SIRegisterInfo::getHWRegNum(unsigned reg) const\n{\n switch(reg) {\n";
|
|
||||||
for my $key (keys(%hw_values)) {
|
|
||||||
my @names = @{$hw_values{$key}};
|
|
||||||
for my $regname (@names) {
|
|
||||||
print $fh " case AMDGPU::$regname:\n"
|
|
||||||
}
|
|
||||||
print $fh " return $key;\n";
|
|
||||||
}
|
|
||||||
print $fh " default: assert(!\"Unknown Register\"); return 0;\n }\n}\n"
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
sub print_sgpr_class {
|
|
||||||
my ($reg_width, $sub_reg_ref, @types) = @_;
|
|
||||||
return print_reg_class('SReg', 'SGPR', $reg_width, $SGPR_COUNT, $sub_reg_ref, @types);
|
|
||||||
}
|
|
||||||
|
|
||||||
sub print_vgpr_class {
|
|
||||||
my ($reg_width, $sub_reg_ref, @types) = @_;
|
|
||||||
return print_reg_class('VReg', 'VGPR', $reg_width, $VGPR_COUNT, $sub_reg_ref, @types);
|
|
||||||
}
|
|
||||||
|
|
||||||
sub print_reg_class {
|
|
||||||
my ($class_prefix, $reg_prefix, $reg_width, $reg_count, $sub_reg_ref, @types) = @_;
|
|
||||||
my @registers;
|
|
||||||
my $component_count = $reg_width / 32;
|
|
||||||
|
|
||||||
for (my $i = 0; $i < $reg_count; $i += $component_count) {
|
|
||||||
my $reg_name = $reg_prefix . $i . '_' . $reg_width;
|
|
||||||
my @sub_regs;
|
|
||||||
for (my $idx = 0; $idx < $component_count; $idx++) {
|
|
||||||
my $sub_idx = $i + $idx;
|
|
||||||
push(@sub_regs, $reg_prefix . $sub_idx);
|
|
||||||
}
|
|
||||||
print "def $reg_name : $reg_prefix\_$reg_width <$i, \"$reg_name\", [ ", join(',', @sub_regs) , "]>;\n";
|
|
||||||
push (@registers, $reg_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
#Add VCC to SReg_64
|
|
||||||
if ($class_prefix eq 'SReg' and $reg_width == 64) {
|
|
||||||
push (@registers, 'VCC')
|
|
||||||
}
|
|
||||||
|
|
||||||
#Add EXEC to SReg_64
|
|
||||||
if ($class_prefix eq 'SReg' and $reg_width == 64) {
|
|
||||||
push (@registers, 'EXEC')
|
|
||||||
}
|
|
||||||
|
|
||||||
my $reg_list = join(', ', @registers);
|
|
||||||
|
|
||||||
print "def $class_prefix\_$reg_width : RegisterClass<\"AMDGPU\", [" . join (', ', @types) . "], $reg_width,\n (add $reg_list)\n>{\n";
|
|
||||||
print " let SubRegClasses = [($class_prefix\_", ($reg_width / $component_count) , ' ', join(', ', @{$sub_reg_ref}), ")];\n}\n";
|
|
||||||
return @registers;
|
|
||||||
}
|
|
||||||
|
|
@ -1,466 +0,0 @@
|
||||||
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Most of the DAG lowering is handled in AMDGPUISelLowering.cpp. This file is
|
|
||||||
// mostly EmitInstrWithCustomInserter().
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "SIISelLowering.h"
|
|
||||||
#include "AMDIL.h"
|
|
||||||
#include "AMDILIntrinsicInfo.h"
|
|
||||||
#include "SIInstrInfo.h"
|
|
||||||
#include "SIMachineFunctionInfo.h"
|
|
||||||
#include "SIRegisterInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
#include "llvm/CodeGen/SelectionDAG.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
SITargetLowering::SITargetLowering(TargetMachine &TM) :
|
|
||||||
AMDGPUTargetLowering(TM),
|
|
||||||
TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo()))
|
|
||||||
{
|
|
||||||
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
|
|
||||||
addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
|
|
||||||
addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
|
|
||||||
addRegisterClass(MVT::i64, &AMDGPU::VReg_64RegClass);
|
|
||||||
addRegisterClass(MVT::i1, &AMDGPU::SCCRegRegClass);
|
|
||||||
addRegisterClass(MVT::i1, &AMDGPU::VCCRegRegClass);
|
|
||||||
|
|
||||||
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
|
|
||||||
addRegisterClass(MVT::v8i32, &AMDGPU::SReg_256RegClass);
|
|
||||||
|
|
||||||
computeRegisterProperties();
|
|
||||||
|
|
||||||
setOperationAction(ISD::AND, MVT::i1, Custom);
|
|
||||||
|
|
||||||
setOperationAction(ISD::ADD, MVT::i64, Legal);
|
|
||||||
setOperationAction(ISD::ADD, MVT::i32, Legal);
|
|
||||||
|
|
||||||
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
|
|
||||||
|
|
||||||
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
|
||||||
|
|
||||||
// We need to custom lower loads from the USER_SGPR address space, so we can
|
|
||||||
// add the SGPRs as livein registers.
|
|
||||||
setOperationAction(ISD::LOAD, MVT::i32, Custom);
|
|
||||||
setOperationAction(ISD::LOAD, MVT::i64, Custom);
|
|
||||||
|
|
||||||
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
|
||||||
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
|
||||||
|
|
||||||
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
|
|
||||||
setTargetDAGCombine(ISD::SELECT_CC);
|
|
||||||
|
|
||||||
setTargetDAGCombine(ISD::SETCC);
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
|
|
||||||
MachineInstr * MI, MachineBasicBlock * BB) const
|
|
||||||
{
|
|
||||||
const TargetInstrInfo * TII = getTargetMachine().getInstrInfo();
|
|
||||||
MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
|
|
||||||
MachineBasicBlock::iterator I = MI;
|
|
||||||
|
|
||||||
if (TII->get(MI->getOpcode()).TSFlags & SIInstrFlags::NEED_WAIT) {
|
|
||||||
AppendS_WAITCNT(MI, *BB, llvm::next(I));
|
|
||||||
return BB;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (MI->getOpcode()) {
|
|
||||||
default:
|
|
||||||
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
|
||||||
case AMDGPU::BRANCH: return BB;
|
|
||||||
case AMDGPU::CLAMP_SI:
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
// VSRC1-2 are unused, but we still need to fill all the
|
|
||||||
// operand slots, so we just reuse the VSRC0 operand
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0) // ABS
|
|
||||||
.addImm(1) // CLAMP
|
|
||||||
.addImm(0) // OMOD
|
|
||||||
.addImm(0); // NEG
|
|
||||||
MI->eraseFromParent();
|
|
||||||
break;
|
|
||||||
|
|
||||||
case AMDGPU::FABS_SI:
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
// VSRC1-2 are unused, but we still need to fill all the
|
|
||||||
// operand slots, so we just reuse the VSRC0 operand
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(1) // ABS
|
|
||||||
.addImm(0) // CLAMP
|
|
||||||
.addImm(0) // OMOD
|
|
||||||
.addImm(0); // NEG
|
|
||||||
MI->eraseFromParent();
|
|
||||||
break;
|
|
||||||
|
|
||||||
case AMDGPU::FNEG_SI:
|
|
||||||
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::V_MOV_B32_e64))
|
|
||||||
.addOperand(MI->getOperand(0))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
// VSRC1-2 are unused, but we still need to fill all the
|
|
||||||
// operand slots, so we just reuse the VSRC0 operand
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addOperand(MI->getOperand(1))
|
|
||||||
.addImm(0) // ABS
|
|
||||||
.addImm(0) // CLAMP
|
|
||||||
.addImm(0) // OMOD
|
|
||||||
.addImm(1); // NEG
|
|
||||||
MI->eraseFromParent();
|
|
||||||
break;
|
|
||||||
case AMDGPU::SHADER_TYPE:
|
|
||||||
BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
|
|
||||||
MI->getOperand(0).getImm();
|
|
||||||
MI->eraseFromParent();
|
|
||||||
break;
|
|
||||||
|
|
||||||
case AMDGPU::SI_INTERP:
|
|
||||||
LowerSI_INTERP(MI, *BB, I, MRI);
|
|
||||||
break;
|
|
||||||
case AMDGPU::SI_INTERP_CONST:
|
|
||||||
LowerSI_INTERP_CONST(MI, *BB, I, MRI);
|
|
||||||
break;
|
|
||||||
case AMDGPU::SI_KIL:
|
|
||||||
LowerSI_KIL(MI, *BB, I, MRI);
|
|
||||||
break;
|
|
||||||
case AMDGPU::SI_WQM:
|
|
||||||
LowerSI_WQM(MI, *BB, I, MRI);
|
|
||||||
break;
|
|
||||||
case AMDGPU::SI_V_CNDLT:
|
|
||||||
LowerSI_V_CNDLT(MI, *BB, I, MRI);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return BB;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SITargetLowering::AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
|
|
||||||
MachineBasicBlock::iterator I) const
|
|
||||||
{
|
|
||||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WAITCNT))
|
|
||||||
.addImm(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
|
|
||||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
|
|
||||||
{
|
|
||||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_WQM_B64), AMDGPU::EXEC)
|
|
||||||
.addReg(AMDGPU::EXEC);
|
|
||||||
|
|
||||||
MI->eraseFromParent();
|
|
||||||
}
|
|
||||||
|
|
||||||
void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
|
|
||||||
MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
|
|
||||||
{
|
|
||||||
unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
|
|
||||||
unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
|
|
||||||
MachineOperand dst = MI->getOperand(0);
|
|
||||||
MachineOperand iReg = MI->getOperand(1);
|
|
||||||
MachineOperand jReg = MI->getOperand(2);
|
|
||||||
MachineOperand attr_chan = MI->getOperand(3);
|
|
||||||
MachineOperand attr = MI->getOperand(4);
|
|
||||||
MachineOperand params = MI->getOperand(5);
|
|
||||||
|
|
||||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
|
|
||||||
.addOperand(params);
|
|
||||||
|
|
||||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
|
|
||||||
.addOperand(iReg)
|
|
||||||
.addOperand(attr_chan)
|
|
||||||
.addOperand(attr)
|
|
||||||
.addReg(M0);
|
|
||||||
|
|
||||||
BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
|
|
||||||
.addOperand(dst)
|
|
||||||
.addReg(tmp)
|
|
||||||
.addOperand(jReg)
|
|
||||||
.addOperand(attr_chan)
|
|
||||||
.addOperand(attr)
|
|
||||||
.addReg(M0);
|
|
||||||
|
|
||||||
MI->eraseFromParent();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// LowerSI_INTERP_CONST - Expand the SI_INTERP_CONST pseudo into
/// V_INTERP_MOV_F32, which reads a flat-shaded attribute without
/// barycentric interpolation. M0 supplies the parameter base.
void SITargetLowering::LowerSI_INTERP_CONST(MachineInstr *MI,
    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
    MachineRegisterInfo &MRI) const
{
  DebugLoc DL = BB.findDebugLoc(I);

  // Pseudo operands: dst, attribute channel, attribute, param base.
  MachineOperand Dst      = MI->getOperand(0);
  MachineOperand AttrChan = MI->getOperand(1);
  MachineOperand Attr     = MI->getOperand(2);
  MachineOperand Params   = MI->getOperand(3);

  unsigned M0Reg = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);

  BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), M0Reg)
      .addOperand(Params);

  BuildMI(BB, I, DL, TII->get(AMDGPU::V_INTERP_MOV_F32))
      .addOperand(Dst)
      .addOperand(AttrChan)
      .addOperand(Attr)
      .addReg(M0Reg);

  MI->eraseFromParent();
}
|
|
||||||
|
|
||||||
/// LowerSI_KIL - Expand the SI_KIL pseudo. Lanes whose operand is negative
/// are removed from the exec mask; if that leaves the whole wavefront dead,
/// a null export is issued and the program ends.
void SITargetLowering::LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
  DebugLoc DL = BB.findDebugLoc(I);

  // Clear this pixel from the exec mask if the operand is negative
  // (V_CMPX writes the comparison result back into EXEC).
  BuildMI(BB, I, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32),
          AMDGPU::VCC)
      .addReg(AMDGPU::SREG_LIT_0)
      .addOperand(MI->getOperand(0));

  // If the exec mask is non-zero, skip the next two instructions.
  BuildMI(BB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
      .addImm(3)
      .addReg(AMDGPU::EXEC);

  // Exec mask is zero: Export to NULL target...
  BuildMI(BB, I, DL, TII->get(AMDGPU::EXP))
      .addImm(0)
      .addImm(0x09)  // V_008DFC_SQ_EXP_NULL
      .addImm(0)
      .addImm(1)
      .addImm(1)
      .addReg(AMDGPU::SREG_LIT_0)
      .addReg(AMDGPU::SREG_LIT_0)
      .addReg(AMDGPU::SREG_LIT_0)
      .addReg(AMDGPU::SREG_LIT_0);

  // ... and terminate wavefront.
  BuildMI(BB, I, DL, TII->get(AMDGPU::S_ENDPGM));

  MI->eraseFromParent();
}
|
|
||||||
|
|
||||||
/// LowerSI_V_CNDLT - Expand the SI_V_CNDLT pseudo (select based on
/// "operand < 0") into a V_CMP that sets VCC followed by V_CNDMASK_B32.
void SITargetLowering::LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const
{
  DebugLoc DL = BB.findDebugLoc(I);

  // VCC = (0 > src0), i.e. src0 is negative.
  BuildMI(BB, I, DL, TII->get(AMDGPU::V_CMP_GT_F32_e32),
          AMDGPU::VCC)
      .addReg(AMDGPU::SREG_LIT_0)
      .addOperand(MI->getOperand(1));

  // dst = VCC ? operand(2) : operand(3); CNDMASK takes the "false" value
  // first, hence operands 3 then 2.
  BuildMI(BB, I, DL, TII->get(AMDGPU::V_CNDMASK_B32))
      .addOperand(MI->getOperand(0))
      .addOperand(MI->getOperand(3))
      .addOperand(MI->getOperand(2))
      .addReg(AMDGPU::VCC);

  MI->eraseFromParent();
}
|
|
||||||
|
|
||||||
/// getSetCCResultType - Comparisons on SI produce a single condition bit.
EVT SITargetLowering::getSetCCResultType(EVT VT) const
{
  return MVT::i1;
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Custom DAG Lowering Operations
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
/// LowerOperation - Custom lowering entry point; dispatches the opcodes this
/// target handles itself and defers everything else to the AMDGPU base class.
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
  case ISD::BR_CC:     return LowerBR_CC(Op, DAG);
  case ISD::LOAD:      return LowerLOAD(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::AND:       return Loweri1ContextSwitch(Op, DAG, ISD::AND);
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::SI_vs_load_buffer_index:
      // The vertex index is passed in as a live-in value in VGPR0.
      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
                                  AMDGPU::VGPR0, VT);
    default:
      return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    }
    break;
  }
  default:
    return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  }
  return SDValue();
}
|
|
||||||
|
|
||||||
/// Loweri1ContextSwitch - The function is for lowering i1 operations on the
|
|
||||||
/// VCC register. In the VALU context, VCC is a one bit register, but in the
|
|
||||||
/// SALU context the VCC is a 64-bit register (1-bit per thread). Since only
|
|
||||||
/// the SALU can perform operations on the VCC register, we need to promote
|
|
||||||
/// the operand types from i1 to i64 in order for tablegen to be able to match
|
|
||||||
/// this operation to the correct SALU instruction. We do this promotion by
|
|
||||||
/// wrapping the operands in a CopyToReg node.
|
|
||||||
///
|
|
||||||
SDValue SITargetLowering::Loweri1ContextSwitch(SDValue Op,
|
|
||||||
SelectionDAG &DAG,
|
|
||||||
unsigned VCCNode) const
|
|
||||||
{
|
|
||||||
DebugLoc DL = Op.getDebugLoc();
|
|
||||||
|
|
||||||
SDValue OpNode = DAG.getNode(VCCNode, DL, MVT::i64,
|
|
||||||
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
|
|
||||||
Op.getOperand(0)),
|
|
||||||
DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i64,
|
|
||||||
Op.getOperand(1)));
|
|
||||||
|
|
||||||
return DAG.getNode(SIISD::VCC_BITCAST, DL, MVT::i1, OpNode);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// LowerBR_CC - Split BR_CC into an explicit i1 SETCC feeding an
/// AMDGPU BRANCH_COND node.
SDValue SITargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  // BR_CC operands: chain, condition code, lhs, rhs, target block.
  SDValue Chain = Op.getOperand(0);
  SDValue CC    = Op.getOperand(1);
  SDValue LHS   = Op.getOperand(2);
  SDValue RHS   = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);

  // Materialize the comparison as a one-bit value...
  SDValue CmpValue = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1,
                                 LHS, RHS, CC);

  // ...and branch on it.
  return DAG.getNode(AMDGPUISD::BRANCH_COND, CmpValue.getDebugLoc(),
                     MVT::Other, Chain, JumpT, CmpValue);
}
|
|
||||||
|
|
||||||
/// LowerLOAD - Replace loads from the USER_SGPR address space with a
/// reference to the live-in SGPR holding that user constant. Other address
/// spaces are left for default handling.
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
  assert(Load);

  // We only need to lower USER_SGPR address space loads.
  if (Load->getPointerInfo().getAddrSpace() != AMDGPUAS::USER_SGPR_ADDRESS) {
    return SDValue();
  }

  // Loads from the USER_SGPR address space can only have constant value
  // pointers.
  ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Load->getBasePtr());
  assert(BasePtr);

  // Pick the SGPR register class matching the value width (in dwords).
  unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
  const TargetRegisterClass *RC;
  switch (TypeDwordWidth) {
  case 1:
    RC = &AMDGPU::SReg_32RegClass;
    break;
  case 2:
    RC = &AMDGPU::SReg_64RegClass;
    break;
  default:
    assert(!"USER_SGPR value size not implemented");
    return SDValue();
  }

  // The constant pointer is a dword offset; convert it to an index into the
  // chosen register class.
  uint64_t Index = BasePtr->getZExtValue();
  assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
  unsigned SGPRIndex = Index / TypeDwordWidth;
  unsigned Reg = RC->getRegister(SGPRIndex);

  DAG.ReplaceAllUsesOfValueWith(Op,
      CreateLiveInRegister(DAG, RC, Reg, VT));
  return SDValue();
}
|
|
||||||
|
|
||||||
/// LowerSELECT_CC - Decompose SELECT_CC into an i1 SETCC followed by a
/// plain SELECT on the resulting condition bit.
SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  // SELECT_CC operands: lhs, rhs, true value, false value, condition code.
  SDValue LHS   = Op.getOperand(0);
  SDValue RHS   = Op.getOperand(1);
  SDValue True  = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC    = Op.getOperand(4);

  EVT VT = Op.getValueType();
  DebugLoc DL = Op.getDebugLoc();

  SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
  return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Custom DAG optimizations
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
/// PerformDAGCombine - Target-specific DAG combines:
///   - i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
///   - i1 setcc(sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
/// Returns SDValue() when no combine applies.
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  DebugLoc DL = N->getDebugLoc();
  EVT VT = N->getValueType(0);

  switch (N->getOpcode()) {
  default: break;
  case ISD::SELECT_CC: {
    // NOTE: a stray N->dump() debug call was removed here; it printed the
    // node to stderr on every SELECT_CC combine.
    ConstantSDNode *True, *False;
    // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
    if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
        && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
        && True->isAllOnesValue()
        && False->isNullValue()
        && VT == MVT::i1) {
      return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
                         N->getOperand(1), N->getOperand(4));
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Arg0 = N->getOperand(0);
    SDValue Arg1 = N->getOperand(1);
    SDValue CC = N->getOperand(2);
    ConstantSDNode * C = NULL;
    // SETCC's third operand is always a CondCodeSDNode, so use a checked
    // cast<> (the previous dyn_cast<> result was dereferenced without a
    // null check).
    ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get();

    // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
    if (VT == MVT::i1
        && Arg0.getOpcode() == ISD::SIGN_EXTEND
        && Arg0.getOperand(0).getValueType() == MVT::i1
        && (C = dyn_cast<ConstantSDNode>(Arg1))
        && C->isNullValue()
        && CCOp == ISD::SETNE) {
      return SimplifySetCC(VT, Arg0.getOperand(0),
                           DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
    }
    break;
  }
  }
  return SDValue();
}
|
|
||||||
|
|
||||||
#define NODE_NAME_CASE(node) case SIISD::node: return #node;
|
|
||||||
|
|
||||||
const char* SITargetLowering::getTargetNodeName(unsigned Opcode) const
|
|
||||||
{
|
|
||||||
switch (Opcode) {
|
|
||||||
default: return AMDGPUTargetLowering::getTargetNodeName(Opcode);
|
|
||||||
NODE_NAME_CASE(VCC_AND)
|
|
||||||
NODE_NAME_CASE(VCC_BITCAST)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,63 +0,0 @@
|
||||||
//===-- SIISelLowering.h - SI DAG Lowering Interface ------------*- C++ -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// SI DAG Lowering interface definition
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#ifndef SIISELLOWERING_H
#define SIISELLOWERING_H

#include "AMDGPUISelLowering.h"
#include "SIInstrInfo.h"

namespace llvm {

/// SITargetLowering - DAG lowering implementation for the SI target.
/// Custom-inserts the SI pseudo instructions and lowers the operations the
/// generic AMDGPU lowering cannot handle.
class SITargetLowering : public AMDGPUTargetLowering
{
  const SIInstrInfo * TII;

  /// AppendS_WAITCNT - Memory reads and writes are synchronized using the
  /// S_WAITCNT instruction. This function takes the most conservative
  /// approach and inserts an S_WAITCNT instruction after every read and
  /// write.
  void AppendS_WAITCNT(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I) const;
  // Fixed parameter-name typo: "Opocde" -> "Opcode".
  void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, unsigned Opcode) const;
  void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
  void LowerSI_INTERP_CONST(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, MachineRegisterInfo &MRI) const;
  void LowerSI_KIL(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
  void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
  void LowerSI_V_CNDLT(MachineInstr *MI, MachineBasicBlock &BB,
              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;

  SDValue Loweri1ContextSwitch(SDValue Op, SelectionDAG &DAG,
                               unsigned VCCNode) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;

public:
  SITargetLowering(TargetMachine &tm);
  virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
                                              MachineBasicBlock * BB) const;
  virtual EVT getSetCCResultType(EVT VT) const;
  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  virtual const char* getTargetNodeName(unsigned Opcode) const;
};

} // End namespace llvm

#endif //SIISELLOWERING_H
|
|
||||||
|
|
@ -1,131 +0,0 @@
|
||||||
//===-- SIInstrFormats.td - SI Instruction Formats ------------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// SI Instruction format definitions.
|
|
||||||
//
|
|
||||||
// Instructions with _32 take 32-bit operands.
|
|
||||||
// Instructions with _64 take 64-bit operands.
|
|
||||||
//
|
|
||||||
// VOP_* instructions can use either a 32-bit or 64-bit encoding. The 32-bit
|
|
||||||
// encoding is the standard encoding, but instruction that make use of
|
|
||||||
// any of the instruction modifiers must use the 64-bit encoding.
|
|
||||||
//
|
|
||||||
// Instructions with _e32 use the 32-bit encoding.
|
|
||||||
// Instructions with _e64 use the 64-bit encoding.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
// Three-source VALU instructions using the 64-bit VOP3 encoding.
// The trailing i32imm operands carry the VOP3 instruction modifiers.
class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
  op,
  (outs VReg_32:$dst),
  (ins AllReg_32:$src0, AllReg_32:$src1, AllReg_32:$src2,
       i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6),
  opName, pattern
>;

class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
  op,
  (outs VReg_64:$dst),
  (ins AllReg_64:$src0, AllReg_64:$src1, AllReg_64:$src2,
       i32imm:$src3, i32imm:$src4, i32imm:$src5, i32imm:$src6),
  opName, pattern
>;

// Scalar ALU instructions with one source operand.
class SOP1_32 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
  op, (outs SReg_32:$dst), (ins SReg_32:$src0), opName, pattern
>;

class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 <
  op, (outs SReg_64:$dst), (ins SReg_64:$src0), opName, pattern
>;

// Scalar ALU instructions with two source operands.
class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
  op, (outs SReg_32:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern
>;

class SOP2_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
  op, (outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern
>;

// Scalar ALU instruction writing its result to VCC.
class SOP2_VCC <bits<7> op, string opName, list<dag> pattern> : SOP2 <
  op, (outs VCCReg:$vcc), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern
>;
|
|
||||||
|
|
||||||
// One-source vector ALU instruction in the 32-bit VOP1 encoding.
class VOP1_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOP1 <
    op, (outs vrc:$dst), (ins arc:$src0), opName, pattern
  >;

// Emit both the 32-bit (_e32) and 64-bit (_e64) encodings of a VOP1
// instruction; the selection pattern is attached to the _e32 form.
multiclass VOP1_32 <bits<8> op, string opName, list<dag> pattern> {
  def _e32 : VOP1_Helper <op, VReg_32, AllReg_32, opName, pattern>;
  def _e64 : VOP3_32 <
    {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}

multiclass VOP1_64 <bits<8> op, string opName, list<dag> pattern> {
  def _e32 : VOP1_Helper <op, VReg_64, AllReg_64, opName, pattern>;
  def _e64 : VOP3_64 <
    {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}
|
|
||||||
|
|
||||||
// Two-source vector ALU instruction in the 32-bit VOP2 encoding.
class VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOP2 <
    op, (outs vrc:$dst), (ins arc:$src0, vrc:$src1), opName, pattern
  >;

// Emit both the 32-bit (_e32) and 64-bit (_e64) encodings of a VOP2
// instruction; the selection pattern is attached to the _e32 form.
multiclass VOP2_32 <bits<6> op, string opName, list<dag> pattern> {
  def _e32 : VOP2_Helper <op, VReg_32, AllReg_32, opName, pattern>;
  def _e64 : VOP3_32 <
    {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}

multiclass VOP2_64 <bits<6> op, string opName, list<dag> pattern> {
  def _e32 : VOP2_Helper <op, VReg_64, AllReg_64, opName, pattern>;
  def _e64 : VOP3_64 <
    {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}
|
|
||||||
|
|
||||||
// Scalar instructions with a 16-bit inline constant operand.
class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
  op, (outs SReg_32:$dst), (ins i16imm:$src0), opName, pattern
>;

class SOPK_64 <bits<5> op, string opName, list<dag> pattern> : SOPK <
  op, (outs SReg_64:$dst), (ins i16imm:$src0), opName, pattern
>;
|
|
||||||
|
|
||||||
// Vector comparison instruction (32-bit VOPC encoding). The result is
// written to VCC implicitly, so there is no outs list.
class VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
                   string opName, list<dag> pattern> :
  VOPC <
    op, (ins arc:$src0, vrc:$src1), opName, pattern
  >;

// Emit both the 32-bit (_e32) and 64-bit (_e64) encodings of a VOPC
// comparison; the selection pattern is attached to the _e32 form.
multiclass VOPC_32 <bits<8> op, string opName, list<dag> pattern> {
  def _e32 : VOPC_Helper <op, VReg_32, AllReg_32, opName, pattern>;
  def _e64 : VOP3_32 <
    {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}

multiclass VOPC_64 <bits<8> op, string opName, list<dag> pattern> {
  def _e32 : VOPC_Helper <op, VReg_64, AllReg_64, opName, pattern>;
  def _e64 : VOP3_64 <
    {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
    opName, []
  >;
}
|
|
||||||
|
|
||||||
// Scalar comparison instructions; the result goes to the SCC bit.
class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
  op, (outs SCCReg:$dst), (ins SReg_32:$src0, SReg_32:$src1), opName, pattern
>;

class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC <
  op, (outs SCCReg:$dst), (ins SReg_64:$src0, SReg_64:$src1), opName, pattern
>;
|
|
||||||
|
|
||||||
|
|
@ -1,76 +0,0 @@
|
||||||
//===-- SIInstrInfo.cpp - SI Instruction Information ---------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// SI Implementation of TargetInstrInfo.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
#include "SIInstrInfo.h"
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
#include "llvm/MC/MCInstrDesc.h"
|
|
||||||
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
// Construct SI instruction info; the register-info object is owned by value
// and wired back to this instance.
SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm)
    : AMDGPUInstrInfo(tm), RI(tm, *this), TM(tm)
{
}
|
|
||||||
|
|
||||||
/// getRegisterInfo - Accessor for the SI register information.
const SIRegisterInfo &SIInstrInfo::getRegisterInfo() const
{
  return RI;
}
|
|
||||||
|
|
||||||
void
|
|
||||||
SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
|
||||||
unsigned DestReg, unsigned SrcReg,
|
|
||||||
bool KillSrc) const
|
|
||||||
{
|
|
||||||
|
|
||||||
// If we are trying to copy to or from SCC, there is a bug somewhere else in
|
|
||||||
// the backend. While it may be theoretically possible to do this, it should
|
|
||||||
// never be necessary.
|
|
||||||
assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
|
|
||||||
|
|
||||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
|
|
||||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
|
||||||
}
|
|
||||||
|
|
||||||
/// getMovImmInstr - Create (but do not insert) a V_MOV_IMM_I32 that defines
/// DstReg with the immediate Imm.
MachineInstr * SIInstrInfo::getMovImmInstr(MachineFunction *MF, unsigned DstReg,
                                           int64_t Imm) const
{
  MachineInstr *MI =
      MF->CreateMachineInstr(get(AMDGPU::V_MOV_IMM_I32), DebugLoc());
  MachineInstrBuilder Builder(MI);
  Builder.addReg(DstReg, RegState::Define);
  Builder.addImm(Imm);

  return MI;
}
|
|
||||||
|
|
||||||
bool SIInstrInfo::isMov(unsigned Opcode) const
|
|
||||||
{
|
|
||||||
switch(Opcode) {
|
|
||||||
default: return false;
|
|
||||||
case AMDGPU::S_MOV_B32:
|
|
||||||
case AMDGPU::S_MOV_B64:
|
|
||||||
case AMDGPU::V_MOV_B32_e32:
|
|
||||||
case AMDGPU::V_MOV_B32_e64:
|
|
||||||
case AMDGPU::V_MOV_IMM_F32:
|
|
||||||
case AMDGPU::V_MOV_IMM_I32:
|
|
||||||
case AMDGPU::S_MOV_IMM_I32:
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,62 +0,0 @@
|
||||||
//===-- SIInstrInfo.h - SI Instruction Info Interface ---------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface definition for SIInstrInfo.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef SIINSTRINFO_H
|
|
||||||
#define SIINSTRINFO_H
|
|
||||||
|
|
||||||
#include "AMDGPUInstrInfo.h"
|
|
||||||
#include "SIRegisterInfo.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class SIInstrInfo : public AMDGPUInstrInfo {
|
|
||||||
private:
|
|
||||||
const SIRegisterInfo RI;
|
|
||||||
AMDGPUTargetMachine &TM;
|
|
||||||
|
|
||||||
public:
|
|
||||||
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
|
|
||||||
|
|
||||||
const SIRegisterInfo &getRegisterInfo() const;
|
|
||||||
|
|
||||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator MI, DebugLoc DL,
|
|
||||||
unsigned DestReg, unsigned SrcReg,
|
|
||||||
bool KillSrc) const;
|
|
||||||
|
|
||||||
/// getEncodingType - Returns the encoding type of this instruction.
|
|
||||||
unsigned getEncodingType(const MachineInstr &MI) const;
|
|
||||||
|
|
||||||
/// getEncodingBytes - Returns the size of this instructions encoding in
|
|
||||||
/// number of bytes.
|
|
||||||
unsigned getEncodingBytes(const MachineInstr &MI) const;
|
|
||||||
|
|
||||||
virtual MachineInstr * getMovImmInstr(MachineFunction *MF, unsigned DstReg,
|
|
||||||
int64_t Imm) const;
|
|
||||||
|
|
||||||
virtual unsigned getIEQOpcode() const { assert(!"Implement"); return 0;}
|
|
||||||
virtual bool isMov(unsigned Opcode) const;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
// TSFlags bits shared with the .td definitions (see InstSI).
namespace SIInstrFlags {
  enum Flags {
    // First 4 bits are the instruction encoding
    NEED_WAIT = 1 << 4  // Instruction must be followed by an S_WAITCNT.
  };
}
|
|
||||||
|
|
||||||
#endif //SIINSTRINFO_H
|
|
||||||
|
|
@ -1,506 +0,0 @@
|
||||||
//===-- SIInstrInfo.td - SI Instruction Encodings ---------*- tablegen -*--===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// SI DAG Profiles
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// Profile for binary integer ops whose two sources share a type.
def SDTVCCBinaryOp : SDTypeProfile<1, 2, [
  SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
]>;

//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//

// and operation on 64-bit wide vcc
def SIvcc_and : SDNode<"SIISD::VCC_AND", SDTVCCBinaryOp,
  [SDNPCommutative, SDNPAssociative]
>;

// Special bitcast node for sharing VCC register between VALU and SALU
def SIvcc_bitcast : SDNode<"SIISD::VCC_BITCAST",
  SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>
>;
|
|
||||||
|
|
||||||
// Base class for all SI instructions. TSFlags layout matches the
// SIInstrFlags enum on the C++ side: bits 3-0 carry the encoding family,
// bit 4 marks instructions that must be followed by an S_WAITCNT.
class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
    AMDGPUInst<outs, ins, asm, pattern> {

  field bits<4> EncodingType = 0;
  field bits<1> NeedWait = 0;

  let TSFlags{3-0} = EncodingType;
  let TSFlags{4} = NeedWait;
}

// SI instruction with a 32-bit machine encoding.
class Enc32 <dag outs, dag ins, string asm, list<dag> pattern> :
    InstSI <outs, ins, asm, pattern> {
  field bits<32> Inst;
}

// SI instruction with a 64-bit machine encoding.
class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> :
    InstSI <outs, ins, asm, pattern> {
  field bits<64> Inst;
}
|
|
||||||
|
|
||||||
// Generic SI operand encoded through the custom encodeOperand hook.
class SIOperand <ValueType vt, dag opInfo> : Operand <vt> {
  let EncoderMethod = "encodeOperand";
  let MIOperandInfo = opInfo;
}

// Immediate predicates used by the SMRD/MTBUF addressing patterns.
def IMM16bit : ImmLeaf <
  i16,
  [{return isInt<16>(Imm);}]
>;

def IMM8bit : ImmLeaf <
  i32,
  [{return (int32_t)Imm >= 0 && (int32_t)Imm <= 0xff;}]
>;

def IMM12bit : ImmLeaf <
  i16,
  [{return (int16_t)Imm >= 0 && (int16_t)Imm <= 0xfff;}]
>;

def IMM32bitIn64bit : ImmLeaf <
  i64,
  [{return isInt<32>(Imm);}]
>;
|
|
||||||
|
|
||||||
// Register operand that must start at a 4-register-aligned boundary.
class GPR4Align <RegisterClass rc> : Operand <vAny> {
  let EncoderMethod = "GPR4AlignEncode";
  let MIOperandInfo = (ops rc:$reg);
}

// Register operand that must start at a 2-register-aligned boundary.
class GPR2Align <RegisterClass rc, ValueType vt> : Operand <vt> {
  let EncoderMethod = "GPR2AlignEncode";
  let MIOperandInfo = (ops rc:$reg);
}

// SMRD memory operand: 64-bit base register + 32-bit register offset.
def SMRDmemrr : Operand<iPTR> {
  let MIOperandInfo = (ops SReg_64, SReg_32);
  let EncoderMethod = "GPR2AlignEncode";
}

// SMRD memory operand: 64-bit base register + immediate offset.
def SMRDmemri : Operand<iPTR> {
  let MIOperandInfo = (ops SReg_64, i32imm);
  let EncoderMethod = "SMRDmemriEncode";
}
|
|
||||||
|
|
||||||
// Address-selection patterns: register+register and register+8-bit offset.
def ADDR_Reg     : ComplexPattern<i64, 2, "SelectADDRReg", [], []>;
def ADDR_Offset8 : ComplexPattern<i64, 2, "SelectADDR8BitOffset", [], []>;
|
|
||||||
|
|
||||||
// Export and memory instructions all implicitly read the EXEC mask.
let Uses = [EXEC] in {

// EXP - export to a render target, position, or parameter buffer.
def EXP : Enc64<
  (outs),
  (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
       VReg_32:$src0, VReg_32:$src1, VReg_32:$src2, VReg_32:$src3),
  "EXP $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
  [] > {

  bits<4> EN;
  bits<6> TGT;
  bits<1> COMPR;
  bits<1> DONE;
  bits<1> VM;
  bits<8> VSRC0;
  bits<8> VSRC1;
  bits<8> VSRC2;
  bits<8> VSRC3;

  let Inst{3-0}   = EN;
  let Inst{9-4}   = TGT;
  let Inst{10}    = COMPR;
  let Inst{11}    = DONE;
  let Inst{12}    = VM;
  let Inst{31-26} = 0x3e; // encoding
  let Inst{39-32} = VSRC0;
  let Inst{47-40} = VSRC1;
  let Inst{55-48} = VSRC2;
  let Inst{63-56} = VSRC3;
  let EncodingType = 0; //SIInstrEncodingType::EXP

  let NeedWait = 1;
  let usesCustomInserter = 1;
}

// MIMG - image memory operations (sample, load, store through an image
// resource descriptor).
class MIMG <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
    Enc64 <outs, ins, asm, pattern> {

  bits<8> VDATA;
  bits<4> DMASK;
  bits<1> UNORM;
  bits<1> GLC;
  bits<1> DA;
  bits<1> R128;
  bits<1> TFE;
  bits<1> LWE;
  bits<1> SLC;
  bits<8> VADDR;
  bits<5> SRSRC;
  bits<5> SSAMP;

  let Inst{11-8}  = DMASK;
  let Inst{12}    = UNORM;
  let Inst{13}    = GLC;
  let Inst{14}    = DA;
  let Inst{15}    = R128;
  let Inst{16}    = TFE;
  let Inst{17}    = LWE;
  let Inst{24-18} = op;
  let Inst{25}    = SLC;
  let Inst{31-26} = 0x3c; // encoding
  let Inst{39-32} = VADDR;
  let Inst{47-40} = VDATA;
  let Inst{52-48} = SRSRC;
  let Inst{57-53} = SSAMP;

  let EncodingType = 2; //SIInstrEncodingType::MIMG

  let NeedWait = 1;
  let usesCustomInserter = 1;
}

// MTBUF - typed buffer memory operations (format conversion in DFMT/NFMT).
class MTBUF <bits<3> op, dag outs, dag ins, string asm, list<dag> pattern> :
    Enc64<outs, ins, asm, pattern> {

  bits<8>  VDATA;
  bits<12> OFFSET;
  bits<1>  OFFEN;
  bits<1>  IDXEN;
  bits<1>  GLC;
  bits<1>  ADDR64;
  bits<4>  DFMT;
  bits<3>  NFMT;
  bits<8>  VADDR;
  bits<5>  SRSRC;
  bits<1>  SLC;
  bits<1>  TFE;
  bits<8>  SOFFSET;

  let Inst{11-0}  = OFFSET;
  let Inst{12}    = OFFEN;
  let Inst{13}    = IDXEN;
  let Inst{14}    = GLC;
  let Inst{15}    = ADDR64;
  let Inst{18-16} = op;
  let Inst{22-19} = DFMT;
  let Inst{25-23} = NFMT;
  let Inst{31-26} = 0x3a; //encoding
  let Inst{39-32} = VADDR;
  let Inst{47-40} = VDATA;
  let Inst{52-48} = SRSRC;
  let Inst{54}    = SLC;
  let Inst{55}    = TFE;
  let Inst{63-56} = SOFFSET;
  let EncodingType = 3; //SIInstrEncodingType::MTBUF

  let NeedWait = 1;
  let usesCustomInserter = 1;
  let neverHasSideEffects = 1;
}

// MUBUF - untyped buffer memory operations.
class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
    Enc64<outs, ins, asm, pattern> {

  bits<8>  VDATA;
  bits<12> OFFSET;
  bits<1>  OFFEN;
  bits<1>  IDXEN;
  bits<1>  GLC;
  bits<1>  ADDR64;
  bits<1>  LDS;
  bits<8>  VADDR;
  bits<5>  SRSRC;
  bits<1>  SLC;
  bits<1>  TFE;
  bits<8>  SOFFSET;

  let Inst{11-0}  = OFFSET;
  let Inst{12}    = OFFEN;
  let Inst{13}    = IDXEN;
  let Inst{14}    = GLC;
  let Inst{15}    = ADDR64;
  let Inst{16}    = LDS;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x38; //encoding
  let Inst{39-32} = VADDR;
  let Inst{47-40} = VDATA;
  let Inst{52-48} = SRSRC;
  let Inst{54}    = SLC;
  let Inst{55}    = TFE;
  let Inst{63-56} = SOFFSET;
  let EncodingType = 4; //SIInstrEncodingType::MUBUF

  let NeedWait = 1;
  let usesCustomInserter = 1;
  let neverHasSideEffects = 1;
}
} // End Uses = [EXEC]
|
|
||||||
|
|
||||||
class SMRD <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc32<outs, ins, asm, pattern> {
|
|
||||||
|
|
||||||
bits<7> SDST;
|
|
||||||
bits<15> PTR;
|
|
||||||
bits<8> OFFSET = PTR{7-0};
|
|
||||||
bits<1> IMM = PTR{8};
|
|
||||||
bits<6> SBASE = PTR{14-9};
|
|
||||||
|
|
||||||
let Inst{7-0} = OFFSET;
|
|
||||||
let Inst{8} = IMM;
|
|
||||||
let Inst{14-9} = SBASE;
|
|
||||||
let Inst{21-15} = SDST;
|
|
||||||
let Inst{26-22} = op;
|
|
||||||
let Inst{31-27} = 0x18; //encoding
|
|
||||||
let EncodingType = 5; //SIInstrEncodingType::SMRD
|
|
||||||
|
|
||||||
let NeedWait = 1;
|
|
||||||
let usesCustomInserter = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
class SOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc32<outs, ins, asm, pattern> {
|
|
||||||
|
|
||||||
bits<7> SDST;
|
|
||||||
bits<8> SSRC0;
|
|
||||||
|
|
||||||
let Inst{7-0} = SSRC0;
|
|
||||||
let Inst{15-8} = op;
|
|
||||||
let Inst{22-16} = SDST;
|
|
||||||
let Inst{31-23} = 0x17d; //encoding;
|
|
||||||
let EncodingType = 6; //SIInstrEncodingType::SOP1
|
|
||||||
}
|
|
||||||
|
|
||||||
class SOP2 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc32 <outs, ins, asm, pattern> {
|
|
||||||
|
|
||||||
bits<7> SDST;
|
|
||||||
bits<8> SSRC0;
|
|
||||||
bits<8> SSRC1;
|
|
||||||
|
|
||||||
let Inst{7-0} = SSRC0;
|
|
||||||
let Inst{15-8} = SSRC1;
|
|
||||||
let Inst{22-16} = SDST;
|
|
||||||
let Inst{29-23} = op;
|
|
||||||
let Inst{31-30} = 0x2; // encoding
|
|
||||||
let EncodingType = 7; // SIInstrEncodingType::SOP2
|
|
||||||
}
|
|
||||||
|
|
||||||
class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc32<outs, ins, asm, pattern> {
|
|
||||||
|
|
||||||
bits<8> SSRC0;
|
|
||||||
bits<8> SSRC1;
|
|
||||||
|
|
||||||
let Inst{7-0} = SSRC0;
|
|
||||||
let Inst{15-8} = SSRC1;
|
|
||||||
let Inst{22-16} = op;
|
|
||||||
let Inst{31-23} = 0x17e;
|
|
||||||
let EncodingType = 8; // SIInstrEncodingType::SOPC
|
|
||||||
|
|
||||||
let DisableEncoding = "$dst";
|
|
||||||
}
|
|
||||||
|
|
||||||
class SOPK <bits<5> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc32 <outs, ins , asm, pattern> {
|
|
||||||
|
|
||||||
bits <7> SDST;
|
|
||||||
bits <16> SIMM16;
|
|
||||||
|
|
||||||
let Inst{15-0} = SIMM16;
|
|
||||||
let Inst{22-16} = SDST;
|
|
||||||
let Inst{27-23} = op;
|
|
||||||
let Inst{31-28} = 0xb; //encoding
|
|
||||||
let EncodingType = 9; // SIInstrEncodingType::SOPK
|
|
||||||
}
|
|
||||||
|
|
||||||
class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern> : Enc32 <
|
|
||||||
(outs),
|
|
||||||
ins,
|
|
||||||
asm,
|
|
||||||
pattern > {
|
|
||||||
|
|
||||||
bits <16> SIMM16;
|
|
||||||
|
|
||||||
let Inst{15-0} = SIMM16;
|
|
||||||
let Inst{22-16} = op;
|
|
||||||
let Inst{31-23} = 0x17f; // encoding
|
|
||||||
let EncodingType = 10; // SIInstrEncodingType::SOPP
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
let Uses = [EXEC] in {
|
|
||||||
class VINTRP <bits <2> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc32 <outs, ins, asm, pattern> {
|
|
||||||
|
|
||||||
bits<8> VDST;
|
|
||||||
bits<8> VSRC;
|
|
||||||
bits<2> ATTRCHAN;
|
|
||||||
bits<6> ATTR;
|
|
||||||
|
|
||||||
let Inst{7-0} = VSRC;
|
|
||||||
let Inst{9-8} = ATTRCHAN;
|
|
||||||
let Inst{15-10} = ATTR;
|
|
||||||
let Inst{17-16} = op;
|
|
||||||
let Inst{25-18} = VDST;
|
|
||||||
let Inst{31-26} = 0x32; // encoding
|
|
||||||
let EncodingType = 11; // SIInstrEncodingType::VINTRP
|
|
||||||
|
|
||||||
let neverHasSideEffects = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc32 <outs, ins, asm, pattern> {
|
|
||||||
|
|
||||||
bits<8> VDST;
|
|
||||||
bits<9> SRC0;
|
|
||||||
|
|
||||||
let Inst{8-0} = SRC0;
|
|
||||||
let Inst{16-9} = op;
|
|
||||||
let Inst{24-17} = VDST;
|
|
||||||
let Inst{31-25} = 0x3f; //encoding
|
|
||||||
|
|
||||||
let EncodingType = 12; // SIInstrEncodingType::VOP1
|
|
||||||
let PostEncoderMethod = "VOPPostEncode";
|
|
||||||
}
|
|
||||||
|
|
||||||
class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc32 <outs, ins, asm, pattern> {
|
|
||||||
|
|
||||||
bits<8> VDST;
|
|
||||||
bits<9> SRC0;
|
|
||||||
bits<8> VSRC1;
|
|
||||||
|
|
||||||
let Inst{8-0} = SRC0;
|
|
||||||
let Inst{16-9} = VSRC1;
|
|
||||||
let Inst{24-17} = VDST;
|
|
||||||
let Inst{30-25} = op;
|
|
||||||
let Inst{31} = 0x0; //encoding
|
|
||||||
|
|
||||||
let EncodingType = 13; // SIInstrEncodingType::VOP2
|
|
||||||
let PostEncoderMethod = "VOPPostEncode";
|
|
||||||
}
|
|
||||||
|
|
||||||
class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc64 <outs, ins, asm, pattern> {
|
|
||||||
|
|
||||||
bits<8> VDST;
|
|
||||||
bits<9> SRC0;
|
|
||||||
bits<9> SRC1;
|
|
||||||
bits<9> SRC2;
|
|
||||||
bits<3> ABS;
|
|
||||||
bits<1> CLAMP;
|
|
||||||
bits<2> OMOD;
|
|
||||||
bits<3> NEG;
|
|
||||||
|
|
||||||
let Inst{7-0} = VDST;
|
|
||||||
let Inst{10-8} = ABS;
|
|
||||||
let Inst{11} = CLAMP;
|
|
||||||
let Inst{25-17} = op;
|
|
||||||
let Inst{31-26} = 0x34; //encoding
|
|
||||||
let Inst{40-32} = SRC0;
|
|
||||||
let Inst{49-41} = SRC1;
|
|
||||||
let Inst{58-50} = SRC2;
|
|
||||||
let Inst{60-59} = OMOD;
|
|
||||||
let Inst{63-61} = NEG;
|
|
||||||
|
|
||||||
let EncodingType = 14; // SIInstrEncodingType::VOP3
|
|
||||||
let PostEncoderMethod = "VOPPostEncode";
|
|
||||||
}
|
|
||||||
|
|
||||||
class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
|
|
||||||
Enc32 <(outs VCCReg:$dst), ins, asm, pattern> {
|
|
||||||
|
|
||||||
bits<9> SRC0;
|
|
||||||
bits<8> VSRC1;
|
|
||||||
|
|
||||||
let Inst{8-0} = SRC0;
|
|
||||||
let Inst{16-9} = VSRC1;
|
|
||||||
let Inst{24-17} = op;
|
|
||||||
let Inst{31-25} = 0x3e;
|
|
||||||
|
|
||||||
let EncodingType = 15; //SIInstrEncodingType::VOPC
|
|
||||||
let PostEncoderMethod = "VOPPostEncode";
|
|
||||||
let DisableEncoding = "$dst";
|
|
||||||
}
|
|
||||||
} // End Uses = [EXEC]
|
|
||||||
|
|
||||||
class MIMG_Load_Helper <bits<7> op, string asm> : MIMG <
|
|
||||||
op,
|
|
||||||
(outs VReg_128:$vdata),
|
|
||||||
(ins i32imm:$dmask, i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128,
|
|
||||||
i1imm:$tfe, i1imm:$lwe, i1imm:$slc, VReg_128:$vaddr,
|
|
||||||
GPR4Align<SReg_256>:$srsrc, GPR4Align<SReg_128>:$ssamp),
|
|
||||||
asm,
|
|
||||||
[]
|
|
||||||
>;
|
|
||||||
|
|
||||||
class MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> : MUBUF <
|
|
||||||
op,
|
|
||||||
(outs regClass:$dst),
|
|
||||||
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
|
|
||||||
i1imm:$lds, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc, i1imm:$slc,
|
|
||||||
i1imm:$tfe, SReg_32:$soffset),
|
|
||||||
asm,
|
|
||||||
[]> {
|
|
||||||
let mayLoad = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
|
|
||||||
op,
|
|
||||||
(outs regClass:$dst),
|
|
||||||
(ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
|
|
||||||
i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, GPR4Align<SReg_128>:$srsrc,
|
|
||||||
i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
|
|
||||||
asm,
|
|
||||||
[]> {
|
|
||||||
let mayLoad = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
|
|
||||||
op,
|
|
||||||
(outs),
|
|
||||||
(ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
|
|
||||||
i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
|
|
||||||
GPR4Align<SReg_128>:$srsrc, i1imm:$slc, i1imm:$tfe, SReg_32:$soffset),
|
|
||||||
asm,
|
|
||||||
[]> {
|
|
||||||
let mayStore = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass dstClass,
|
|
||||||
ValueType vt> {
|
|
||||||
def _IMM : SMRD <
|
|
||||||
op,
|
|
||||||
(outs dstClass:$dst),
|
|
||||||
(ins SMRDmemri:$src0),
|
|
||||||
asm,
|
|
||||||
[(set (vt dstClass:$dst), (constant_load ADDR_Offset8:$src0))]
|
|
||||||
>;
|
|
||||||
|
|
||||||
def _SGPR : SMRD <
|
|
||||||
op,
|
|
||||||
(outs dstClass:$dst),
|
|
||||||
(ins SMRDmemrr:$src0),
|
|
||||||
asm,
|
|
||||||
[(set (vt dstClass:$dst), (constant_load ADDR_Reg:$src0))]
|
|
||||||
>;
|
|
||||||
}
|
|
||||||
|
|
||||||
multiclass SMRD_32 <bits<5> op, string asm, RegisterClass dstClass> {
|
|
||||||
defm _F32 : SMRD_Helper <op, asm, dstClass, f32>;
|
|
||||||
defm _I32 : SMRD_Helper <op, asm, dstClass, i32>;
|
|
||||||
}
|
|
||||||
|
|
||||||
include "SIInstrFormats.td"
|
|
||||||
include "SIInstructions.td"
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,40 +0,0 @@
|
||||||
//===-- SIIntrinsics.td - SI Intrinsic defs ----------------*- tablegen -*-===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// SI Intrinsic Definitions
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
let TargetPrefix = "SI", isTarget = 1 in {
|
|
||||||
|
|
||||||
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
|
||||||
def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
|
|
||||||
/* XXX: We may need a seperate intrinsic here for loading integer values */
|
|
||||||
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
|
|
||||||
def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
|
|
||||||
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], []> ;
|
|
||||||
def int_SI_wqm : Intrinsic <[], [], []>;
|
|
||||||
|
|
||||||
def int_SI_sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_v4f32_ty, llvm_v8i32_ty, llvm_v4i32_ty]>;
|
|
||||||
|
|
||||||
/* Interpolation Intrinsics */
|
|
||||||
|
|
||||||
def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
|
|
||||||
class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
|
|
||||||
|
|
||||||
def int_SI_fs_interp_linear_center : Interp;
|
|
||||||
def int_SI_fs_interp_linear_centroid : Interp;
|
|
||||||
def int_SI_fs_interp_persp_center : Interp;
|
|
||||||
def int_SI_fs_interp_persp_centroid : Interp;
|
|
||||||
def int_SI_fs_interp_constant : Interp;
|
|
||||||
|
|
||||||
def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
|
|
||||||
def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
|
||||||
}
|
|
||||||
|
|
@ -1,161 +0,0 @@
|
||||||
//===-- SILowerFlowControl.cpp - Use predicates for flow control ----------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This pass lowers the pseudo flow control instructions (SI_IF_NZ, ELSE, ENDIF)
|
|
||||||
// to predicated instructions.
|
|
||||||
//
|
|
||||||
// All flow control (except loops) is handled using predicated instructions and
|
|
||||||
// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
|
|
||||||
// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
|
|
||||||
// by writting to the 64-bit EXEC register (each bit corresponds to a
|
|
||||||
// single vector ALU). Typically, for predicates, a vector ALU will write
|
|
||||||
// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
|
|
||||||
// Vector ALU) and then the ScalarALU will AND the VCC register with the
|
|
||||||
// EXEC to update the predicates.
|
|
||||||
//
|
|
||||||
// For example:
|
|
||||||
// %VCC = V_CMP_GT_F32 %VGPR1, %VGPR2
|
|
||||||
// SI_IF_NZ %VCC
|
|
||||||
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0
|
|
||||||
// ELSE
|
|
||||||
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR0
|
|
||||||
// ENDIF
|
|
||||||
//
|
|
||||||
// becomes:
|
|
||||||
//
|
|
||||||
// %SGPR0 = S_MOV_B64 %EXEC // Save the current exec mask
|
|
||||||
// %EXEC = S_AND_B64 %VCC, %EXEC // Update the exec mask
|
|
||||||
// S_CBRANCH_EXECZ label0 // This instruction is an
|
|
||||||
// // optimization which allows us to
|
|
||||||
// // branch if all the bits of
|
|
||||||
// // EXEC are zero.
|
|
||||||
// %VGPR0 = V_ADD_F32 %VGPR0, %VGPR0 // Do the IF block of the branch
|
|
||||||
//
|
|
||||||
// label0:
|
|
||||||
// %EXEC = S_NOT_B64 %EXEC // Invert the exec mask for the
|
|
||||||
// // Then block.
|
|
||||||
// %EXEC = S_AND_B64 %SGPR0, %EXEC
|
|
||||||
// S_BRANCH_EXECZ label1 // Use our branch optimization
|
|
||||||
// // instruction again.
|
|
||||||
// %VGPR0 = V_SUB_F32 %VGPR0, %VGPR // Do the THEN block
|
|
||||||
// label1:
|
|
||||||
// S_MOV_B64 // Restore the old EXEC value
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPU.h"
|
|
||||||
#include "SIInstrInfo.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
class SILowerFlowControlPass : public MachineFunctionPass {
|
|
||||||
|
|
||||||
private:
|
|
||||||
static char ID;
|
|
||||||
const TargetInstrInfo *TII;
|
|
||||||
std::vector<unsigned> PredicateStack;
|
|
||||||
std::vector<unsigned> UnusedRegisters;
|
|
||||||
|
|
||||||
void pushExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
|
|
||||||
void popExecMask(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
|
|
||||||
|
|
||||||
public:
|
|
||||||
SILowerFlowControlPass(TargetMachine &tm) :
|
|
||||||
MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
|
|
||||||
|
|
||||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
|
||||||
|
|
||||||
const char *getPassName() const {
|
|
||||||
return "SI Lower flow control instructions";
|
|
||||||
}
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End anonymous namespace
|
|
||||||
|
|
||||||
char SILowerFlowControlPass::ID = 0;
|
|
||||||
|
|
||||||
FunctionPass *llvm::createSILowerFlowControlPass(TargetMachine &tm) {
|
|
||||||
return new SILowerFlowControlPass(tm);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool SILowerFlowControlPass::runOnMachineFunction(MachineFunction &MF) {
|
|
||||||
|
|
||||||
// Find all the unused registers that can be used for the predicate stack.
|
|
||||||
for (TargetRegisterClass::iterator S = AMDGPU::SReg_64RegClass.begin(),
|
|
||||||
I = AMDGPU::SReg_64RegClass.end();
|
|
||||||
I != S; --I) {
|
|
||||||
unsigned Reg = *I;
|
|
||||||
if (!MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
|
|
||||||
UnusedRegisters.push_back(Reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
|
||||||
BB != BB_E; ++BB) {
|
|
||||||
MachineBasicBlock &MBB = *BB;
|
|
||||||
for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
|
|
||||||
I != MBB.end(); I = Next, Next = llvm::next(I)) {
|
|
||||||
MachineInstr &MI = *I;
|
|
||||||
switch (MI.getOpcode()) {
|
|
||||||
default: break;
|
|
||||||
case AMDGPU::SI_IF_NZ:
|
|
||||||
pushExecMask(MBB, I);
|
|
||||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
|
|
||||||
AMDGPU::EXEC)
|
|
||||||
.addOperand(MI.getOperand(0)) // VCC
|
|
||||||
.addReg(AMDGPU::EXEC);
|
|
||||||
MI.eraseFromParent();
|
|
||||||
break;
|
|
||||||
case AMDGPU::ELSE:
|
|
||||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_NOT_B64),
|
|
||||||
AMDGPU::EXEC)
|
|
||||||
.addReg(AMDGPU::EXEC);
|
|
||||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_AND_B64),
|
|
||||||
AMDGPU::EXEC)
|
|
||||||
.addReg(PredicateStack.back())
|
|
||||||
.addReg(AMDGPU::EXEC);
|
|
||||||
MI.eraseFromParent();
|
|
||||||
break;
|
|
||||||
case AMDGPU::ENDIF:
|
|
||||||
popExecMask(MBB, I);
|
|
||||||
MI.eraseFromParent();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SILowerFlowControlPass::pushExecMask(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator I) {
|
|
||||||
|
|
||||||
assert(!UnusedRegisters.empty() && "Ran out of registers for predicate stack");
|
|
||||||
unsigned StackReg = UnusedRegisters.back();
|
|
||||||
UnusedRegisters.pop_back();
|
|
||||||
PredicateStack.push_back(StackReg);
|
|
||||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
|
|
||||||
StackReg)
|
|
||||||
.addReg(AMDGPU::EXEC);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SILowerFlowControlPass::popExecMask(MachineBasicBlock &MBB,
|
|
||||||
MachineBasicBlock::iterator I) {
|
|
||||||
unsigned StackReg = PredicateStack.back();
|
|
||||||
PredicateStack.pop_back();
|
|
||||||
UnusedRegisters.push_back(StackReg);
|
|
||||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B64),
|
|
||||||
AMDGPU::EXEC)
|
|
||||||
.addReg(StackReg);
|
|
||||||
}
|
|
||||||
|
|
@ -1,105 +0,0 @@
|
||||||
//===-- SILowerLiteralConstants.cpp - Lower intrs using literal constants--===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
// This pass performs the following transformation on instructions with
|
|
||||||
// literal constants:
|
|
||||||
//
|
|
||||||
// %VGPR0 = V_MOV_IMM_I32 1
|
|
||||||
//
|
|
||||||
// becomes:
|
|
||||||
//
|
|
||||||
// BUNDLE
|
|
||||||
// * %VGPR = V_MOV_B32_32 SI_LITERAL_CONSTANT
|
|
||||||
// * SI_LOAD_LITERAL 1
|
|
||||||
//
|
|
||||||
// The resulting sequence matches exactly how the hardware handles immediate
|
|
||||||
// operands, so this transformation greatly simplifies the code generator.
|
|
||||||
//
|
|
||||||
// Only the *_MOV_IMM_* support immediate operands at the moment, but when
|
|
||||||
// support for immediate operands is added to other instructions, they
|
|
||||||
// will be lowered here as well.
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPU.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
|
||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
||||||
#include "llvm/CodeGen/MachineInstrBundle.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
class SILowerLiteralConstantsPass : public MachineFunctionPass {
|
|
||||||
|
|
||||||
private:
|
|
||||||
static char ID;
|
|
||||||
const TargetInstrInfo *TII;
|
|
||||||
|
|
||||||
public:
|
|
||||||
SILowerLiteralConstantsPass(TargetMachine &tm) :
|
|
||||||
MachineFunctionPass(ID), TII(tm.getInstrInfo()) { }
|
|
||||||
|
|
||||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
|
||||||
|
|
||||||
const char *getPassName() const {
|
|
||||||
return "SI Lower literal constants pass";
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End anonymous namespace
|
|
||||||
|
|
||||||
char SILowerLiteralConstantsPass::ID = 0;
|
|
||||||
|
|
||||||
FunctionPass *llvm::createSILowerLiteralConstantsPass(TargetMachine &tm) {
|
|
||||||
return new SILowerLiteralConstantsPass(tm);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool SILowerLiteralConstantsPass::runOnMachineFunction(MachineFunction &MF) {
|
|
||||||
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
|
|
||||||
BB != BB_E; ++BB) {
|
|
||||||
MachineBasicBlock &MBB = *BB;
|
|
||||||
for (MachineBasicBlock::iterator I = MBB.begin(), Next = llvm::next(I);
|
|
||||||
I != MBB.end(); I = Next, Next = llvm::next(I)) {
|
|
||||||
MachineInstr &MI = *I;
|
|
||||||
switch (MI.getOpcode()) {
|
|
||||||
default: break;
|
|
||||||
case AMDGPU::S_MOV_IMM_I32:
|
|
||||||
case AMDGPU::S_MOV_IMM_I64:
|
|
||||||
case AMDGPU::V_MOV_IMM_F32:
|
|
||||||
case AMDGPU::V_MOV_IMM_I32: {
|
|
||||||
unsigned MovOpcode;
|
|
||||||
unsigned LoadLiteralOpcode;
|
|
||||||
MachineOperand LiteralOp = MI.getOperand(1);
|
|
||||||
if (AMDGPU::VReg_32RegClass.contains(MI.getOperand(0).getReg())) {
|
|
||||||
MovOpcode = AMDGPU::V_MOV_B32_e32;
|
|
||||||
} else {
|
|
||||||
MovOpcode = AMDGPU::S_MOV_B32;
|
|
||||||
}
|
|
||||||
if (LiteralOp.isImm()) {
|
|
||||||
LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_I32;
|
|
||||||
} else {
|
|
||||||
LoadLiteralOpcode = AMDGPU::SI_LOAD_LITERAL_F32;
|
|
||||||
}
|
|
||||||
MachineInstr *First =
|
|
||||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(MovOpcode),
|
|
||||||
MI.getOperand(0).getReg())
|
|
||||||
.addReg(AMDGPU::SI_LITERAL_CONSTANT);
|
|
||||||
MachineInstr *Last =
|
|
||||||
BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(LoadLiteralOpcode))
|
|
||||||
.addOperand(MI.getOperand(1));
|
|
||||||
Last->setIsInsideBundle();
|
|
||||||
llvm::finalizeBundle(MBB, First, Last);
|
|
||||||
MI.eraseFromParent();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
@ -1,19 +0,0 @@
|
||||||
//===-- SIMachineFunctionInfo.cpp - SI Machine Function Info -------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
#include "SIMachineFunctionInfo.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
|
|
||||||
: MachineFunctionInfo(),
|
|
||||||
SPIPSInputAddr(0),
|
|
||||||
ShaderType(0)
|
|
||||||
{ }
|
|
||||||
|
|
@ -1,38 +0,0 @@
|
||||||
//===- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface -*- C++ -*-==//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// SIMachineFunctionInfo is used to keep track of the spi_sp_input_addr config
|
|
||||||
// register, which is to tell the hardware which interpolation parameters to
|
|
||||||
// load.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef _SIMACHINEFUNCTIONINFO_H_
|
|
||||||
#define _SIMACHINEFUNCTIONINFO_H_
|
|
||||||
|
|
||||||
#include "llvm/CodeGen/MachineFunction.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class SIMachineFunctionInfo : public MachineFunctionInfo {
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
public:
|
|
||||||
SIMachineFunctionInfo(const MachineFunction &MF);
|
|
||||||
unsigned SPIPSInputAddr;
|
|
||||||
unsigned ShaderType;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
|
|
||||||
#endif //_SIMACHINEFUNCTIONINFO_H_
|
|
||||||
|
|
@ -1,60 +0,0 @@
|
||||||
//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// This file contains the SI implementation of the TargetRegisterInfo class.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
#include "SIRegisterInfo.h"
|
|
||||||
#include "AMDGPUTargetMachine.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm,
|
|
||||||
const TargetInstrInfo &tii)
|
|
||||||
: AMDGPURegisterInfo(tm, tii),
|
|
||||||
TM(tm),
|
|
||||||
TII(tii)
|
|
||||||
{ }
|
|
||||||
|
|
||||||
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const
|
|
||||||
{
|
|
||||||
BitVector Reserved(getNumRegs());
|
|
||||||
return Reserved;
|
|
||||||
}
|
|
||||||
|
|
||||||
unsigned SIRegisterInfo::getBinaryCode(unsigned reg) const
|
|
||||||
{
|
|
||||||
switch (reg) {
|
|
||||||
case AMDGPU::M0: return 124;
|
|
||||||
case AMDGPU::SREG_LIT_0: return 128;
|
|
||||||
default: return getHWRegNum(reg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const TargetRegisterClass *
|
|
||||||
SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const
|
|
||||||
{
|
|
||||||
switch (rc->getID()) {
|
|
||||||
case AMDGPU::GPRF32RegClassID:
|
|
||||||
return &AMDGPU::VReg_32RegClass;
|
|
||||||
default: return rc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass(
|
|
||||||
MVT VT) const
|
|
||||||
{
|
|
||||||
switch(VT.SimpleTy) {
|
|
||||||
default:
|
|
||||||
case MVT::i32: return &AMDGPU::VReg_32RegClass;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#include "SIRegisterGetHWRegNum.inc"
|
|
||||||
|
|
@ -1,54 +0,0 @@
|
||||||
//===-- SIRegisterInfo.h - SI Register Info Interface ----------*- C++ -*--===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// Interface definition for SIRegisterInfo
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef SIREGISTERINFO_H_
|
|
||||||
#define SIREGISTERINFO_H_
|
|
||||||
|
|
||||||
#include "AMDGPURegisterInfo.h"
|
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
|
|
||||||
class AMDGPUTargetMachine;
|
|
||||||
class TargetInstrInfo;
|
|
||||||
|
|
||||||
struct SIRegisterInfo : public AMDGPURegisterInfo
|
|
||||||
{
|
|
||||||
AMDGPUTargetMachine &TM;
|
|
||||||
const TargetInstrInfo &TII;
|
|
||||||
|
|
||||||
SIRegisterInfo(AMDGPUTargetMachine &tm, const TargetInstrInfo &tii);
|
|
||||||
|
|
||||||
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
|
|
||||||
|
|
||||||
/// getBinaryCode - Returns the hardware encoding for a register
|
|
||||||
virtual unsigned getBinaryCode(unsigned reg) const;
|
|
||||||
|
|
||||||
/// getISARegClass - rc is an AMDIL reg class. This function returns the
|
|
||||||
/// SI register class that is equivalent to the given AMDIL register class.
|
|
||||||
virtual const TargetRegisterClass *
|
|
||||||
getISARegClass(const TargetRegisterClass * rc) const;
|
|
||||||
|
|
||||||
/// getHWRegNum - Generated function that returns the hardware encoding for
|
|
||||||
/// a register
|
|
||||||
unsigned getHWRegNum(unsigned reg) const;
|
|
||||||
|
|
||||||
/// getCFGStructurizerRegClass - get the register class of the specified
|
|
||||||
/// type to use in the CFGStructurizer
|
|
||||||
virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
} // End namespace llvm
|
|
||||||
|
|
||||||
#endif // SIREGISTERINFO_H_
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
//===-- SISchedule.td - SI Scheduling definitons -------------------------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// TODO: This is just a place holder for now.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
|
|
||||||
def SI_Itin : ProcessorItineraries <[], [], []>;
|
|
||||||
|
|
@ -1,26 +0,0 @@
|
||||||
//===-- TargetInfo/AMDGPUTargetInfo.cpp - TODO: Add brief description -------===//
|
|
||||||
//
|
|
||||||
// The LLVM Compiler Infrastructure
|
|
||||||
//
|
|
||||||
// This file is distributed under the University of Illinois Open Source
|
|
||||||
// License. See LICENSE.TXT for details.
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
//
|
|
||||||
// TODO: Add full description
|
|
||||||
//
|
|
||||||
//===----------------------------------------------------------------------===//
|
|
||||||
|
|
||||||
#include "AMDGPU.h"
|
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
/// The target for the AMDGPU backend
|
|
||||||
Target llvm::TheAMDGPUTarget;
|
|
||||||
|
|
||||||
/// Extern function to initialize the targets for the AMDGPU backend
|
|
||||||
extern "C" void LLVMInitializeAMDGPUTargetInfo() {
|
|
||||||
RegisterTarget<Triple::r600, false>
|
|
||||||
R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
|
|
||||||
}
|
|
||||||
|
|
@ -1,35 +0,0 @@
|
||||||
|
|
||||||
#include "radeon_llvm_emit.h"
|
|
||||||
|
|
||||||
#include <llvm/Support/CommandLine.h>
|
|
||||||
#include <llvm/Support/IRReader.h>
|
|
||||||
#include <llvm/Support/SourceMgr.h>
|
|
||||||
#include <llvm/LLVMContext.h>
|
|
||||||
#include <llvm/Module.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#include <llvm-c/Core.h>
|
|
||||||
|
|
||||||
using namespace llvm;
|
|
||||||
|
|
||||||
static cl::opt<std::string>
|
|
||||||
InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
|
|
||||||
|
|
||||||
static cl::opt<std::string>
|
|
||||||
TargetGPUName("gpu", cl::desc("target gpu name"), cl::value_desc("gpu_name"));
|
|
||||||
|
|
||||||
int main(int argc, char ** argv)
|
|
||||||
{
|
|
||||||
unsigned char * bytes;
|
|
||||||
unsigned byte_count;
|
|
||||||
|
|
||||||
std::auto_ptr<Module> M;
|
|
||||||
LLVMContext &Context = getGlobalContext();
|
|
||||||
SMDiagnostic Err;
|
|
||||||
cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
|
|
||||||
M.reset(ParseIRFile(InputFilename, Err, Context));
|
|
||||||
|
|
||||||
Module * mod = M.get();
|
|
||||||
|
|
||||||
radeon_llvm_compile(wrap(mod), &bytes, &byte_count, TargetGPUName.c_str(), 1);
|
|
||||||
}
|
|
||||||
|
|
@ -39,12 +39,7 @@
|
||||||
#include <llvm/Target/TargetMachine.h>
|
#include <llvm/Target/TargetMachine.h>
|
||||||
#include <llvm/Transforms/Scalar.h>
|
#include <llvm/Transforms/Scalar.h>
|
||||||
#include <llvm-c/Target.h>
|
#include <llvm-c/Target.h>
|
||||||
|
|
||||||
#if HAVE_LLVM < 0x0302
|
|
||||||
#include <llvm/Target/TargetData.h>
|
|
||||||
#else
|
|
||||||
#include <llvm/DataLayout.h>
|
#include <llvm/DataLayout.h>
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
@ -52,16 +47,6 @@
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
#ifndef EXTERNAL_LLVM
|
|
||||||
extern "C" {
|
|
||||||
|
|
||||||
void LLVMInitializeAMDGPUAsmPrinter(void);
|
|
||||||
void LLVMInitializeAMDGPUTargetMC(void);
|
|
||||||
void LLVMInitializeAMDGPUTarget(void);
|
|
||||||
void LLVMInitializeAMDGPUTargetInfo(void);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
class LLVMEnsureMultithreaded {
|
class LLVMEnsureMultithreaded {
|
||||||
|
|
@ -89,17 +74,10 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
|
||||||
|
|
||||||
Triple AMDGPUTriple(sys::getDefaultTargetTriple());
|
Triple AMDGPUTriple(sys::getDefaultTargetTriple());
|
||||||
|
|
||||||
#if HAVE_LLVM == 0x0302
|
|
||||||
LLVMInitializeAMDGPUTargetInfo();
|
|
||||||
LLVMInitializeAMDGPUTarget();
|
|
||||||
LLVMInitializeAMDGPUTargetMC();
|
|
||||||
LLVMInitializeAMDGPUAsmPrinter();
|
|
||||||
#else
|
|
||||||
LLVMInitializeR600TargetInfo();
|
LLVMInitializeR600TargetInfo();
|
||||||
LLVMInitializeR600Target();
|
LLVMInitializeR600Target();
|
||||||
LLVMInitializeR600TargetMC();
|
LLVMInitializeR600TargetMC();
|
||||||
LLVMInitializeR600AsmPrinter();
|
LLVMInitializeR600AsmPrinter();
|
||||||
#endif
|
|
||||||
|
|
||||||
std::string err;
|
std::string err;
|
||||||
const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err);
|
const Target * AMDGPUTarget = TargetRegistry::lookupTarget("r600", err);
|
||||||
|
|
@ -130,11 +108,7 @@ radeon_llvm_compile(LLVMModuleRef M, unsigned char ** bytes,
|
||||||
));
|
));
|
||||||
TargetMachine &AMDGPUTargetMachine = *tm.get();
|
TargetMachine &AMDGPUTargetMachine = *tm.get();
|
||||||
PassManager PM;
|
PassManager PM;
|
||||||
#if HAVE_LLVM < 0x0302
|
|
||||||
PM.add(new TargetData(*AMDGPUTargetMachine.getTargetData()));
|
|
||||||
#else
|
|
||||||
PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout()));
|
PM.add(new DataLayout(*AMDGPUTargetMachine.getDataLayout()));
|
||||||
#endif
|
|
||||||
PM.add(createPromoteMemoryToRegisterPass());
|
PM.add(createPromoteMemoryToRegisterPass());
|
||||||
AMDGPUTargetMachine.setAsmVerbosityDefault(true);
|
AMDGPUTargetMachine.setAsmVerbosityDefault(true);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue