radeon/llvm: Remove GlobalManager and KernelManager

This commit is contained in:
Tom Stellard 2012-04-24 21:39:11 -04:00
parent 8d3bf7ced5
commit 04993c9630
11 changed files with 23 additions and 3275 deletions

View file

@ -13,8 +13,6 @@
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDILGlobalManager.h"
#include "AMDILKernelManager.h"
#include "AMDILTargetMachine.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
@ -44,16 +42,9 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
:
AMDILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
Subtarget(TT, CPU, FS),
mGM(new AMDILGlobalManager(0 /* Debug mode */)),
mKM(new AMDILKernelManager(this, mGM)),
mDump(false)
{
/* XXX: Add these two initializations to fix a segfault, not sure if this
* is correct. These are normally initialized in the AsmPrinter, but AMDGPU
* does not use the asm printer */
Subtarget.setGlobalManager(mGM);
Subtarget.setKernelManager(mKM);
/* TLInfo uses InstrInfo so it must be initialized after. */
if (Subtarget.device()->getGeneration() <= AMDILDeviceInfo::HD6XXX) {
InstrInfo = new R600InstrInfo(*this);
@ -66,8 +57,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
AMDGPUTargetMachine::~AMDGPUTargetMachine()
{
delete mGM;
delete mKM;
}
bool AMDGPUTargetMachine::addPassesToEmitFile(PassManagerBase &PM,

File diff suppressed because it is too large Load diff

View file

@ -1,256 +0,0 @@
//===-- AMDILGlobalManager.h - TODO: Add brief description -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
// ==-----------------------------------------------------------------------===//
//
// Class that handles parsing and storing global variables that are relevant to
// the compilation of the module.
//
// ==-----------------------------------------------------------------------===//
#ifndef _AMDILGLOBALMANAGER_H_
#define _AMDILGLOBALMANAGER_H_
#include "AMDIL.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Module.h"
#include "llvm/Support/raw_ostream.h"
#include <set>
#include <string>
#define CB_BASE_OFFSET 2
namespace llvm {
class PointerType;
class AMDILKernelManager;
class AMDILSubtarget;
class TypeSymbolTable;
class Argument;
class GlobalValue;
class MachineFunction;
/// structure that holds information for a single local/region address array
typedef struct _arrayMemRec {
uint32_t vecSize; // size of each vector
uint32_t offset; // offset into the memory section
bool isHW; // flag to specify if HW is used or SW is used
bool isRegion; // flag to specify if GDS is used or not
} arraymem;
/// Structure that holds information for all local/region address
/// arrays in the kernel
typedef struct _localArgRec {
llvm::SmallVector<arraymem *, DEFAULT_VEC_SLOTS> local;
std::string name; // Kernel Name
} localArg;
/// structure that holds information about a constant address
/// space pointer that is a kernel argument
typedef struct _constPtrRec {
const Value *base;
uint32_t size;
uint32_t offset;
uint32_t cbNum; // value of 0 means that it does not use hw CB
bool isArray;
bool isArgument;
bool usesHardware;
std::string name;
} constPtr;
/// Structure that holds information for each kernel argument
typedef struct _kernelArgRec {
uint32_t reqGroupSize[3];
uint32_t reqRegionSize[3];
llvm::SmallVector<uint32_t, DEFAULT_VEC_SLOTS> argInfo;
bool mHasRWG;
bool mHasRWR;
} kernelArg;
/// Structure that holds information for each kernel
typedef struct _kernelRec {
mutable uint32_t curSize;
mutable uint32_t curRSize;
mutable uint32_t curHWSize;
mutable uint32_t curHWRSize;
uint32_t constSize;
kernelArg *sgv;
localArg *lvgv;
llvm::SmallVector<struct _constPtrRec, DEFAULT_VEC_SLOTS> constPtr;
uint32_t constSizes[HW_MAX_NUM_CB];
llvm::SmallSet<uint32_t, OPENCL_MAX_READ_IMAGES> readOnly;
llvm::SmallSet<uint32_t, OPENCL_MAX_WRITE_IMAGES> writeOnly;
llvm::SmallVector<std::pair<uint32_t, const Constant *>,
DEFAULT_VEC_SLOTS> CPOffsets;
} kernel;
class AMDILGlobalManager {
public:
AMDILGlobalManager(bool debugMode = false);
~AMDILGlobalManager();
/// Process the given module and parse out the global variable metadata passed
/// down from the frontend-compiler
void processModule(const Module &MF, const AMDILTargetMachine* mTM);
/// Returns whether the current name is the name of a kernel function or a
/// normal function
bool isKernel(const llvm::StringRef &name) const;
/// Returns true if the image ID corresponds to a read only image.
bool isReadOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
/// Returns true if the image ID corresponds to a write only image.
bool isWriteOnlyImage(const llvm::StringRef &name, uint32_t iID) const;
/// Returns the number of write only images for the kernel.
uint32_t getNumWriteImages(const llvm::StringRef &name) const;
/// Gets the group size of the kernel for the given dimension.
uint32_t getLocal(const llvm::StringRef &name, uint32_t dim) const;
/// Gets the region size of the kernel for the given dimension.
uint32_t getRegion(const llvm::StringRef &name, uint32_t dim) const;
/// Get the Region memory size in 1d for the given function/kernel.
uint32_t getRegionSize(const llvm::StringRef &name) const;
/// Get the region memory size in 1d for the given function/kernel.
uint32_t getLocalSize(const llvm::StringRef &name) const;
// Get the max group size in one 1D for the given function/kernel.
uint32_t getMaxGroupSize(const llvm::StringRef &name) const;
// Get the max region size in one 1D for the given function/kernel.
uint32_t getMaxRegionSize(const llvm::StringRef &name) const;
/// Get the constant memory size in 1d for the given function/kernel.
uint32_t getConstSize(const llvm::StringRef &name) const;
/// Get the HW local size in 1d for the given function/kernel We need to
/// seperate SW local and HW local for the case where some local memory is
/// emulated in global and some is using the hardware features. The main
/// problem is that in OpenCL 1.0/1.1 cl_khr_byte_addressable_store allows
/// these actions to happen on all memory spaces, but the hardware can only
/// write byte address stores to UAV and LDS, not GDS or Stack.
uint32_t getHWLocalSize(const llvm::StringRef &name) const;
uint32_t getHWRegionSize(const llvm::StringRef &name) const;
/// Get the offset of the array for the kernel.
int32_t getArrayOffset(const llvm::StringRef &name) const;
/// Get the offset of the const memory for the kernel.
int32_t getConstOffset(const llvm::StringRef &name) const;
/// Get the boolean value if this particular constant uses HW or not.
bool getConstHWBit(const llvm::StringRef &name) const;
/// Get a reference to the kernel metadata information for the given function
/// name.
const kernel &getKernel(const llvm::StringRef &name) const;
/// Returns whether a reqd_workgroup_size attribute has been used or not.
bool hasRWG(const llvm::StringRef &name) const;
/// Returns whether a reqd_workregion_size attribute has been used or not.
bool hasRWR(const llvm::StringRef &name) const;
/// Dump the data section to the output stream for the given kernel.
void dumpDataSection(llvm::raw_ostream &O, AMDILKernelManager *km);
/// Iterate through the constants that are global to the compilation unit.
StringMap<constPtr>::iterator consts_begin();
StringMap<constPtr>::iterator consts_end();
/// Query if the kernel has a byte store.
bool byteStoreExists(llvm::StringRef S) const;
/// Query if the kernel and argument uses hardware constant memory.
bool usesHWConstant(const kernel &krnl, const llvm::StringRef &arg);
/// Query if the constant pointer is an argument.
bool isConstPtrArgument(const kernel &krnl, const llvm::StringRef &arg);
/// Query if the constant pointer is an array that is globally scoped.
bool isConstPtrArray(const kernel &krnl, const llvm::StringRef &arg);
/// Query the size of the constant pointer.
uint32_t getConstPtrSize(const kernel &krnl, const llvm::StringRef &arg);
/// Query the offset of the constant pointer.
uint32_t getConstPtrOff(const kernel &krnl, const llvm::StringRef &arg);
/// Query the constant buffer number for a constant pointer.
uint32_t getConstPtrCB(const kernel &krnl, const llvm::StringRef &arg);
/// Query the Value* that the constant pointer originates from.
const Value *getConstPtrValue(const kernel &krnl, const llvm::StringRef &arg);
/// Get the ID of the argument.
int32_t getArgID(const Argument *arg);
/// Get the unique function ID for the specific function name and create a new
/// unique ID if it is not found.
uint32_t getOrCreateFunctionID(const GlobalValue* func);
uint32_t getOrCreateFunctionID(const std::string& func);
/// Calculate the offsets of the constant pool for the given kernel and
/// machine function.
void calculateCPOffsets(const MachineFunction *MF, kernel &krnl);
/// Print the global manager to the output stream.
void print(llvm::raw_ostream& O);
/// Dump the global manager to the output stream - debug use.
void dump();
private:
/// Various functions that parse global value information and store them in
/// the global manager. This approach is used instead of dynamic parsing as it
/// might require more space, but should allow caching of data that gets
/// requested multiple times.
kernelArg parseSGV(const GlobalValue *GV);
localArg parseLVGV(const GlobalValue *GV);
void parseGlobalAnnotate(const GlobalValue *G);
void parseImageAnnotate(const GlobalValue *G);
void parseConstantPtrAnnotate(const GlobalValue *G);
void printConstantValue(const Constant *CAval,
llvm::raw_ostream& O,
bool asByte);
void parseKernelInformation(const Value *V);
void parseAutoArray(const GlobalValue *G, bool isRegion);
void parseConstantPtr(const GlobalValue *G);
void allocateGlobalCB();
void dumpDataToCB(llvm::raw_ostream &O, AMDILKernelManager *km, uint32_t id);
bool checkConstPtrsUseHW(Module::const_iterator *F);
llvm::StringMap<arraymem> mArrayMems;
llvm::StringMap<localArg> mLocalArgs;
llvm::StringMap<kernelArg> mKernelArgs;
llvm::StringMap<kernel> mKernels;
llvm::StringMap<constPtr> mConstMems;
llvm::StringMap<uint32_t> mFuncNames;
llvm::DenseMap<const GlobalValue*, uint32_t> mFuncPtrNames;
llvm::DenseMap<uint32_t, llvm::StringRef> mImageNameMap;
std::set<llvm::StringRef> mByteStore;
std::set<llvm::StringRef> mIgnoreStr;
llvm::DenseMap<const Argument *, int32_t> mArgIDMap;
const char *symTab;
const AMDILSubtarget *mSTM;
size_t mOffset;
uint32_t mReservedBuffs;
uint32_t mCurrentCPOffset;
bool mDebugMode;
};
} // namespace llvm
#endif // __AMDILGLOBALMANAGER_H_

View file

@ -14,9 +14,7 @@
#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
#include "AMDILGlobalManager.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILKernelManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILTargetMachine.h"
@ -31,6 +29,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@ -1905,11 +1904,6 @@ AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
isRet = false;
IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
};
const AMDILSubtarget *stm = &this->getTargetMachine()
.getSubtarget<AMDILSubtarget>();
AMDILKernelManager *KM = const_cast<AMDILKernelManager*>(
stm->getKernelManager());
KM->setOutputInst();
Info.opc = IntNo;
Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
@ -2134,58 +2128,33 @@ AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
SDValue DST = Op;
const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *G = GADN->getGlobal();
const AMDILSubtarget *stm = &this->getTargetMachine()
.getSubtarget<AMDILSubtarget>();
const AMDILGlobalManager *GM = stm->getGlobalManager();
DebugLoc DL = Op.getDebugLoc();
int64_t base_offset = GADN->getOffset();
int32_t arrayoffset = GM->getArrayOffset(G->getName());
int32_t constoffset = GM->getConstOffset(G->getName());
if (arrayoffset != -1) {
DST = DAG.getConstant(arrayoffset, MVT::i32);
DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
DST, DAG.getConstant(base_offset, MVT::i32));
} else if (constoffset != -1) {
if (GM->getConstHWBit(G->getName())) {
DST = DAG.getConstant(constoffset, MVT::i32);
DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
DST, DAG.getConstant(base_offset, MVT::i32));
} else {
SDValue addr = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
SDValue DPReg = DAG.getRegister(AMDIL::SDP, MVT::i32);
DPReg = DAG.getNode(ISD::ADD, DL, MVT::i32, DPReg,
DAG.getConstant(base_offset, MVT::i32));
DST = DAG.getNode(AMDILISD::ADDADDR, DL, MVT::i32, addr, DPReg);
}
const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
if (!GV) {
DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
} else {
const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
if (!GV) {
DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
} else {
if (GV->hasInitializer()) {
const Constant *C = dyn_cast<Constant>(GV->getInitializer());
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
DST = DAG.getConstant(CI->getValue(), Op.getValueType());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
DST = DAG.getConstantFP(CF->getValueAPF(),
Op.getValueType());
} else if (dyn_cast<ConstantAggregateZero>(C)) {
EVT VT = Op.getValueType();
if (VT.isInteger()) {
DST = DAG.getConstant(0, VT);
} else {
DST = DAG.getConstantFP(0, VT);
}
if (GV->hasInitializer()) {
const Constant *C = dyn_cast<Constant>(GV->getInitializer());
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
DST = DAG.getConstant(CI->getValue(), Op.getValueType());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
DST = DAG.getConstantFP(CF->getValueAPF(),
Op.getValueType());
} else if (dyn_cast<ConstantAggregateZero>(C)) {
EVT VT = Op.getValueType();
if (VT.isInteger()) {
DST = DAG.getConstant(0, VT);
} else {
assert(!"lowering this type of Global Address "
"not implemented yet!");
C->dump();
DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
DST = DAG.getConstantFP(0, VT);
}
} else {
assert(!"lowering this type of Global Address "
"not implemented yet!");
C->dump();
DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
}
} else {
DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
}
}
return DST;

File diff suppressed because it is too large Load diff

View file

@ -1,177 +0,0 @@
//===-- AMDILKernelManager.h - TODO: Add brief description -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// Class that handles the metadata/abi management for the
// ASM printer. Handles the parsing and generation of the metadata
// for each kernel and keeps track of its arguments.
//
//==-----------------------------------------------------------------------===//
#ifndef _AMDILKERNELMANAGER_H_
#define _AMDILKERNELMANAGER_H_
#include "AMDIL.h"
#include "AMDILDevice.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Function.h"
#include <map>
#include <set>
#include <string>
#define IMAGETYPE_2D 0
#define IMAGETYPE_3D 1
#define RESERVED_LIT_COUNT 6
namespace llvm {
class AMDILGlobalManager;
class AMDILSubtarget;
class AMDILMachineFunctionInfo;
class AMDILTargetMachine;
class AMDILAsmPrinter;
class StructType;
class Value;
class TypeSymbolTable;
class MachineFunction;
class MachineInstr;
class ConstantFP;
class PrintfInfo;
class AMDILKernelManager {
public:
typedef enum {
RELEASE_ONLY,
DEBUG_ONLY,
ALWAYS
} ErrorMsgEnum;
AMDILKernelManager(AMDILTargetMachine *TM, AMDILGlobalManager *GM);
virtual ~AMDILKernelManager();
/// Clear the state of the KernelManager putting it in its most initial state.
void clear();
void setMF(MachineFunction *MF);
/// Process the specific kernel parsing out the parameter information for the
/// kernel.
void processArgMetadata(llvm::raw_ostream &O,
uint32_t buf, bool kernel);
/// Prints the header for the kernel which includes the groupsize declaration
/// and calculation of the local/group/global id's.
void printHeader(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O,
const std::string &name);
virtual void printDecls(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O);
virtual void printGroupSize(llvm::raw_ostream &O);
/// Copies the data from the runtime setup constant buffers into registers so
/// that the program can correctly access memory or data that was set by the
/// host program.
void printArgCopies(llvm::raw_ostream &O, AMDILAsmPrinter* RegNames);
/// Prints out the end of the function.
void printFooter(llvm::raw_ostream &O);
/// Prints out the metadata for the specific function depending if it is a
/// kernel or not.
void printMetaData(llvm::raw_ostream &O, uint32_t id, bool isKernel = false);
/// Set bool value on whether to consider the function a kernel or a normal
/// function.
void setKernel(bool kernel);
/// Set the unique ID of the kernel/function.
void setID(uint32_t id);
/// Set the name of the kernel/function.
void setName(const std::string &name);
/// Flag to specify whether the function is a kernel or not.
bool isKernel();
/// Flag that specifies whether this function has a kernel wrapper.
bool wasKernel();
void getIntrinsicSetup(AMDILAsmPrinter *AsmPrinter, llvm::raw_ostream &O);
// Returns whether a compiler needs to insert a write to memory or not.
bool useCompilerWrite(const MachineInstr *MI);
// Set the flag that there exists an image write.
void setImageWrite();
void setOutputInst();
const char *getTypeName(const Type *name, const char * symTab);
void emitLiterals(llvm::raw_ostream &O);
// Set the uav id for the specific pointer value. If value is NULL, then the
// ID sets the default ID.
void setUAVID(const Value *value, uint32_t ID);
// Get the UAV id for the specific pointer value.
uint32_t getUAVID(const Value *value);
private:
/// Helper function that prints the actual metadata and should only be called
/// by printMetaData.
void printKernelArgs(llvm::raw_ostream &O);
void printCopyStructPrivate(const StructType *ST,
llvm::raw_ostream &O,
size_t stackSize,
uint32_t Buffer,
uint32_t mLitIdx,
uint32_t &counter);
virtual void
printConstantToRegMapping(AMDILAsmPrinter *RegNames,
uint32_t &LII,
llvm::raw_ostream &O,
uint32_t &counter,
uint32_t Buffer,
uint32_t n,
const char *lit = NULL,
uint32_t fcall = 0,
bool isImage = false,
bool isHWCB = false);
void updatePtrArg(llvm::Function::const_arg_iterator Ip,
int numWriteImages,
int raw_uav_buffer,
int counter,
bool isKernel,
const Function *F);
/// Name of the current kernel.
std::string mName;
uint32_t mUniqueID;
bool mIsKernel;
bool mWasKernel;
bool mCompilerWrite;
/// Flag to specify if an image write has occured or not in order to not add a
/// compiler specific write if no other writes to memory occured.
bool mHasImageWrite;
bool mHasOutputInst;
/// Map from const Value * to UAV ID.
std::map<const Value *, uint32_t> mValueIDMap;
AMDILTargetMachine * mTM;
const AMDILSubtarget * mSTM;
AMDILGlobalManager * mGM;
/// This is the global offset of the printf string id's.
MachineFunction *mMF;
AMDILMachineFunctionInfo *mMFI;
}; // class AMDILKernelManager
} // llvm namespace
#endif // _AMDILKERNELMANAGER_H_

View file

@ -12,7 +12,6 @@
#include "AMDIL.h"
#include "AMDILAlgorithms.tpp"
#include "AMDILKernelManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILTargetMachine.h"
@ -43,7 +42,6 @@ namespace {
bool trackLiterals(MachineBasicBlock::iterator *bbb);
TargetMachine &TM;
const AMDILSubtarget *mSTM;
AMDILKernelManager *mKM;
AMDILMachineFunctionInfo *mMFI;
int32_t mLitIdx;
bool mChanged;
@ -71,7 +69,6 @@ bool AMDILLiteralManager::runOnMachineFunction(MachineFunction &MF) {
const AMDILTargetMachine *amdtm =
reinterpret_cast<const AMDILTargetMachine *>(&TM);
mSTM = dynamic_cast<const AMDILSubtarget *>(amdtm->getSubtargetImpl());
mKM = const_cast<AMDILKernelManager *>(mSTM->getKernelManager());
safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
std::bind1st(std::mem_fun(&AMDILLiteralManager::trackLiterals), this));
return mChanged;

View file

@ -16,8 +16,6 @@
#include "AMDILAlgorithms.tpp"
#include "AMDILDevices.h"
#include "AMDILGlobalManager.h"
#include "AMDILKernelManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/ADT/Statistic.h"
@ -39,7 +37,6 @@ STATISTIC(PointerAssignments, "Number of dynamic pointer "
"assigments discovered");
STATISTIC(PointerSubtract, "Number of pointer subtractions discovered");
#endif
STATISTIC(LocalFuncs, "Number of get_local_size(N) functions removed");
using namespace llvm;
// The Peephole optimization pass is used to do simple last minute optimizations
@ -76,7 +73,6 @@ private:
void doIsConstCallConversionIfNeeded();
bool mChanged;
bool mDebug;
bool mRWGOpt;
bool mConvertAtomics;
CodeGenOpt::Level optLevel;
// Run a series of tests to see if we can optimize a CALL instruction.
@ -104,7 +100,6 @@ private:
// specified then the result of get_local_size is known at compile time and
// can be returned accordingly.
bool isRWGLocalOpt(CallInst *CI);
void expandRWGLocalOpt(CallInst *CI);
// On northern island cards, the division is slightly less accurate than on
// previous generations, so we need to utilize a more accurate division. So we
// can translate the accurate divide to a normal divide on all other cards.
@ -251,15 +246,12 @@ AMDILPeepholeOpt::doAtomicConversionIfNeeded(Function &F)
// arena path.
Function::arg_iterator argB = F.arg_begin();
Function::arg_iterator argE = F.arg_end();
AMDILKernelManager *KM = mSTM->getKernelManager();
AMDILMachineFunctionInfo *mMFI = getAnalysis<MachineFunctionAnalysis>().getMF()
.getInfo<AMDILMachineFunctionInfo>();
for (; argB != argE; ++argB) {
if (mSTM->device()->isSupported(AMDILDeviceInfo::ArenaUAV)) {
KM->setUAVID(argB,mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::ARENA_UAV_ID));
} else {
KM->setUAVID(argB,mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
mMFI->uav_insert(mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
}
}
@ -276,16 +268,6 @@ AMDILPeepholeOpt::runOnFunction(Function &MF)
}
mCTX = &MF.getType()->getContext();
mConvertAtomics = true;
if (dumpAllIntoArena(MF)) {
for (Function::const_arg_iterator cab = MF.arg_begin(),
cae = MF.arg_end(); cab != cae; ++cab) {
const Argument *arg = cab;
AMDILKernelManager *KM = mSTM->getKernelManager();
KM->setUAVID(getBasePointerValue(arg),
mSTM->device()->getResourceID(AMDILDevice::GLOBAL_ID));
}
}
mRWGOpt = mSTM->getGlobalManager()->hasRWG(MF.getName());
safeNestedForEach(MF.begin(), MF.end(), MF.begin()->begin(),
std::bind1st(std::mem_fun(&AMDILPeepholeOpt::instLevelOptimizations),
this));
@ -313,10 +295,6 @@ AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
CI->eraseFromParent();
return true;
}
if (isRWGLocalOpt(CI)) {
expandRWGLocalOpt(CI);
return false;
}
if (propagateSamplerInst(CI)) {
return false;
}
@ -390,26 +368,7 @@ AMDILPeepholeOpt::optimizeCallInst(BasicBlock::iterator *bbb)
}
StringRef name = F->getName();
if (name.startswith("__atom") && name.find("_g") != StringRef::npos) {
Value *ptr = CI->getOperand(0);
const Value *basePtr = getBasePointerValue(ptr);
const Argument *Arg = dyn_cast<Argument>(basePtr);
if (Arg) {
AMDILGlobalManager *GM = mSTM->getGlobalManager();
int32_t id = GM->getArgID(Arg);
if (id >= 0) {
std::stringstream ss;
ss << name.data() << "_" << id << '\n';
std::string val;
ss >> val;
F = dyn_cast<Function>(
F->getParent() ->getOrInsertFunction(val, F->getFunctionType()));
atomicFuncs.push_back(std::make_pair <CallInst*, Function*>(CI, F));
} else {
mConvertAtomics = false;
}
} else {
mConvertAtomics = false;
}
mConvertAtomics = false;
}
return false;
}
@ -1088,28 +1047,11 @@ AMDILPeepholeOpt::expandSigned24BitOps(CallInst *CI)
bool
AMDILPeepholeOpt::isRWGLocalOpt(CallInst *CI)
{
return (CI != NULL && mRWGOpt
return (CI != NULL
&& CI->getOperand(CI->getNumOperands() - 1)->getName()
== "__amdil_get_local_size_int");
}
void
AMDILPeepholeOpt::expandRWGLocalOpt(CallInst *CI)
{
assert(isRWGLocalOpt(CI) &&
"This optmization only works when the call inst is get_local_size!");
std::vector<Constant *> consts;
for (uint32_t x = 0; x < 3; ++x) {
uint32_t val = mSTM->getGlobalManager()->getLocal(mF->getName(), x);
consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), val));
}
consts.push_back(ConstantInt::get(Type::getInt32Ty(*mCTX), 0));
Value *cVec = ConstantVector::get(consts);
CI->replaceAllUsesWith(cVec);
++LocalFuncs;
return;
}
bool
AMDILPeepholeOpt::convertAccurateDivide(CallInst *CI)
{

View file

@ -15,7 +15,6 @@
#endif
#include "AMDILAlgorithms.tpp"
#include "AMDILKernelManager.h"
#include "AMDILMachineFunctionInfo.h"
#include "AMDILModuleInfo.h"
#include "AMDILTargetMachine.h"
@ -50,7 +49,6 @@ namespace
private:
bool expandPrintf(BasicBlock::iterator *bbb);
AMDILMachineFunctionInfo *mMFI;
AMDILKernelManager *mKM;
bool mChanged;
SmallVector<int64_t, DEFAULT_VEC_SLOTS> bVecMap;
};
@ -256,7 +254,6 @@ AMDILPrintfConvert::expandPrintf(BasicBlock::iterator *bbb)
AMDILPrintfConvert::runOnFunction(Function &MF)
{
mChanged = false;
mKM = TM.getSubtarget<AMDILSubtarget>().getKernelManager();
mMFI = getAnalysis<MachineFunctionAnalysis>().getMF()
.getInfo<AMDILMachineFunctionInfo>();
bVecMap.clear();

View file

@ -14,8 +14,6 @@
#include "AMDILSubtarget.h"
#include "AMDIL.h"
#include "AMDILDevices.h"
#include "AMDILGlobalManager.h"
#include "AMDILKernelManager.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"

View file

@ -26,13 +26,11 @@ CPP_SOURCES := \
AMDILEvergreenDevice.cpp \
AMDILELFWriterInfo.cpp \
AMDILFrameLowering.cpp \
AMDILGlobalManager.cpp \
AMDILInliner.cpp \
AMDILInstrInfo.cpp \
AMDILIntrinsicInfo.cpp \
AMDILISelDAGToDAG.cpp \
AMDILISelLowering.cpp \
AMDILKernelManager.cpp \
AMDILLiteralManager.cpp \
AMDILMachineFunctionInfo.cpp \
AMDILMachinePeephole.cpp \