swr/rast: Add support for setting optimization level

for JIT compilation

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
George Kyriazis 2018-04-04 17:34:54 -05:00
parent 4f0df5e2f7
commit c57b594317
9 changed files with 55 additions and 18 deletions

View file

@ -193,6 +193,41 @@ KNOBS = [
'category' : 'debug_adv',
}],
# JIT_OPTIMIZATION_LEVEL: integer knob, shown as a dropdown, that selects the
# optimization level used for JIT compilation.
# The default of -1 ('Automatic') lies outside the 0..3 range of the explicit
# choices.  In the JitManager constructor of this same change, any value
# outside CodeGenOpt::None..CodeGenOpt::Aggressive leaves the level at
# CodeGenOpt::Aggressive; in-range values are cast to CodeGenOpt::Level.
['JIT_OPTIMIZATION_LEVEL', {
'type' : 'int',
'default' : '-1',
'desc' : ['JIT compile optimization level:',],
'category' : 'debug',
'control' : 'dropdown',
# Each entry pairs a 'value' (the integer the knob takes) with a 'name' and
# 'desc'; presumably these are the display strings for the dropdown -- confirm
# against the knob generator.
'choices' : [
{
'name' : 'Automatic',
'desc' : 'Automatic based on other KNOB and build settings',
'value' : -1,
},
{
'name' : 'Debug',
'desc' : 'No optimization: -O0',
'value' : 0,
},
{
'name' : 'Less',
'desc' : 'Some optimization: -O1',
'value' : 1,
},
{
'name' : 'Optimize',
'desc' : 'Default Clang / LLVM optimizations: -O2',
'value' : 2,
},
{
'name' : 'Aggressive',
'desc' : 'Maximum optimization: -O3',
'value' : 3,
},
],
}],
['JIT_CACHE_DIR', {
'type' : 'std::string',
'default' : r'%TEMP%\SWR\JitCache' if sys.platform == 'win32' else '${HOME}/.swr/jitcache',

View file

@ -426,7 +426,7 @@ static SIMDINLINE bool SIMDCALL testz_ps(Float const &a, Float const &b) // ret
SIMD256T::testz_ps(a.v8[1], b.v8[1]));
}
static SIMDINLINE int SIMDCALL testz_si(Integer const &a, Integer const &b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
static SIMDINLINE bool SIMDCALL testz_si(Integer const &a, Integer const &b) // return all_lanes_zero(a & b) ? 1 : 0 (int)
{
return 0 != (SIMD256T::testz_si(a.v8[0], b.v8[0]) &
SIMD256T::testz_si(a.v8[1], b.v8[1]));

View file

@ -1,5 +1,5 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
* Copyright (C) 2014-2018 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -526,6 +526,11 @@ enum SWR_AUX_MODE
AUX_MODE_DEPTH,
};
//////////////////////////////////////////////////////////////////////////
/// SWR_LOD_OFFSETS
/// Holds a 2 x 15 table of 32-bit unsigned offsets.
/// NOTE(review): presumably one offset per mip level (LOD); what the two
/// rows select is not visible here -- confirm against the code that fills it.
//////////////////////////////////////////////////////////////////////////
struct SWR_LOD_OFFSETS
{
uint32_t offsets[2][15];
};
//////////////////////////////////////////////////////////////////////////
/// SWR_SURFACE_STATE
//////////////////////////////////////////////////////////////////////////
@ -866,11 +871,9 @@ enum SWR_MULTISAMPLE_COUNT
SWR_MULTISAMPLE_TYPE_COUNT
};
// GetNumSamples: converts a multisample-count enum value into the number of
// samples it stands for.  Two forms of the function appear in this hunk: a
// lookup table {1, 2, 4, 8, 16} guarded by an assert, and a left shift of 1 by
// the enum value.  Both yield 1, 2, 4, 8, 16 for inputs 0 through 4.
// NOTE(review): the shift form has no range check; a negative input or one
// >= 32 makes the shift undefined behavior.  The inline parameter comment
// says SWR_SAMPLE_COUNT while the enum visible here is SWR_MULTISAMPLE_COUNT
// -- confirm which name is intended.
INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // @llvm_func_start
static INLINE uint32_t GetNumSamples(/* SWR_SAMPLE_COUNT */ int sampleCountEnum) // @llvm_func_start
{
static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 8, 16};
assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT);
return sampleCountLUT[sampleCount];
return uint32_t(1) << sampleCountEnum;
} // @llvm_func_end
struct SWR_BLEND_STATE

View file

@ -66,6 +66,7 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
InitializeNativeTargetAsmPrinter();
InitializeNativeTargetDisassembler();
TargetOptions tOpts;
tOpts.AllowFPOpFusion = FPOpFusion::Fast;
tOpts.NoInfsFPMath = false;
@ -74,9 +75,6 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
//tOpts.PrintMachineCode = true;
mCore = std::string(core);
std::transform(mCore.begin(), mCore.end(), mCore.begin(), ::tolower);
std::unique_ptr<Module> newModule(new Module("", mContext));
mpCurrentModule = newModule.get();
@ -93,6 +91,12 @@ JitManager::JitManager(uint32_t simdWidth, const char *arch, const char* core)
auto optLevel = CodeGenOpt::Aggressive;
if (KNOB_JIT_OPTIMIZATION_LEVEL >= CodeGenOpt::None &&
KNOB_JIT_OPTIMIZATION_LEVEL <= CodeGenOpt::Aggressive)
{
optLevel = CodeGenOpt::Level(KNOB_JIT_OPTIMIZATION_LEVEL);
}
mpExec = EngineBuilder(std::move(newModule))
.setTargetOptions(tOpts)
.setOptLevel(optLevel)

View file

@ -147,7 +147,6 @@ struct JitManager
llvm::FunctionType* mFetchShaderTy;
JitInstructionSet mArch;
std::string mCore;
// Debugging support
std::unordered_map<llvm::StructType*, llvm::DIType*> mDebugStructMap;

View file

@ -42,6 +42,7 @@ namespace SwrJit
{
mpfnTranslateGfxAddress = nullptr;
mpParamSimDC = nullptr;
}
void BuilderGfxMem::NotifyPrivateContextSet()
@ -133,9 +134,8 @@ namespace SwrJit
return Builder::LOAD(BasePtr, offset, name);
}
// Reinterprets the given address value as a pointer by calling INT_TO_PTR with
// PointerType::get(mInt8Ty, 0) as the target type, and returns the result.
// The two signature lines below are the misspelled ("Tranlsate") and the
// corrected ("Translate") spelling of the same method name.
// NOTE(review): mInt8Ty is presumably the builder's 8-bit integer type, and
// the 0 the LLVM address space (assuming llvm::PointerType) -- confirm.
Value* BuilderGfxMem::TranlsateGfxAddress(Value* xpGfxAddress)
Value* BuilderGfxMem::TranslateGfxAddress(Value* xpGfxAddress)
{
return INT_TO_PTR(xpGfxAddress, PointerType::get(mInt8Ty, 0));
}
}

View file

@ -51,7 +51,8 @@ namespace SwrJit
virtual Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1, JIT_MEM_CLIENT usage = MEM_CLIENT_INTERNAL);
Value* TranlsateGfxAddress(Value* xpGfxAddress);
Value* TranslateGfxAddress(Value* xpGfxAddress);
protected:

View file

@ -694,5 +694,4 @@ namespace SwrJit
// Move builder to beginning of post loop
IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
}
}

View file

@ -90,7 +90,3 @@ void Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[], Val
// Static stack allocations for scatter operations
Value* pScatterStackSrc{ nullptr };
Value* pScatterStackOffsets{ nullptr };
//virtual Value* TRANSLATE_ADDRESS(Value* address) { return address; }