2016-10-07 09:16:09 +10:00
|
|
|
/*
|
|
|
|
|
* Copyright 2014 Advanced Micro Devices, Inc.
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
|
* copy of this software and associated documentation files (the
|
|
|
|
|
* "Software"), to deal in the Software without restriction, including
|
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
|
* distribute, sub license, and/or sell copies of the Software, and to
|
|
|
|
|
* permit persons to whom the Software is furnished to do so, subject to
|
|
|
|
|
* the following conditions:
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
|
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
|
|
|
|
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
|
|
|
|
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
|
|
|
|
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice (including the
|
|
|
|
|
* next paragraph) shall be included in all copies or substantial portions
|
|
|
|
|
* of the Software.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
2018-11-28 12:46:45 +01:00
|
|
|
#include <cstring>
|
|
|
|
|
|
2018-07-04 01:11:47 -04:00
|
|
|
#include "ac_binary.h"
|
2016-10-27 16:48:42 +02:00
|
|
|
#include "ac_llvm_util.h"
|
ac,ac/nir: use a better sync scope for shared atomics
https://reviews.llvm.org/rL356946 (present in LLVM 9 and later) changed
the meaning of the "system" sync scope, making it no longer restricted to
the memory operation's address space. So a single address space sync scope
is needed for shared atomic operations (such as "system-one-as" or
"workgroup-one-as") otherwise buffer_wbinvl1 and s_waitcnt instructions
can be created at each shared atomic operation.
This mostly reimplements LLVMBuildAtomicRMW and LLVMBuildAtomicCmpXchg
to allow for more sync scopes and uses the new functions in ac->nir with
the "workgroup-one-as" or "workgroup" sync scopes.
F1 2017 (4K, Ultra High settings, TAA), avg FPS : 59 -> 59.67 (+1.14%)
Strange Brigade (4K, ~highest settings), avg FPS : 51.5 -> 51.6 (+0.19%)
RotTR/mountain (4K, VeryHigh settings, FXAA), avg FPS : 57.2 -> 57.2 (+0.0%)
RotTR/tomb (4K, VeryHigh settings, FXAA), avg FPS : 42.5 -> 43.0 (+1.17%)
RotTR/valley (4K, VeryHigh settings, FXAA), avg FPS : 40.7 -> 41.6 (+2.21%)
Warhammer II/fallen, avg FPS : 31.63 -> 31.83 (+0.63%)
Warhammer II/skaven, avg FPS : 37.77 -> 38.07 (+0.79%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
2019-04-25 14:44:40 +01:00
|
|
|
#include "ac_llvm_build.h"
|
2018-07-04 01:11:47 -04:00
|
|
|
|
2018-11-28 12:46:45 +01:00
|
|
|
#include "util/macros.h"
|
|
|
|
|
|
2016-10-07 09:16:09 +10:00
|
|
|
#include <llvm-c/Core.h>
|
2018-07-04 01:11:47 -04:00
|
|
|
#include <llvm/Target/TargetMachine.h>
|
2018-01-15 14:51:43 +01:00
|
|
|
#include <llvm/IR/IRBuilder.h>
|
2018-06-27 09:34:42 +10:00
|
|
|
#include <llvm/Analysis/TargetLibraryInfo.h>
|
2018-07-05 02:27:45 -04:00
|
|
|
#include <llvm/Transforms/IPO.h>
|
2016-10-07 09:16:09 +10:00
|
|
|
|
2018-07-04 01:11:47 -04:00
|
|
|
#include <llvm/IR/LegacyPassManager.h>
|
|
|
|
|
|
2016-10-27 16:48:42 +02:00
|
|
|
/* Attach a "dereferenceable(bytes)" attribute to the function argument
 * wrapped by val. val is unwrapped as an llvm::Argument.
 */
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
{
	llvm::Argument *argument = llvm::unwrap<llvm::Argument>(val);
	llvm::Attribute deref =
		llvm::Attribute::getWithDereferenceableBytes(argument->getContext(), bytes);

	argument->addAttr(deref);
}
|
2016-10-28 14:40:24 +02:00
|
|
|
|
|
|
|
|
/* Return whether the function argument wrapped by arg carries the InReg
 * attribute in its parent function's attribute list.
 */
bool ac_is_sgpr_param(LLVMValueRef arg)
{
	llvm::Argument *param = llvm::unwrap<llvm::Argument>(arg);
	/* The attribute list is queried at the argument number plus one. */
	const unsigned attr_index = param->getArgNo() + 1;
	llvm::AttributeList attrs = param->getParent()->getAttributes();

	return attrs.hasAttribute(attr_index, llvm::Attribute::InReg);
}
|
2017-04-25 23:33:29 +01:00
|
|
|
|
|
|
|
|
/* Thin wrapper: forwards to LLVMGetCalledValue() and returns its result. */
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
{
	LLVMValueRef callee = LLVMGetCalledValue(call);

	return callee;
}
|
|
|
|
|
|
|
|
|
|
/* Return true when the value kind reported by LLVM for v is
 * LLVMFunctionValueKind.
 */
bool ac_llvm_is_function(LLVMValueRef v)
{
	const LLVMValueKind kind = LLVMGetValueKind(v);

	return kind == LLVMFunctionValueKind;
}
|
2018-01-15 14:51:43 +01:00
|
|
|
|
2018-06-30 00:54:30 -04:00
|
|
|
/* Create a module named "mesa-shader" in ctx and copy the target triple and
 * data layout from the given target machine into it.
 */
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
{
	LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
	llvm::TargetMachine *machine = reinterpret_cast<llvm::TargetMachine*>(tm);
	llvm::Module *mod = llvm::unwrap(module);

	mod->setTargetTriple(machine->getTargetTriple().getTriple());
	mod->setDataLayout(machine->createDataLayout());
	return module;
}
|
|
|
|
|
|
2018-01-15 14:51:43 +01:00
|
|
|
/* Create an IR builder in ctx and configure its fast-math flags according to
 * float_mode.
 */
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
				 enum ac_float_mode float_mode)
{
	LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);

	/* AC_FLOAT_MODE_DEFAULT and AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO leave
	 * the builder's fast-math flags untouched; only the no-signed-zeros
	 * mode changes them.
	 */
	if (float_mode == AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH) {
		llvm::FastMathFlags fmf;

		fmf.setNoSignedZeros();
		llvm::unwrap(builder)->setFastMathFlags(fmf);
	}

	return builder;
}
|
2018-06-27 09:34:42 +10:00
|
|
|
|
|
|
|
|
/* Allocate a new llvm::TargetLibraryInfoImpl for the given triple string and
 * return it as an opaque LLVMTargetLibraryInfoRef. The object is created with
 * new; ac_dispose_target_library_info() deletes it.
 */
LLVMTargetLibraryInfoRef
ac_create_target_library_info(const char *triple)
{
	llvm::TargetLibraryInfoImpl *impl =
		new llvm::TargetLibraryInfoImpl(llvm::Triple(triple));

	return reinterpret_cast<LLVMTargetLibraryInfoRef>(impl);
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
|
|
|
|
|
{
|
|
|
|
|
delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
|
|
|
|
|
}
|
2018-07-04 01:11:47 -04:00
|
|
|
|
2018-11-28 12:46:45 +01:00
|
|
|
/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
|
|
|
|
|
* better compatibility with C code. */
|
|
|
|
|
struct raw_memory_ostream : public llvm::raw_pwrite_stream {
|
|
|
|
|
char *buffer;
|
|
|
|
|
size_t written;
|
|
|
|
|
size_t bufsize;
|
|
|
|
|
|
|
|
|
|
raw_memory_ostream()
|
|
|
|
|
{
|
|
|
|
|
buffer = NULL;
|
|
|
|
|
written = 0;
|
|
|
|
|
bufsize = 0;
|
|
|
|
|
SetUnbuffered();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
~raw_memory_ostream()
|
|
|
|
|
{
|
|
|
|
|
free(buffer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void clear()
|
|
|
|
|
{
|
|
|
|
|
written = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void take(char *&out_buffer, size_t &out_size)
|
|
|
|
|
{
|
|
|
|
|
out_buffer = buffer;
|
|
|
|
|
out_size = written;
|
|
|
|
|
buffer = NULL;
|
|
|
|
|
written = 0;
|
|
|
|
|
bufsize = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void flush() = delete;
|
|
|
|
|
|
|
|
|
|
void write_impl(const char *ptr, size_t size) override
|
|
|
|
|
{
|
|
|
|
|
if (unlikely(written + size < written))
|
|
|
|
|
abort();
|
|
|
|
|
if (written + size > bufsize) {
|
|
|
|
|
bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
|
|
|
|
|
buffer = (char *)realloc(buffer, bufsize);
|
|
|
|
|
if (!buffer) {
|
|
|
|
|
fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
|
|
|
|
|
abort();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
memcpy(buffer + written, ptr, size);
|
|
|
|
|
written += size;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
|
|
|
|
|
{
|
|
|
|
|
assert(offset == (size_t)offset &&
|
|
|
|
|
offset + size >= offset && offset + size <= written);
|
|
|
|
|
memcpy(buffer + offset, ptr, size);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t current_pos() const override
|
|
|
|
|
{
|
|
|
|
|
return written;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2018-07-04 01:11:47 -04:00
|
|
|
/* The LLVM compiler is represented as a pass manager containing passes for
|
|
|
|
|
* optimizations, instruction selection, and code generation.
|
|
|
|
|
*/
|
|
|
|
|
struct ac_compiler_passes {
|
2018-11-28 12:46:45 +01:00
|
|
|
raw_memory_ostream ostream; /* ELF shader binary stream */
|
2018-07-04 01:11:47 -04:00
|
|
|
llvm::legacy::PassManager passmgr; /* list of passes */
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/* Create the compiler pass pipeline for the given target machine.
 *
 * The returned object owns a pass manager populated with the passes needed to
 * emit an object file into its internal memory stream.
 *
 * Returns NULL (after printing a message to stderr) when the target machine
 * reports that it cannot emit an object file. Release the result with
 * ac_destroy_llvm_passes().
 */
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
{
	/* A plain new-expression never yields a null pointer (allocation
	 * failure is reported by other means), so no NULL check is needed.
	 */
	struct ac_compiler_passes *p = new ac_compiler_passes();

	llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine*>(tm);

	/* The object-file enumerator moved out of TargetMachine in LLVM 10. */
#if LLVM_VERSION_MAJOR >= 10
	const auto file_type = llvm::CGFT_ObjectFile;
#else
	const auto file_type = llvm::TargetMachine::CGFT_ObjectFile;
#endif

	/* addPassesToEmitFile() returns true on failure. */
	if (TM->addPassesToEmitFile(p->passmgr, p->ostream,
				    nullptr, file_type)) {
		fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
		delete p;
		return NULL;
	}
	return p;
}
|
|
|
|
|
|
|
|
|
|
/* Delete a pass pipeline object; deleting a NULL pointer is a no-op. */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
	struct ac_compiler_passes *passes = p;

	delete passes;
}
|
|
|
|
|
|
2018-11-28 12:46:45 +01:00
|
|
|
/* This returns false on failure. */
|
|
|
|
|
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
|
|
|
|
|
char **pelf_buffer, size_t *pelf_size)
|
|
|
|
|
{
|
|
|
|
|
p->passmgr.run(*llvm::unwrap(module));
|
|
|
|
|
p->ostream.take(*pelf_buffer, *pelf_size);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2018-07-05 02:27:45 -04:00
|
|
|
/* Append LLVM's barrier no-op pass to the given pass manager. */
void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
{
	llvm::Pass *barrier = llvm::createBarrierNoopPass();

	llvm::unwrap(passmgr)->add(barrier);
}
|
2018-07-20 19:54:56 +02:00
|
|
|
|
|
|
|
|
/* Turn on GlobalISel for the given target machine. */
void ac_enable_global_isel(LLVMTargetMachineRef tm)
{
	llvm::TargetMachine *machine = reinterpret_cast<llvm::TargetMachine*>(tm);

	machine->setGlobalISel(true);
}
|
ac,ac/nir: use a better sync scope for shared atomics
https://reviews.llvm.org/rL356946 (present in LLVM 9 and later) changed
the meaning of the "system" sync scope, making it no longer restricted to
the memory operation's address space. So a single address space sync scope
is needed for shared atomic operations (such as "system-one-as" or
"workgroup-one-as") otherwise buffer_wbinvl1 and s_waitcnt instructions
can be created at each shared atomic operation.
This mostly reimplements LLVMBuildAtomicRMW and LLVMBuildAtomicCmpXchg
to allow for more sync scopes and uses the new functions in ac->nir with
the "workgroup-one-as" or "workgroup" sync scopes.
F1 2017 (4K, Ultra High settings, TAA), avg FPS : 59 -> 59.67 (+1.14%)
Strange Brigade (4K, ~highest settings), avg FPS : 51.5 -> 51.6 (+0.19%)
RotTR/mountain (4K, VeryHigh settings, FXAA), avg FPS : 57.2 -> 57.2 (+0.0%)
RotTR/tomb (4K, VeryHigh settings, FXAA), avg FPS : 42.5 -> 43.0 (+1.17%)
RotTR/valley (4K, VeryHigh settings, FXAA), avg FPS : 40.7 -> 41.6 (+2.21%)
Warhammer II/fallen, avg FPS : 31.63 -> 31.83 (+0.63%)
Warhammer II/skaven, avg FPS : 37.77 -> 38.07 (+0.79%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
2019-04-25 14:44:40 +01:00
|
|
|
|
|
|
|
|
/* Build an atomicrmw instruction with a caller-chosen synchronization scope.
 *
 * op         - the operation, given as the C API enum; it is translated to
 *              the matching llvm::AtomicRMWInst::BinOp.
 * ptr, val   - pointer operand and value operand.
 * sync_scope - name of the sync scope; it is looked up (or registered) in
 *              ctx->context.
 *
 * The instruction is emitted through ctx->builder with sequentially
 * consistent ordering, and the new instruction is returned.
 */
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
				 LLVMValueRef ptr, LLVMValueRef val,
				 const char *sync_scope) {
	llvm::AtomicRMWInst::BinOp binop;
	switch (op) {
	case LLVMAtomicRMWBinOpXchg:
		binop = llvm::AtomicRMWInst::Xchg;
		break;
	case LLVMAtomicRMWBinOpAdd:
		binop = llvm::AtomicRMWInst::Add;
		break;
	case LLVMAtomicRMWBinOpSub:
		binop = llvm::AtomicRMWInst::Sub;
		break;
	case LLVMAtomicRMWBinOpAnd:
		binop = llvm::AtomicRMWInst::And;
		break;
	case LLVMAtomicRMWBinOpNand:
		binop = llvm::AtomicRMWInst::Nand;
		break;
	case LLVMAtomicRMWBinOpOr:
		binop = llvm::AtomicRMWInst::Or;
		break;
	case LLVMAtomicRMWBinOpXor:
		binop = llvm::AtomicRMWInst::Xor;
		break;
	case LLVMAtomicRMWBinOpMax:
		binop = llvm::AtomicRMWInst::Max;
		break;
	case LLVMAtomicRMWBinOpMin:
		binop = llvm::AtomicRMWInst::Min;
		break;
	case LLVMAtomicRMWBinOpUMax:
		binop = llvm::AtomicRMWInst::UMax;
		break;
	case LLVMAtomicRMWBinOpUMin:
		binop = llvm::AtomicRMWInst::UMin;
		break;
	default:
		/* Pass the message string itself. NOTE(review): unreachable()
		 * is expected to negate its argument for the debug assertion,
		 * so a pre-negated string would make that assertion always
		 * pass - confirm against util/macros.h.
		 */
		unreachable("invalid LLVMAtomicRMWBinOp");
		break;
	}

	unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
	return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicRMW(
				binop, llvm::unwrap(ptr), llvm::unwrap(val),
				llvm::AtomicOrdering::SequentiallyConsistent, SSID));
}
|
|
|
|
|
|
|
|
|
|
/* Build a cmpxchg instruction with a caller-chosen synchronization scope.
 *
 * ptr is the pointer operand, cmp the expected value and val the replacement.
 * sync_scope names the sync scope, which is looked up (or registered) in
 * ctx->context. Both the success and the failure ordering are sequentially
 * consistent. Returns the new instruction.
 */
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
				      LLVMValueRef cmp, LLVMValueRef val,
				      const char *sync_scope) {
	const llvm::AtomicOrdering success_order =
		llvm::AtomicOrdering::SequentiallyConsistent;
	const llvm::AtomicOrdering failure_order =
		llvm::AtomicOrdering::SequentiallyConsistent;
	unsigned scope_id = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);

	llvm::Value *address = llvm::unwrap(ptr);
	llvm::Value *expected = llvm::unwrap(cmp);
	llvm::Value *replacement = llvm::unwrap(val);

	return llvm::wrap(llvm::unwrap(ctx->builder)->CreateAtomicCmpXchg(
				address, expected, replacement,
				success_order, failure_order, scope_id));
}
|