mesa/src/amd/llvm/ac_llvm_helper.cpp
Antonio Ospite ddf2aa3a4d build: avoid redefining unreachable() which is standard in C23
In the C23 standard unreachable() is now a predefined function-like
macro in <stddef.h>

See https://android.googlesource.com/platform/bionic/+/HEAD/docs/c23.md#is-now-a-predefined-function_like-macro-in

And this causes build errors when building for C23:

-----------------------------------------------------------------------
In file included from ../src/util/log.h:30,
                 from ../src/util/log.c:30:
../src/util/macros.h:123:9: warning: "unreachable" redefined
  123 | #define unreachable(str)    \
      |         ^~~~~~~~~~~
In file included from ../src/util/macros.h:31:
/usr/lib/gcc/x86_64-linux-gnu/14/include/stddef.h:456:9: note: this is the location of the previous definition
  456 | #define unreachable() (__builtin_unreachable ())
      |         ^~~~~~~~~~~
-----------------------------------------------------------------------

So don't redefine it with the same name, but use the name UNREACHABLE()
to also signify it's a macro.

Using a different name also makes sense because the macro's behavior
extended that of __builtin_unreachable() anyway, and its signature
differed too: it accepts one argument, whereas the standard
unreachable() takes none.

This change improves the chances of building mesa with the C23 standard,
which for instance is the default in recent AOSP versions.

All the instances of the macro, including the definition, were updated
with the following command line:

  git grep -l '[^_]unreachable(' -- "src/**" | sort | uniq | \
  while read file; \
  do \
    sed -e 's/\([^_]\)unreachable(/\1UNREACHABLE(/g' -i "$file"; \
  done && \
  sed -e 's/#undef unreachable/#undef UNREACHABLE/g' -i src/intel/isl/isl_aux_info.c

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36437>
2025-07-31 17:49:42 +00:00

438 lines
13 KiB
C++

/*
* Copyright 2014 Advanced Micro Devices, Inc.
*
* SPDX-License-Identifier: MIT
*/
#include <llvm-c/Core.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/LegacyPassManager.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Target/TargetMachine.h>
#include <llvm/MC/MCSubtargetInfo.h>
#include <llvm/Support/CommandLine.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/CodeGen/Passes.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/IPO/SCCP.h>
#include <llvm/Transforms/Scalar/EarlyCSE.h>
#include <llvm/Transforms/Scalar/LICM.h>
#include <llvm/Transforms/Scalar/SROA.h>
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cstring>
/* DO NOT REORDER THE HEADERS
* The LLVM headers need to all be included before any Mesa header,
* as they use the `restrict` keyword in ways that are incompatible
* with our #define in include/c99_compat.h
*/
#include "ac_binary.h"
#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
#include "util/macros.h"
using namespace llvm;
/* Dummy SDNode subclass whose only purpose is to execute the code paths that
 * declare LLVM's SelectionDAG static variables; see
 * ac_llvm_run_atexit_for_destructors() below for why that matters. */
class RunAtExitForStaticDestructors : public SDNode
{
public:
/* getSDVTList (protected) calls getValueTypeList (private), which contains static variables. */
RunAtExitForStaticDestructors(): SDNode(0, 0, DebugLoc(), getSDVTList(MVT::Other))
{
}
};
/* Make LLVM register its static-variable destructors with atexit() now.
 *
 * LLVM >= 16 registers static variable destructors on the first compile, which
 * gcc implements by calling atexit() at that point. Before that, u_queue
 * registers its own atexit handler to kill all threads. Since exit() runs
 * atexit handlers in reverse registration order, the LLVM destructors would
 * otherwise run first while shader compiler threads may still be alive, which
 * crashes inside LLVM in SelectionDAG.cpp.
 *
 * By executing the code that declares the LLVM static variables here, LLVM's
 * atexit handler is registered before u_queue's, so all u_queue threads are
 * terminated before the LLVM destructors run.
 */
void ac_llvm_run_atexit_for_destructors(void)
{
   /* Constructing (and immediately destroying) this dummy node is enough to
    * trigger the declaration of the static variables inside LLVM. */
   (void)RunAtExitForStaticDestructors();
}
/* Return true if "processor" names a CPU string known to the target machine's
 * subtarget info. */
bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
{
   auto *machine = reinterpret_cast<TargetMachine *>(tm);
   const MCSubtargetInfo *subtarget = machine->getMCSubtargetInfo();
   return subtarget->isCPUStringValid(processor);
}
void ac_reset_llvm_all_options_occurrences()
{
cl::ResetAllOptionOccurrences();
}
/* Mark the function argument "val" as dereferenceable for "bytes" bytes. */
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
{
   Argument *arg = unwrap<Argument>(val);
   auto attr = Attribute::getWithDereferenceableBytes(arg->getContext(), bytes);
   arg->addAttr(attr);
}
/* Attach an alignment attribute of "bytes" bytes to the function argument
 * "val". */
void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
{
   Argument *arg = unwrap<Argument>(val);
   auto attr = Attribute::getWithAlignment(arg->getContext(), Align(bytes));
   arg->addAttr(attr);
}
/* Create an empty module named "mesa-shader" in "ctx", configured with the
 * target triple and data layout of the given target machine. */
LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
{
   auto *machine = reinterpret_cast<TargetMachine *>(tm);
   LLVMModuleRef mod = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
   Module *m = unwrap(mod);

#if LLVM_VERSION_MAJOR >= 21
   /* Newer LLVM takes the Triple object directly. */
   m->setTargetTriple(machine->getTargetTriple());
#else
   m->setTargetTriple(machine->getTargetTriple().getTriple());
#endif
   m->setDataLayout(machine->createDataLayout());
   return mod;
}
/* Create an IR builder in "ctx" whose fast-math flags match "float_mode".
 * Only AC_FLOAT_MODE_DEFAULT_OPENGL sets any flags; the other modes leave
 * the builder at its defaults. */
LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
{
   LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);

   if (float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
      FastMathFlags fmf;
      /* nsz: the sign of a zero argument or result is insignificant. */
      fmf.setNoSignedZeros();
      /* arcp: the reciprocal of an argument may be used instead of division. */
      fmf.setAllowReciprocal();
      unwrap(builder)->setFastMathFlags(fmf);
   }
   return builder;
}
void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
{
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
auto *b = unwrap(ctx->builder);
FastMathFlags flags = b->getFastMathFlags();
/* This disables the optimization of (x + 0), which is used
* to convert negative zero to positive zero.
*/
flags.setNoSignedZeros(false);
b->setFastMathFlags(flags);
}
}
void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
{
if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
auto *b = unwrap(ctx->builder);
FastMathFlags flags = b->getFastMathFlags();
flags.setNoSignedZeros();
b->setFastMathFlags(flags);
}
}
/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
 * better compatibility with C code. */
struct raw_memory_ostream : public raw_pwrite_stream {
/* malloc()ed backing store; ownership is handed to the caller by take() */
char *buffer;
/* bytes written so far (== current stream position) */
size_t written;
/* current allocation size of `buffer`, always >= written */
size_t bufsize;
raw_memory_ostream()
{
buffer = NULL;
written = 0;
bufsize = 0;
/* Write straight through to `buffer`; no internal raw_ostream buffering. */
SetUnbuffered();
}
~raw_memory_ostream()
{
/* free(NULL) is a no-op, so this is safe after take(). */
free(buffer);
}
/* Transfer ownership of the accumulated buffer and its size to the caller
 * (who must free() it) and reset the stream to the empty state. */
void take(char *&out_buffer, size_t &out_size)
{
out_buffer = buffer;
out_size = written;
buffer = NULL;
written = 0;
bufsize = 0;
}
/* Flushing an unbuffered stream makes no sense; forbid it at compile time. */
void flush() = delete;
/* Append `size` bytes, growing the allocation (at least 1024 bytes, and by
 * a 4/3 factor) to amortize realloc() cost. Aborts on overflow or OOM. */
void write_impl(const char *ptr, size_t size) override
{
/* Detect size_t wraparound of written + size. */
if (unlikely(written + size < written))
abort();
if (written + size > bufsize) {
bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
buffer = (char *)realloc(buffer, bufsize);
if (!buffer) {
fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
abort();
}
}
memcpy(buffer + written, ptr, size);
written += size;
}
/* Overwrite bytes at `offset`; the asserted precondition is that the whole
 * [offset, offset + size) range was already written. */
void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
{
assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
memcpy(buffer + offset, ptr, size);
}
/* Current stream position == total bytes written so far. */
uint64_t current_pos() const override
{
return written;
}
};
/* The middle-end optimization passes are run using
 * the LLVM's new pass manager infrastructure.
 */
struct ac_midend_optimizer
{
TargetMachine *target_machine;
PassBuilder pass_builder;
TargetLibraryInfoImpl target_library_info;
/* Should be declared in this order only,
 * so that they are destroyed in the correct order
 * due to inter-analysis-manager references.
 */
LoopAnalysisManager loop_am;
FunctionAnalysisManager function_am;
CGSCCAnalysisManager cgscc_am;
ModuleAnalysisManager module_am;
/* Pass Managers */
LoopPassManager loop_pm;
FunctionPassManager function_pm;
ModulePassManager module_pm;
/* Build mesa's fixed middle-end pipeline for the given target machine.
 * arg_check_ir: when true, an IR VerifierPass runs before any optimization. */
ac_midend_optimizer(TargetMachine *arg_target_machine, bool arg_check_ir)
: target_machine(arg_target_machine),
pass_builder(target_machine, PipelineTuningOptions(), {}),
target_library_info(Triple(target_machine->getTargetTriple()))
{
/* Build the pipeline and optimize.
 * Any custom analyses should be registered
 * before LLVM's default analysis sets.
 */
function_am.registerPass(
[&] { return TargetLibraryAnalysis(target_library_info); }
);
pass_builder.registerModuleAnalyses(module_am);
pass_builder.registerCGSCCAnalyses(cgscc_am);
pass_builder.registerFunctionAnalyses(function_am);
pass_builder.registerLoopAnalyses(loop_am);
pass_builder.crossRegisterProxies(loop_am, function_am, cgscc_am, module_am);
if (arg_check_ir)
module_pm.addPass(VerifierPass());
/* Adding inliner pass to the module pass manager directly
 * ensures that the pass is run on all functions first, which makes sure
 * that the following passes are only run on the remaining non-inline
 * function, so it removes useless work done on dead inline functions.
 */
module_pm.addPass(AlwaysInlinerPass());
/* The following set of passes run on an individual function/loop first
 * before proceeding to the next.
 */
function_pm.addPass(SROAPass(SROAOptions::ModifyCFG));
loop_pm.addPass(LICMPass(LICMOptions()));
/* The boolean enables MemorySSA for the loop pipeline (per the LLVM
 * createFunctionToLoopPassAdaptor API). */
function_pm.addPass(createFunctionToLoopPassAdaptor(std::move(loop_pm), true));
function_pm.addPass(SimplifyCFGPass());
/* The boolean selects the MemorySSA-backed EarlyCSE variant. */
function_pm.addPass(EarlyCSEPass(true));
module_pm.addPass(createModuleToFunctionPassAdaptor(std::move(function_pm)));
}
/* Run the whole middle-end pipeline on `module`. May be called for several
 * modules in sequence; stale analysis results are purged after each run. */
void run(Module &module)
{
module_pm.run(module, module_am);
/* After a run(), the results in the analyses managers
 * aren't useful to optimize a subsequent LLVM module.
 * If used, it can lead to unexpected crashes.
 * Hence, the results in the analyses managers
 * need to be invalidated and cleared before
 * running optimizations on a new LLVM module.
 */
module_am.invalidate(module, PreservedAnalyses::none());
module_am.clear();
cgscc_am.clear();
function_am.clear();
loop_am.clear();
}
};
/* The backend passes for optimizations, instruction selection,
 * and code generation in the LLVM compiler still requires the
 * legacy::PassManager. The use of the legacy PM will be
 * deprecated when the new PM can handle backend passes.
 */
struct ac_backend_optimizer
{
raw_memory_ostream ostream; /* ELF shader binary stream */
legacy::PassManager backend_pass_manager; /* for codegen only */
/* Set up the codegen passes that emit an ELF object file into `ostream`.
 * NOTE(review): on addPassesToEmitFile() failure this only prints to stderr;
 * a later run() would then hand back an empty binary with no error
 * indication to the caller — confirm this is the intended behavior. */
ac_backend_optimizer(TargetMachine *arg_target_machine)
{
/* add backend passes */
if (arg_target_machine->addPassesToEmitFile(backend_pass_manager, ostream, nullptr,
CodeGenFileType::ObjectFile)) {
fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
}
}
/* Run codegen on `module` and transfer the resulting ELF buffer (malloc()ed,
 * caller frees) into out_buffer/out_size. */
void run(Module &module, char *&out_buffer, size_t &out_size)
{
backend_pass_manager.run(module);
ostream.take(out_buffer, out_size);
}
};
/* Allocate a middle-end optimizer for the given target machine.
 * check_ir: prepend an IR verifier pass to the pipeline.
 * The result must be freed with ac_destroy_midend_optimiser(). */
ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm,
bool check_ir)
{
   auto *machine = reinterpret_cast<TargetMachine *>(tm);
   return new ac_midend_optimizer(machine, check_ir);
}
/* Free an optimizer created by ac_create_midend_optimizer(). NULL is safe.
 * (The "optimiser" spelling is part of the public name and must stay.) */
void ac_destroy_midend_optimiser(ac_midend_optimizer *meo)
{
delete meo;
}
/* Run the middle-end optimization pipeline on `module` (no code generation).
 * Returns false when no optimizer was supplied, true otherwise. */
bool ac_llvm_optimize_module(ac_midend_optimizer *meo, LLVMModuleRef module)
{
   if (meo == nullptr)
      return false;

   Module &m = *unwrap(module);
   meo->run(m);
   return true;
}
/* Allocate a backend (codegen) optimizer for the given target machine.
 * The result must be freed with ac_destroy_backend_optimizer(). */
ac_backend_optimizer *ac_create_backend_optimizer(LLVMTargetMachineRef tm)
{
   auto *machine = reinterpret_cast<TargetMachine *>(tm);
   return new ac_backend_optimizer(machine);
}
/* Free an optimizer created by ac_create_backend_optimizer(). NULL is safe. */
void ac_destroy_backend_optimizer(ac_backend_optimizer *beo)
{
delete beo;
}
/* Run backend optimizations + code generation on `module`, storing the
 * resulting ELF binary (malloc()ed, caller frees) in *pelf_buffer and its
 * size in *pelf_size. Returns false when no optimizer was supplied. */
bool ac_compile_module_to_elf(ac_backend_optimizer *beo, LLVMModuleRef module,
char **pelf_buffer, size_t *pelf_size)
{
   if (beo == nullptr)
      return false;

   beo->run(*unwrap(module), *pelf_buffer, *pelf_size);
   return true;
}
/* Emit an atomicrmw instruction on `ptr` with operand `val`.
 * The ordering is always sequentially consistent, in the named sync scope.
 * The C API enum is translated to the C++ AtomicRMWInst::BinOp because the
 * C API doesn't cover everything we need (e.g. custom sync scopes). */
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
{
   AtomicRMWInst::BinOp kind;

   switch (op) {
   case LLVMAtomicRMWBinOpXchg: kind = AtomicRMWInst::Xchg; break;
   case LLVMAtomicRMWBinOpAdd:  kind = AtomicRMWInst::Add;  break;
   case LLVMAtomicRMWBinOpSub:  kind = AtomicRMWInst::Sub;  break;
   case LLVMAtomicRMWBinOpAnd:  kind = AtomicRMWInst::And;  break;
   case LLVMAtomicRMWBinOpNand: kind = AtomicRMWInst::Nand; break;
   case LLVMAtomicRMWBinOpOr:   kind = AtomicRMWInst::Or;   break;
   case LLVMAtomicRMWBinOpXor:  kind = AtomicRMWInst::Xor;  break;
   case LLVMAtomicRMWBinOpMax:  kind = AtomicRMWInst::Max;  break;
   case LLVMAtomicRMWBinOpMin:  kind = AtomicRMWInst::Min;  break;
   case LLVMAtomicRMWBinOpUMax: kind = AtomicRMWInst::UMax; break;
   case LLVMAtomicRMWBinOpUMin: kind = AtomicRMWInst::UMin; break;
   case LLVMAtomicRMWBinOpFAdd: kind = AtomicRMWInst::FAdd; break;
   default:
      UNREACHABLE("invalid LLVMAtomicRMWBinOp");
      break;
   }

   unsigned scope_id = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
   Value *inst = unwrap(ctx->builder)
                    ->CreateAtomicRMW(kind, unwrap(ptr), unwrap(val), MaybeAlign(0),
                                      AtomicOrdering::SequentiallyConsistent, scope_id);
   return wrap(inst);
}
/* Emit a cmpxchg instruction on `ptr` comparing against `cmp` and storing
 * `val`. Both the success and failure orderings are sequentially consistent,
 * in the named sync scope. */
LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
{
   unsigned scope_id = unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
   Value *inst = unwrap(ctx->builder)
                    ->CreateAtomicCmpXchg(unwrap(ptr), unwrap(cmp), unwrap(val),
                                          MaybeAlign(0),
                                          AtomicOrdering::SequentiallyConsistent,
                                          AtomicOrdering::SequentiallyConsistent, scope_id);
   return wrap(inst);
}