amd,radeonsi: use new pass manager to handle midend optimizations

Adds an optimizer structure that builds an optimization
pipeline to run LLVM passes using the new pass manager.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30506>
This commit is contained in:
Ganesh Belgur Ramachandra 2024-08-04 11:40:18 -05:00 committed by Marge Bot
parent 686459f115
commit 38e50221cd
5 changed files with 128 additions and 45 deletions

View file

@ -17,9 +17,14 @@
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils.h>
#include <llvm/CodeGen/Passes.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Transforms/InstCombine/InstCombine.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/IPO/SCCP.h>
#include <llvm/Transforms/Scalar/EarlyCSE.h>
#include <llvm/Transforms/Scalar/LICM.h>
#include <llvm/Transforms/Scalar/SROA.h>
#include <llvm/Transforms/Scalar/SimplifyCFG.h>
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include <cstring>
@ -234,6 +239,95 @@ struct raw_memory_ostream : public raw_pwrite_stream {
}
};
/* Middle-end optimizer that runs LLVM IR passes through LLVM's new pass
 * manager infrastructure.
 *
 * The pass pipeline is assembled once in the constructor; run() then applies
 * it to a module and resets the cached analyses so that the same object can
 * be reused for the next module.
 */
struct ac_midend_optimizer
{
   /* target_machine must stay the first member: the initializers of
    * pass_builder and target_library_info below read it, and members are
    * initialized in declaration order.
    */
   TargetMachine *target_machine;
   PassBuilder pass_builder;
   TargetLibraryInfoImpl target_library_info;

   /* Should be declared in this order only,
    * so that they are destroyed in the correct order
    * due to inter-analysis-manager references.
    */
   LoopAnalysisManager loop_am;
   FunctionAnalysisManager function_am;
   CGSCCAnalysisManager cgscc_am;
   ModuleAnalysisManager module_am;

   /* Pass managers. loop_pm and function_pm are only staging areas: the
    * constructor moves them into function_pm and module_pm respectively,
    * so module_pm is the only one that run() executes.
    */
   LoopPassManager loop_pm;
   FunctionPassManager function_pm;
   ModulePassManager module_pm;

   /* Builds the optimization pipeline.
    *
    * arg_target_machine: target used to configure the PassBuilder and the
    *                     target library info. It is dereferenced here, so it
    *                     must not be NULL.
    * arg_check_ir:       when true, a VerifierPass is scheduled ahead of all
    *                     other passes.
    */
   ac_midend_optimizer(TargetMachine *arg_target_machine, bool arg_check_ir)
      : target_machine(arg_target_machine),
        pass_builder(target_machine, PipelineTuningOptions(), {}),
        target_library_info(Triple(target_machine->getTargetTriple()))
   {
      /* Any custom analyses should be registered
       * before LLVM's default analysis sets.
       */
      function_am.registerPass(
         [&] { return TargetLibraryAnalysis(target_library_info); }
      );

      pass_builder.registerModuleAnalyses(module_am);
      pass_builder.registerCGSCCAnalyses(cgscc_am);
      pass_builder.registerFunctionAnalyses(function_am);
      pass_builder.registerLoopAnalyses(loop_am);
      pass_builder.crossRegisterProxies(loop_am, function_am, cgscc_am, module_am);

      if (arg_check_ir)
         module_pm.addPass(VerifierPass());

      /* Adding the inliner directly to the module pass manager makes it run
       * on all functions first, so the function passes below only run on
       * the remaining non-inline functions and no work is wasted on dead
       * inline functions.
       */
      module_pm.addPass(AlwaysInlinerPass());

      /* The following set of passes run on an individual function/loop first
       * before proceeding to the next.
       */
#if LLVM_VERSION_MAJOR >= 16
      function_pm.addPass(SROAPass(SROAOptions::ModifyCFG));
#else
      /* Older LLVM: SROAPass takes no options. */
      function_pm.addPass(SROAPass());
#endif

      /* LICM is a loop pass, so it is wrapped in a loop adaptor. */
      loop_pm.addPass(LICMPass(LICMOptions()));
      function_pm.addPass(createFunctionToLoopPassAdaptor(std::move(loop_pm), true));
      function_pm.addPass(SimplifyCFGPass());
      /* This is recommended by the instruction combining pass. */
      function_pm.addPass(EarlyCSEPass(true));
      /* The legacy pass-manager pipeline that this one replaces finished
       * with instruction combining; keep it so the set of optimizations
       * stays the same.
       */
      function_pm.addPass(InstCombinePass());

      module_pm.addPass(createModuleToFunctionPassAdaptor(std::move(function_pm)));
   }

   /* Runs the whole pipeline on the given module, then drops every cached
    * analysis result.
    */
   void run(Module &module)
   {
      module_pm.run(module, module_am);

      /* After a run(), the results in the analysis managers
       * aren't useful to optimize a subsequent LLVM module.
       * If used, they can lead to unexpected crashes.
       * Hence, the results in the analysis managers
       * need to be invalidated and cleared before
       * running optimizations on a new LLVM module.
       */
      module_am.invalidate(module, PreservedAnalyses::none());
      module_am.clear();
      cgscc_am.clear();
      function_am.clear();
      loop_am.clear();
   }
};
/* The LLVM compiler is represented as a pass manager containing passes for
* optimizations, instruction selection, and code generation.
*/
@ -277,41 +371,26 @@ bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module
return true;
}
LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
bool check_ir)
ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm,
bool check_ir)
{
LLVMPassManagerRef passmgr = LLVMCreatePassManager();
if (!passmgr)
return NULL;
TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
return new ac_midend_optimizer(TM, check_ir);
}
if (target_library_info)
LLVMAddTargetLibraryInfo(target_library_info, passmgr);
/* Frees an optimizer allocated by ac_create_midend_optimizer().
 *
 * A NULL pointer is accepted: the body is a plain delete-expression, which
 * does nothing for a null pointer.
 *
 * NOTE(review): this function is spelled "optimiser" while the type and the
 * create function use "optimizer"; the name is left alone because the
 * header declaration and ac_destroy_llvm_compiler() use this spelling.
 */
void ac_destroy_midend_optimiser(ac_midend_optimizer *meo)
{
delete meo;
}
if (check_ir)
unwrap(passmgr)->add(createVerifierPass());
bool ac_llvm_optimize_module(ac_midend_optimizer *meo, LLVMModuleRef module)
{
if (!meo)
return false;
unwrap(passmgr)->add(createAlwaysInlinerLegacyPass());
/* Normally, the pass manager runs all passes on one function before
* moving onto another. Adding a barrier no-op pass forces the pass
* manager to run the inliner on all functions first, which makes sure
* that the following passes are only run on the remaining non-inline
* function, so it removes useless work done on dead inline functions.
*/
unwrap(passmgr)->add(createBarrierNoopPass());
#if LLVM_VERSION_MAJOR >= 16
unwrap(passmgr)->add(createSROAPass(true));
#else
unwrap(passmgr)->add(createSROAPass());
#endif
/* TODO: restore IPSCCP */
unwrap(passmgr)->add(createLICMPass());
unwrap(passmgr)->add(createCFGSimplificationPass());
/* This is recommended by the instruction combining pass. */
unwrap(passmgr)->add(createEarlyCSEPass(true));
unwrap(passmgr)->add(createInstructionCombiningPass());
return passmgr;
/* Runs all the middle-end optimizations, no code generation */
meo->run(*unwrap(module));
return true;
}
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,

View file

@ -182,9 +182,9 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family
if (!compiler->target_library_info)
goto fail;
compiler->passmgr =
ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR);
if (!compiler->passmgr)
compiler->meo =
ac_create_midend_optimizer(compiler->tm, tm_options & AC_TM_CHECK_IR);
if (!compiler->meo)
goto fail;
return true;
@ -198,8 +198,10 @@ void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
ac_destroy_llvm_passes(compiler->passes);
ac_destroy_llvm_passes(compiler->low_opt_passes);
if (compiler->passmgr)
LLVMDisposePassManager(compiler->passmgr);
/* delete optimizer pass manager */
if (compiler->meo)
ac_destroy_midend_optimiser(compiler->meo);
if (compiler->target_library_info)
ac_dispose_target_library_info(compiler->target_library_info);
if (compiler->low_opt_tm)

View file

@ -44,10 +44,10 @@ enum ac_float_mode
/* Per-thread persistent LLVM objects. */
struct ac_llvm_compiler {
LLVMTargetLibraryInfoRef target_library_info;
LLVMPassManagerRef passmgr;
/* Default compiler. */
LLVMTargetMachineRef tm;
struct ac_midend_optimizer *meo;
struct ac_compiler_passes *passes;
/* Optional compiler for faster compilation with fewer optimizations.
@ -86,12 +86,15 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family
enum ac_target_machine_options tm_options);
void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
struct ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm,
bool check_ir);
void ac_destroy_midend_optimiser(struct ac_midend_optimizer *meo);
bool ac_llvm_optimize_module(struct ac_midend_optimizer *meo, LLVMModuleRef module);
struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm);
void ac_destroy_llvm_passes(struct ac_compiler_passes *p);
bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
char **pelf_buffer, size_t *pelf_size);
LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
bool check_ir);
static inline bool ac_has_vec3_support(enum amd_gfx_level chip, bool use_format)
{

View file

@ -193,10 +193,9 @@ radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
}
static void
ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef passmgr)
ac_llvm_finalize_module(struct radv_shader_context *ctx, struct ac_midend_optimizer *meo)
{
LLVMRunPassManager(passmgr, ctx->ac.module);
ac_llvm_optimize_module(meo, ctx->ac.module);
ac_llvm_context_dispose(&ctx->ac);
}
@ -390,7 +389,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir
fprintf(stderr, "\n");
}
ac_llvm_finalize_module(&ctx, ac_llvm->passmgr);
ac_llvm_finalize_module(&ctx, ac_llvm->meo);
free(name);

View file

@ -236,7 +236,7 @@ void si_llvm_optimize_module(struct si_shader_context *ctx)
ac_dump_module(ctx->ac.module);
/* Run the pass */
LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
ac_llvm_optimize_module(ctx->compiler->meo, ctx->ac.module);
}
void si_llvm_dispose(struct si_shader_context *ctx)