diff --git a/src/amd/llvm/ac_llvm_helper.cpp b/src/amd/llvm/ac_llvm_helper.cpp
index af4a50f8409..429796f40ad 100644
--- a/src/amd/llvm/ac_llvm_helper.cpp
+++ b/src/amd/llvm/ac_llvm_helper.cpp
@@ -17,9 +17,14 @@
 #include
 #include
 #include
-#include
+#include
 #include
+#include
 #include
+#include
+#include
+#include
+#include
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include
 
@@ -234,6 +239,95 @@ struct raw_memory_ostream : public raw_pwrite_stream {
    }
 };
 
+/* Middle-end optimizer: owns the pass managers and analysis managers of
+ * LLVM's new pass manager and runs the optimization pipeline on a module.
+ */
+struct ac_midend_optimizer
+{
+   TargetMachine *target_machine;
+   PassBuilder pass_builder;
+   TargetLibraryInfoImpl target_library_info;
+
+   /* Keep the analysis managers declared in exactly this order,
+    * so that they are destroyed in the correct order:
+    * the analysis managers hold references to one another.
+    */
+   LoopAnalysisManager loop_am;
+   FunctionAnalysisManager function_am;
+   CGSCCAnalysisManager cgscc_am;
+   ModuleAnalysisManager module_am;
+
+   /* Pass managers, one per IR unit (loop, function, module). */
+   LoopPassManager loop_pm;
+   FunctionPassManager function_pm;
+   ModulePassManager module_pm;
+
+   ac_midend_optimizer(TargetMachine *arg_target_machine, bool arg_check_ir)
+      : target_machine(arg_target_machine),
+        pass_builder(target_machine, PipelineTuningOptions(), {}),
+        target_library_info(Triple(target_machine->getTargetTriple()))
+   {
+      /* Build the optimization pipeline; it is executed later by run().
+       * Custom analyses are registered first,
+       * ahead of LLVM's default analysis sets.
+       */
+      function_am.registerPass(
+         [&] { return TargetLibraryAnalysis(target_library_info); }
+      );
+
+      pass_builder.registerModuleAnalyses(module_am);
+      pass_builder.registerCGSCCAnalyses(cgscc_am);
+      pass_builder.registerFunctionAnalyses(function_am);
+      pass_builder.registerLoopAnalyses(loop_am);
+      pass_builder.crossRegisterProxies(loop_am, function_am, cgscc_am, module_am);
+
+      if (arg_check_ir)
+         module_pm.addPass(VerifierPass());
+
+      /* Adding the inliner pass directly to the module pass manager
+       * ensures that it runs on all functions first, so that the
+       * following passes only run on the remaining non-inline
+       * functions and no work is wasted on dead inline functions.
+       */
+      module_pm.addPass(AlwaysInlinerPass());
+
+      /* The following passes run to completion on one function/loop
+       * before proceeding to the next one.
+       */
+#if LLVM_VERSION_MAJOR >= 16
+      function_pm.addPass(SROAPass(SROAOptions::ModifyCFG));
+#else
+      /* LLVM < 16: SROAPass is constructed without options. */
+      function_pm.addPass(SROAPass());
+#endif
+
+      loop_pm.addPass(LICMPass(LICMOptions()));
+      function_pm.addPass(createFunctionToLoopPassAdaptor(std::move(loop_pm), true));
+      function_pm.addPass(SimplifyCFGPass());
+      function_pm.addPass(EarlyCSEPass(true));
+
+      module_pm.addPass(createModuleToFunctionPassAdaptor(std::move(function_pm)));
+   }
+
+   void run(Module &module)
+   {
+      module_pm.run(module, module_am);
+
+      /* After a run(), the results cached in the analysis managers
+       * are not valid for optimizing a subsequent LLVM module.
+       * Reusing them can lead to unexpected crashes.
+       * Hence, the results in the analysis managers
+       * need to be invalidated and cleared before
+       * running optimizations on a new LLVM module.
+       */
+      module_am.invalidate(module, PreservedAnalyses::none());
+      module_am.clear();
+      cgscc_am.clear();
+      function_am.clear();
+      loop_am.clear();
+   }
+};
+
 /* The LLVM compiler is represented as a pass manager containing passes for
  * optimizations, instruction selection, and code generation.
  */
@@ -277,41 +371,26 @@ bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module
    return true;
 }
 
-LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
-                                     bool check_ir)
+ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm,
+                                                bool check_ir)
 {
-   LLVMPassManagerRef passmgr = LLVMCreatePassManager();
-   if (!passmgr)
-      return NULL;
+   TargetMachine *TM = reinterpret_cast<TargetMachine *>(tm);
+   return new ac_midend_optimizer(TM, check_ir);
+}
 
-   if (target_library_info)
-      LLVMAddTargetLibraryInfo(target_library_info, passmgr);
+void ac_destroy_midend_optimiser(ac_midend_optimizer *meo)
+{
+   delete meo;
+}
 
-   if (check_ir)
-      unwrap(passmgr)->add(createVerifierPass());
+bool ac_llvm_optimize_module(ac_midend_optimizer *meo, LLVMModuleRef module)
+{
+   if (!meo)
+      return false;
 
-   unwrap(passmgr)->add(createAlwaysInlinerLegacyPass());
-
-   /* Normally, the pass manager runs all passes on one function before
-    * moving onto another. Adding a barrier no-op pass forces the pass
-    * manager to run the inliner on all functions first, which makes sure
-    * that the following passes are only run on the remaining non-inline
-    * function, so it removes useless work done on dead inline functions.
-    */
-   unwrap(passmgr)->add(createBarrierNoopPass());
-
-   #if LLVM_VERSION_MAJOR >= 16
-   unwrap(passmgr)->add(createSROAPass(true));
-   #else
-   unwrap(passmgr)->add(createSROAPass());
-   #endif
-   /* TODO: restore IPSCCP */
-   unwrap(passmgr)->add(createLICMPass());
-   unwrap(passmgr)->add(createCFGSimplificationPass());
-   /* This is recommended by the instruction combining pass. */
-   unwrap(passmgr)->add(createEarlyCSEPass(true));
-   unwrap(passmgr)->add(createInstructionCombiningPass());
-   return passmgr;
+   /* Run all the middle-end optimizations; no code generation happens here. */
+   meo->run(*unwrap(module));
+   return true;
 }
 
 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
diff --git a/src/amd/llvm/ac_llvm_util.c b/src/amd/llvm/ac_llvm_util.c
index 221cb99ee60..dddcfa32289 100644
--- a/src/amd/llvm/ac_llvm_util.c
+++ b/src/amd/llvm/ac_llvm_util.c
@@ -182,9 +182,9 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family
    if (!compiler->target_library_info)
       goto fail;
 
-   compiler->passmgr =
-      ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR);
-   if (!compiler->passmgr)
+   compiler->meo =
+      ac_create_midend_optimizer(compiler->tm, tm_options & AC_TM_CHECK_IR);
+   if (!compiler->meo)
       goto fail;
 
    return true;
@@ -198,8 +198,10 @@ void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
    ac_destroy_llvm_passes(compiler->passes);
    ac_destroy_llvm_passes(compiler->low_opt_passes);
 
-   if (compiler->passmgr)
-      LLVMDisposePassManager(compiler->passmgr);
+   /* Destroy the middle-end optimizer. */
+   if (compiler->meo)
+      ac_destroy_midend_optimiser(compiler->meo);
+
    if (compiler->target_library_info)
       ac_dispose_target_library_info(compiler->target_library_info);
    if (compiler->low_opt_tm)
diff --git a/src/amd/llvm/ac_llvm_util.h b/src/amd/llvm/ac_llvm_util.h
index 29f4a6a9bfb..6311cd048d5 100644
--- a/src/amd/llvm/ac_llvm_util.h
+++ b/src/amd/llvm/ac_llvm_util.h
@@ -44,10 +44,10 @@ enum ac_float_mode
 /* Per-thread persistent LLVM objects. */
 struct ac_llvm_compiler {
    LLVMTargetLibraryInfoRef target_library_info;
-   LLVMPassManagerRef passmgr;
 
    /* Default compiler. */
    LLVMTargetMachineRef tm;
+   struct ac_midend_optimizer *meo;
    struct ac_compiler_passes *passes;
 
    /* Optional compiler for faster compilation with fewer optimizations.
@@ -86,12 +86,15 @@ bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family
                            enum ac_target_machine_options tm_options);
 void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler);
 
+struct ac_midend_optimizer *ac_create_midend_optimizer(LLVMTargetMachineRef tm,
+                                                       bool check_ir);
+void ac_destroy_midend_optimiser(struct ac_midend_optimizer *meo);
+bool ac_llvm_optimize_module(struct ac_midend_optimizer *meo, LLVMModuleRef module);
+
 struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm);
 void ac_destroy_llvm_passes(struct ac_compiler_passes *p);
 bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
                               char **pelf_buffer, size_t *pelf_size);
-LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
-                                     bool check_ir);
 
 static inline bool ac_has_vec3_support(enum amd_gfx_level chip, bool use_format)
 {
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 5caf1c025fa..91868b5bef6 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -193,10 +193,9 @@ radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
 }
 
 static void
-ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef passmgr)
+ac_llvm_finalize_module(struct radv_shader_context *ctx, struct ac_midend_optimizer *meo)
 {
-   LLVMRunPassManager(passmgr, ctx->ac.module);
-
+   ac_llvm_optimize_module(meo, ctx->ac.module);
    ac_llvm_context_dispose(&ctx->ac);
 }
 
@@ -390,7 +389,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir
       fprintf(stderr, "\n");
    }
 
-   ac_llvm_finalize_module(&ctx, ac_llvm->passmgr);
+   ac_llvm_finalize_module(&ctx, ac_llvm->meo);
 
    free(name);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 1342d0e6425..84275bf1d0d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -236,7 +236,7 @@ void si_llvm_optimize_module(struct si_shader_context *ctx)
       ac_dump_module(ctx->ac.module);
 
    /* Run the pass */
-   LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
+   ac_llvm_optimize_module(ctx->compiler->meo, ctx->ac.module);
 }
 
 void si_llvm_dispose(struct si_shader_context *ctx)