mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-05 15:30:14 +01:00
ac,radeonsi: reduce optimizations for complex compute shaders on older APUs (v2)
This makes dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 finish sooner on older CPUs (otherwise it gets killed and we fail the test). Acked-by: Dave Airlie <airlied@gmail.com>
This commit is contained in:
parent
c2eab33b08
commit
cb6b241c30
6 changed files with 68 additions and 13 deletions
|
|
@ -142,6 +142,7 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
|
|||
|
||||
static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
|
||||
enum ac_target_machine_options tm_options,
|
||||
LLVMCodeGenOptLevel level,
|
||||
const char **out_triple)
|
||||
{
|
||||
assert(family >= CHIP_TAHITI);
|
||||
|
|
@ -163,7 +164,7 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
|
|||
triple,
|
||||
ac_get_llvm_processor_name(family),
|
||||
features,
|
||||
LLVMCodeGenLevelDefault,
|
||||
level,
|
||||
LLVMRelocDefault,
|
||||
LLVMCodeModelDefault);
|
||||
|
||||
|
|
@ -308,11 +309,20 @@ ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
|
|||
const char *triple;
|
||||
memset(compiler, 0, sizeof(*compiler));
|
||||
|
||||
compiler->tm = ac_create_target_machine(family,
|
||||
tm_options, &triple);
|
||||
compiler->tm = ac_create_target_machine(family, tm_options,
|
||||
LLVMCodeGenLevelDefault,
|
||||
&triple);
|
||||
if (!compiler->tm)
|
||||
return false;
|
||||
|
||||
if (tm_options & AC_TM_CREATE_LOW_OPT) {
|
||||
compiler->low_opt_tm =
|
||||
ac_create_target_machine(family, tm_options,
|
||||
LLVMCodeGenLevelLess, NULL);
|
||||
if (!compiler->low_opt_tm)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) {
|
||||
compiler->target_library_info =
|
||||
ac_create_target_library_info(triple);
|
||||
|
|
@ -341,6 +351,8 @@ ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
|
|||
if (compiler->target_library_info)
|
||||
ac_dispose_target_library_info(compiler->target_library_info);
|
||||
#endif
|
||||
if (compiler->low_opt_tm)
|
||||
LLVMDisposeTargetMachine(compiler->low_opt_tm);
|
||||
if (compiler->tm)
|
||||
LLVMDisposeTargetMachine(compiler->tm);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ enum ac_target_machine_options {
|
|||
AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
|
||||
AC_TM_CHECK_IR = (1 << 5),
|
||||
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
|
||||
AC_TM_CREATE_LOW_OPT = (1 << 7),
|
||||
};
|
||||
|
||||
enum ac_float_mode {
|
||||
|
|
@ -74,10 +75,18 @@ enum ac_float_mode {
|
|||
|
||||
/* Per-thread persistent LLVM objects. */
|
||||
struct ac_llvm_compiler {
|
||||
LLVMTargetMachineRef tm;
|
||||
LLVMTargetLibraryInfoRef target_library_info;
|
||||
LLVMPassManagerRef passmgr;
|
||||
|
||||
/* Default compiler. */
|
||||
LLVMTargetMachineRef tm;
|
||||
/* Passes used for normal compilation (paired with "tm"). */
struct ac_compiler_passes *passes;
|
||||
|
||||
/* Optional compiler for faster compilation with fewer optimizations.
|
||||
* LLVM modules can be created with "tm" too. There is no difference.
|
||||
*/
|
||||
LLVMTargetMachineRef low_opt_tm; /* uses -O1 instead of -O2 */
|
||||
/* Passes paired with "low_opt_tm"; NULL when low_opt_tm was not created. */
struct ac_compiler_passes *low_opt_passes;
|
||||
};
|
||||
|
||||
const char *ac_get_llvm_processor_name(enum radeon_family family);
|
||||
|
|
|
|||
|
|
@ -108,22 +108,32 @@ static const struct debug_named_value debug_options[] = {
|
|||
/* Initialize a per-thread LLVM compiler for the given screen.
 *
 * Builds the target-machine option mask from the screen's debug flags and
 * chip class, initializes the compiler, and creates the default pass set.
 * When the screen has no dedicated VRAM and chip_class <= VI, the
 * AC_TM_CREATE_LOW_OPT option is also requested and a second pass set is
 * created for the low-optimization target machine.
 */
static void si_init_compiler(struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler)
|
||||
{
|
||||
/* Only create the less-optimizing version of the compiler on APUs
|
||||
* predating Ryzen (Raven). */
|
||||
bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram &&
|
||||
sscreen->info.chip_class <= VI;
|
||||
|
||||
enum ac_target_machine_options tm_options =
|
||||
(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
|
||||
(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
|
||||
(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
|
||||
(sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) |
|
||||
(!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
|
||||
(sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0);
|
||||
(sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) |
|
||||
(create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
|
||||
|
||||
ac_init_llvm_once();
|
||||
/* NOTE(review): the result of ac_init_llvm_compiler is not checked here. */
ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options);
|
||||
compiler->passes = ac_create_llvm_passes(compiler->tm);
|
||||
|
||||
/* The low-opt pass set exists only if the low-opt target machine does. */
if (compiler->low_opt_tm)
|
||||
compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm);
|
||||
}
|
||||
|
||||
/* Tear down a compiler set up by si_init_compiler: destroy both pass sets,
 * then the LLVM compiler itself.
 */
static void si_destroy_compiler(struct ac_llvm_compiler *compiler)
|
||||
{
|
||||
ac_destroy_llvm_passes(compiler->passes);
|
||||
/* low_opt_passes is NULL when no low-opt compiler was created.
 * NOTE(review): assumes ac_destroy_llvm_passes accepts NULL - confirm. */
ac_destroy_llvm_passes(compiler->low_opt_passes);
|
||||
ac_destroy_llvm_compiler(compiler);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5645,7 +5645,8 @@ static int si_compile_llvm(struct si_screen *sscreen,
|
|||
LLVMModuleRef mod,
|
||||
struct pipe_debug_callback *debug,
|
||||
unsigned processor,
|
||||
const char *name)
|
||||
const char *name,
|
||||
bool less_optimized)
|
||||
{
|
||||
int r = 0;
|
||||
unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
|
||||
|
|
@ -5667,7 +5668,8 @@ static int si_compile_llvm(struct si_screen *sscreen,
|
|||
}
|
||||
|
||||
if (!si_replace_shader(count, binary)) {
|
||||
r = si_llvm_compile(mod, binary, compiler, debug);
|
||||
r = si_llvm_compile(mod, binary, compiler, debug,
|
||||
less_optimized);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
|
@ -5884,7 +5886,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
&ctx.shader->config, ctx.compiler,
|
||||
ctx.ac.module,
|
||||
debug, PIPE_SHADER_GEOMETRY,
|
||||
"GS Copy Shader");
|
||||
"GS Copy Shader", false);
|
||||
if (!r) {
|
||||
if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
|
||||
fprintf(stderr, "GS Copy Shader:\n");
|
||||
|
|
@ -6790,6 +6792,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
|
|||
LLVMBuildRetVoid(builder);
|
||||
}
|
||||
|
||||
/* Decide whether a shader should be compiled with the low-optimization
 * pass set.
 *
 * Returns false when the compiler has no low-opt passes. Otherwise returns
 * true only for compute shaders whose selector reports more than 1000
 * memory instructions.
 */
static bool si_should_optimize_less(struct ac_llvm_compiler *compiler,
|
||||
struct si_shader_selector *sel)
|
||||
{
|
||||
if (!compiler->low_opt_passes)
|
||||
return false;
|
||||
|
||||
/* Assume a slow CPU. */
|
||||
assert(!sel->screen->info.has_dedicated_vram &&
|
||||
sel->screen->info.chip_class <= VI);
|
||||
|
||||
/* For a crazy dEQP test containing 2597 memory opcodes, mostly
|
||||
* buffer stores. */
|
||||
return sel->type == PIPE_SHADER_COMPUTE &&
|
||||
sel->info.num_memory_instructions > 1000;
|
||||
}
|
||||
|
||||
int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct si_shader *shader,
|
||||
|
|
@ -7022,7 +7040,8 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
|||
|
||||
/* Compile to bytecode. */
|
||||
r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,
|
||||
ctx.ac.module, debug, ctx.type, "TGSI shader");
|
||||
ctx.ac.module, debug, ctx.type, "TGSI shader",
|
||||
si_should_optimize_less(compiler, shader->selector));
|
||||
si_llvm_dispose(&ctx);
|
||||
if (r) {
|
||||
fprintf(stderr, "LLVM failed to compile shader\n");
|
||||
|
|
@ -7189,7 +7208,7 @@ si_get_shader_part(struct si_screen *sscreen,
|
|||
si_llvm_optimize_module(&ctx);
|
||||
|
||||
if (si_compile_llvm(sscreen, &result->binary, &result->config, compiler,
|
||||
ctx.ac.module, debug, ctx.type, name)) {
|
||||
ctx.ac.module, debug, ctx.type, name, false)) {
|
||||
FREE(result);
|
||||
result = NULL;
|
||||
goto out;
|
||||
|
|
|
|||
|
|
@ -217,7 +217,8 @@ si_shader_context_from_abi(struct ac_shader_abi *abi)
|
|||
|
||||
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct pipe_debug_callback *debug);
|
||||
struct pipe_debug_callback *debug,
|
||||
bool less_optimized);
|
||||
|
||||
LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
|
||||
enum tgsi_opcode_type type);
|
||||
|
|
|
|||
|
|
@ -82,8 +82,12 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
|
|||
*/
|
||||
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
|
||||
struct ac_llvm_compiler *compiler,
|
||||
struct pipe_debug_callback *debug)
|
||||
struct pipe_debug_callback *debug,
|
||||
bool less_optimized)
|
||||
{
|
||||
struct ac_compiler_passes *passes =
|
||||
less_optimized && compiler->low_opt_passes ?
|
||||
compiler->low_opt_passes : compiler->passes;
|
||||
struct si_llvm_diagnostics diag;
|
||||
LLVMContextRef llvm_ctx;
|
||||
|
||||
|
|
@ -96,7 +100,7 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
|
|||
LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
|
||||
|
||||
/* Compile IR. */
|
||||
if (!ac_compile_module_to_binary(compiler->passes, M, binary))
|
||||
if (!ac_compile_module_to_binary(passes, M, binary))
|
||||
diag.retval = 1;
|
||||
|
||||
if (diag.retval != 0)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue