radeonsi: enable ACO by default

NIR+ACO is the best SSA-based shader compiler for AMD GPUs that exists.

There are many reasons why NIR+ACO is better than LLVM, and I have a long
list that I've collected over the years, but the major ones are better GPU
performance (faster GPU memory access thanks to better clauses and
scheduling, a lot less SGPR/VGPR spilling, better loop support, slightly
smaller shader binaries), 8x lower shader compile times, and smaller memory
footprint of the IR.

It also shows that NIR is a mature SSA-based shader compiler that helps
drivers generate optimized code very quickly.

And most importantly, radeonsi has slightly better Viewperf performance
with NIR+ACO than with LLVM, and that's difficult to ignore.

Reviewed-by: Qiang Yu <yuq825@gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38070>
This commit is contained in:
Marek Olšák 2025-10-27 01:19:04 -04:00 committed by Marge Bot
parent 4713df944b
commit e26c28f311
4 changed files with 4 additions and 26 deletions

View file

@ -1784,8 +1784,8 @@ RadeonSI driver environment variables
Use old-style monolithic shaders compiled on demand
``nooptvariant``
Disable compiling optimized shader variants.
``useaco``
Use ACO as shader compiler when possible
``usellvm``
Use LLVM as shader compiler when possible
``nowc``
Disable GTT write combining
``check_vm``

View file

@ -27,8 +27,6 @@ spec@!opengl 1.0@rasterpos@glsl_vs_tes_linked,Fail
# This is caused by lowering mediump before linking:
spec@glsl-es-1.00@linker@glsl-mismatched-uniform-precision-unused,Fail
KHR-GL46.shaders.uniform_block.random.nested_structs_instance_arrays.0,Fail
# See Khronos issue 5587: the test expects one-dimensional (array) texture to work while
# it's explicitly marked as non-supported by EXT_sparse_texture2.
KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase_texture_1d_array_r11f_g11f_b10f,Fail
@ -427,12 +425,3 @@ KHR-GL46.transform_feedback_overflow_query_ARB.multiple-streams-one-buffer-per-s
KHR-GL46.texture_query_lod.sampler1D_test,Fail
KHR-GL46.texture_query_lod.sampler2D_test,Fail
KHR-GL46.texture_query_lod.sampler3D_test,Fail
# escts failures (pass with ACO)
KHR-GLES3.shaders.uniform_block.random.nested_structs_instance_arrays.0,Fail
KHR-GLES31.core.shader_image_load_store.basic-allFormats-store-fs,Fail
# deqp failures (pass with ACO)
dEQP-GLES3.functional.ubo.random.all_per_block_buffers.29,Fail
dEQP-GLES3.functional.ubo.random.basic_instance_arrays.22,Fail
dEQP-GLES3.functional.ubo.random.nested_structs.4,Fail

1 # LLVM 20-dev (da439d3af47b)
27 spec@glsl-es-1.00@linker@glsl-mismatched-uniform-precision-unused,Fail
28 KHR-GL46.shaders.uniform_block.random.nested_structs_instance_arrays.0,Fail # See Khronos issue 5587: the test expects one-dimensional (array) texture to work while
29 # See Khronos issue 5587: the test expects one-dimensional (array) texture to work while # it's explicitly marked as non-supported by EXT_sparse_texture2.
# it's explicitly marked as non-supported by EXT_sparse_texture2.
KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase_texture_1d_array_r11f_g11f_b10f,Fail
30 KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase_texture_1d_array_r16,Fail KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase_texture_1d_array_r11f_g11f_b10f,Fail
31 KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase_texture_1d_array_r16_snorm,Fail KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase_texture_1d_array_r16,Fail
32 KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase_texture_1d_array_r16f,Fail KHR-GL46.sparse_texture2_tests.StandardPageSizesTestCase_texture_1d_array_r16_snorm,Fail
425
426
427
dEQP-GLES3.functional.ubo.random.all_per_block_buffers.29,Fail
dEQP-GLES3.functional.ubo.random.basic_instance_arrays.22,Fail
dEQP-GLES3.functional.ubo.random.nested_structs.4,Fail

View file

@ -121,7 +121,6 @@ static const struct debug_named_value radeonsi_shader_debug_options[] = {
{"checkir", DBG(CHECK_IR), "Enable additional sanity checks on shader IR"},
{"mono", DBG(MONOLITHIC_SHADERS), "Use old-style monolithic shaders compiled on demand"},
{"nooptvariant", DBG(NO_OPT_VARIANT), "Disable compiling optimized shader variants."},
{"useaco", DBG(USE_ACO), "Use ACO as shader compiler when possible"},
{"usellvm", DBG(USE_LLVM), "Use LLVM as shader compiler when possible"},
DEBUG_NAMED_VALUE_END /* must be last */
@ -1327,17 +1326,8 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
bool support_aco = aco_is_gpu_supported(&sscreen->info);
#if AMD_LLVM_AVAILABLE
/* For GFX11.5, LLVM < 19 is missing a workaround that can cause GPU hangs. ACO is the only
* alternative that has the workaround and is always available. Same for GFX12.
*/
if ((sscreen->info.gfx_level == GFX12 && LLVM_VERSION_MAJOR < 20) ||
(sscreen->info.gfx_level == GFX11_5 && LLVM_VERSION_MAJOR < 19))
sscreen->use_aco = true;
else if (sscreen->info.gfx_level >= GFX10)
sscreen->use_aco = (sscreen->shader_debug_flags & DBG(USE_ACO));
else
sscreen->use_aco = support_aco && sscreen->info.has_image_opcodes &&
!(sscreen->shader_debug_flags & DBG(USE_LLVM));
sscreen->use_aco = support_aco && sscreen->info.has_image_opcodes &&
!(sscreen->shader_debug_flags & DBG(USE_LLVM));
#else
sscreen->use_aco = true;
#endif

View file

@ -257,7 +257,6 @@ enum
DBG_MONOLITHIC_SHADERS,
DBG_NO_OPT_VARIANT,
DBG_USE_ACO,
DBG_USE_LLVM,
};