From 0961aba8a7cdd0ad25925403bd93b3782b82ee2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Tue, 21 Oct 2025 17:28:43 +0200 Subject: [PATCH] radeonsi: Lower larger workgroups to 256 for CS regalloc bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even though radeonsi may not use compute queues, other processes might run compute jobs in the background, so radeonsi must make sure not to use larger than 256 sized workgroups on GPUs that are affected by the regalloc hang. Signed-off-by: Timur Kristóf Part-of: --- src/gallium/drivers/radeonsi/si_shader.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ac60af71462..1258dd77247 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -670,6 +670,22 @@ static void si_preprocess_nir(struct si_nir_shader_ctx *ctx) } if (mesa_shader_stage_is_compute(nir->info.stage)) { + if (sel->screen->info.has_cs_regalloc_hang_bug) { + const uint32_t wg_size = nir->info.workgroup_size[0] * + nir->info.workgroup_size[1] * + nir->info.workgroup_size[2]; + + if (wg_size > 256) { + si_nir_opts(sel->screen, nir, true); + NIR_PASS(progress, nir, nir_lower_workgroup_size, 256); + + if (progress) + si_nir_opts(sel->screen, nir, true); + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + } + } + /* gl_LocalInvocationIndex must be derived from gl_LocalInvocationID.xyz to make it correct * with quad derivatives. Using gl_SubgroupID for that (which is what we do by default) is * incorrect with a non-linear thread order.