radv/nir: Correctly handle workgroup sizes not aligned to 32

Since the ds_bvh_stack* stride is always 32 dwords, we need to round the
workgroup size up to a multiple of that value. Using MAX2() only handles
workgroup sizes smaller than 32 (the case hit by some CTS tests with 1x1
workgroups); larger sizes that are not multiples of 32 stay unaligned.

Cc: mesa-stable
(cherry picked from commit b08f9f192c)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40092>
This commit is contained in:
Natalie Vock 2026-02-19 10:56:46 +01:00 committed by Eric Engestrom
parent 54293d4fdd
commit 71145cb846
2 changed files with 3 additions and 3 deletions

View file

@ -1724,7 +1724,7 @@
"description": "radv/nir: Correctly handle workgroup sizes not aligned to 32",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"main_sha": null,
"because_sha": null,
"notes": null

View file

@ -163,7 +163,7 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
uint32_t shared_stack_entries = shader->info.ray_queries == 1 ? 16 : 8;
/* ds_bvh_stack* instructions use a fixed stride of 32 dwords. */
if (radv_use_bvh_stack_rtn(pdev))
-workgroup_size = MAX2(workgroup_size, 32);
+workgroup_size = align(workgroup_size, 32);
uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4;
uint32_t shared_offset = align(shader->info.shared_size, 4);
@ -173,7 +173,7 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
} else {
if (radv_use_bvh_stack_rtn(pdev)) {
/* The hardware ds_bvh_stack_rtn address can only encode a stack base up to 8191 dwords. */
-uint32_t num_wave32_groups = DIV_ROUND_UP(workgroup_size, 32);
+uint32_t num_wave32_groups = workgroup_size / 32;
uint32_t max_group_stack_base = (num_wave32_groups - 1) * 32 * shared_stack_entries;
uint32_t max_stack_base = (shared_offset / 4) + max_group_stack_base;
dst->use_bvh_stack_rtn = max_stack_base < 8192;