From 71145cb846b9be86409edde1b5970d4dea72a575 Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Thu, 19 Feb 2026 10:56:46 +0100 Subject: [PATCH] radv/nir: Correctly handle workgroup sizes not aligned to 32 Since the stride is always 32 dwords, we need to treat the workgroup size as multiples of that value. Using MAX2() only works for cases where the workgroup size is less than 32, which was hit by some CTS with 1x1 workgroups. Cc: mesa-stable (cherry picked from commit b08f9f192ce881078aae442d22ec626703b4c81d) Part-of: --- .pick_status.json | 2 +- src/amd/vulkan/nir/radv_nir_lower_ray_queries.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index a526126cd76..cde736e6acc 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1724,7 +1724,7 @@ "description": "radv/nir: Correctly handle workgroup sizes not aligned to 32", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index e4080ce59e6..aabe1f2b783 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -163,7 +163,7 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray uint32_t shared_stack_entries = shader->info.ray_queries == 1 ? 16 : 8; /* ds_bvh_stack* instructions use a fixed stride of 32 dwords. */ if (radv_use_bvh_stack_rtn(pdev)) - workgroup_size = MAX2(workgroup_size, 32); + workgroup_size = align(workgroup_size, 32); uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4; uint32_t shared_offset = align(shader->info.shared_size, 4); @@ -173,7 +173,7 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray } else { if (radv_use_bvh_stack_rtn(pdev)) { /* The hardware ds_bvh_stack_rtn address can only encode a stack base up to 8191 dwords. */ - uint32_t num_wave32_groups = DIV_ROUND_UP(workgroup_size, 32); + uint32_t num_wave32_groups = workgroup_size / 32; uint32_t max_group_stack_base = (num_wave32_groups - 1) * 32 * shared_stack_entries; uint32_t max_stack_base = (shared_offset / 4) + max_group_stack_base; dst->use_bvh_stack_rtn = max_stack_base < 8192;