From 0b4e497c7bf1b33284dab70aec39307dcfee2560 Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Fri, 13 Mar 2026 16:48:50 +0100 Subject: [PATCH] radv/rt: Bump ray query stack base limit for GFX12 GFX12 encoding added one bit to the stack offset, doubling the limit on the stack base offset that is possible to encode. In practice, this always allows using bvh_stack_push* instructions on GFX12 since LDS is still 64kB. Cc: mesa-stable Fixes: 59a39779 (radv/rt: Only use ds_bvh_stack_rtn if the stack base is possible to encode) (cherry picked from commit 867d0b33b3292ee2d8499b1f0734f5005600ddd6) Part-of: --- .pick_status.json | 2 +- src/amd/vulkan/nir/radv_nir_lower_ray_queries.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index d748ae69f2b..748921b16f9 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -664,7 +664,7 @@ "description": "radv/rt: Bump ray query stack base limit for GFX12", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "59a397793ea204df8a2ecfd89c7b6d96412a7f6d", "notes": null diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index aabe1f2b783..388b8d151a9 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -172,11 +172,14 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT; } else { if (radv_use_bvh_stack_rtn(pdev)) { - /* The hardware ds_bvh_stack_rtn address can only encode a stack base up to 8191 dwords. */ + /* The hardware ds_bvh_stack_rtn address can only encode a stack base up to 8191 dwords, or 16383 dwords on + * gfx12+. + */ uint32_t num_wave32_groups = workgroup_size / 32; uint32_t max_group_stack_base = (num_wave32_groups - 1) * 32 * shared_stack_entries; uint32_t max_stack_base = (shared_offset / 4) + max_group_stack_base; - dst->use_bvh_stack_rtn = max_stack_base < 8192; + uint32_t max_hw_stack_base = pdev->info.gfx_level >= GFX12 ? 16384 : 8192; + dst->use_bvh_stack_rtn = max_stack_base < max_hw_stack_base; } dst->shared_stack = true; dst->shared_base = shared_offset;