From 976efbe12a0aa1732feb4a36ead38bb61dc01347 Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Sat, 14 Mar 2026 15:59:58 +0100 Subject: [PATCH] radv/rt: Fix shared ray query stack on top of application LDS Since the stack pointer may wrap around the stack size in overflow cases, traversal logic calculates the real stack pointer with nir_umod_imm(b, stack, args->stack_entries * args->stack_stride). For ray queries, "stack" was initialized to "stack_base + local_invocation_idx * 4". This was completely broken, as the umod would later delete the stack base completely and overwrite the start of LDS, which belongs to the apps' shared memory. Instead, add the stack base as a constant offset in the load/store_stack callback. (This should also save 1 VALU per ray query) Also, delete radv_ray_traversal_args::stack_base since it's unused now. Cc: mesa-stable (cherry picked from commit b046eaf36da78631aa4207b6c05f69e6c17a248e) Part-of: --- .pick_status.json | 2 +- src/amd/vulkan/nir/radv_nir_lower_ray_queries.c | 8 ++------ src/amd/vulkan/nir/radv_nir_rt_common.c | 4 ++-- src/amd/vulkan/nir/radv_nir_rt_common.h | 3 +-- src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c | 1 - 5 files changed, 6 insertions(+), 12 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 3a66019d8b4..93630425a45 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -234,7 +234,7 @@ "description": "radv/rt: Fix shared ray query stack on top of application LDS", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c index 388b8d151a9..4a374246f5b 100644 --- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c +++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c @@ -324,7 +324,6 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query rq_store(b, rq, trav_stack_low_watermark, addr); } else { nir_def *base_offset = nir_imul_imm(b, stack_idx, sizeof(uint32_t)); - base_offset = nir_iadd_imm(b, base_offset, vars->shared_base); rq_store(b, rq, trav_stack, base_offset); rq_store(b, rq, trav_stack_low_watermark, base_offset); } @@ -494,7 +493,7 @@ store_stack_entry(nir_builder *b, nir_def *index, nir_def *value, const struct r struct traversal_data *data = args->data; if (data->vars->shared_stack) - nir_store_shared(b, value, index, .base = 0, .align_mul = 4); + nir_store_shared(b, value, index, .base = data->vars->shared_base, .align_mul = 4); else nir_store_deref(b, nir_build_deref_array(b, rq_deref(b, data->rq, stack), index), value, 0x1); } @@ -505,7 +504,7 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal struct traversal_data *data = args->data; if (data->vars->shared_stack) - return nir_load_shared(b, 1, 32, index, .base = 0, .align_mul = 4); + return nir_load_shared(b, 1, 32, index, .base = data->vars->shared_base, .align_mul = 4); else return nir_load_deref(b, nir_build_deref_array(b, rq_deref(b, data->rq, stack), index)); } @@ -578,16 +577,13 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va args.use_bvh_stack_rtn = vars->use_bvh_stack_rtn; if (args.use_bvh_stack_rtn) { args.stack_stride = 1; - args.stack_base = 0; } else { uint32_t workgroup_size = b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2]; args.stack_stride = workgroup_size * 4; - args.stack_base = vars->shared_base; } } else { args.stack_stride = 1; - args.stack_base = 0; } rq_store(b, rq, break_flag, nir_imm_false(b)); diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.c b/src/amd/vulkan/nir/radv_nir_rt_common.c index b89a3a16f44..38e8503c1c3 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.c +++ b/src/amd/vulkan/nir/radv_nir_rt_common.c @@ -878,7 +878,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc /* Early exit if we never overflowed the stack, to avoid having to backtrack to * the root for no reason. */ if (!args->use_bvh_stack_rtn) { - nir_push_if(b, nir_ilt_imm(b, nir_load_deref(b, args->vars.stack), args->stack_base + args->stack_stride)); + nir_push_if(b, nir_ilt_imm(b, nir_load_deref(b, args->vars.stack), args->stack_stride)); { nir_store_var(b, incomplete, nir_imm_false(b), 0x1); nir_jump(b, nir_jump_break); @@ -1174,7 +1174,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const /* Early exit if we never overflowed the stack, to avoid having to backtrack to * the root for no reason. */ if (!args->use_bvh_stack_rtn) { - nir_push_if(b, nir_ilt_imm(b, nir_load_deref(b, args->vars.stack), args->stack_base + args->stack_stride)); + nir_push_if(b, nir_ilt_imm(b, nir_load_deref(b, args->vars.stack), args->stack_stride)); { nir_store_var(b, incomplete, nir_imm_false(b), 0x1); nir_jump(b, nir_jump_break); diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.h b/src/amd/vulkan/nir/radv_nir_rt_common.h index c2bd561e683..e48bdc07995 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.h +++ b/src/amd/vulkan/nir/radv_nir_rt_common.h @@ -135,10 +135,9 @@ struct radv_ray_traversal_args { struct radv_ray_traversal_vars vars; /* The increment/decrement used for radv_ray_traversal_vars::stack, and how many entries are - * available. stack_base is the base address of the stack. */ + * available. */ uint32_t stack_stride; uint32_t stack_entries; - uint32_t stack_base; uint32_t set_flags; uint32_t unset_flags; diff --git a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c index c7fcb4d391c..e3be9a3ce64 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c +++ b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c @@ -1251,7 +1251,6 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin .vars = trav_vars_args, .stack_stride = stack_stride, .stack_entries = MAX_STACK_ENTRY_COUNT, - .stack_base = 0, .ignore_cull_mask = params->ignore_cull_mask, .set_flags = info ? info->set_flags : 0, .unset_flags = info ? info->unset_flags : 0,