From decf9af4726d93fee7308e5320eb0c77e4bab54f Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Tue, 29 Apr 2025 11:12:15 +0200 Subject: [PATCH] radv/rt: only use one user SGPR for the traversal shader addr All shaders are allocated in the 32-bit addr space. To avoid an issue with alignment, and also for future work, there is an unused user SGPR. Signed-off-by: Samuel Pitoiset Part-of: --- .../compiler/instruction_selection/aco_select_rt_prolog.cpp | 6 ++++-- src/amd/vulkan/nir/radv_nir_rt_shader.c | 5 ++++- src/amd/vulkan/radv_cmd_buffer.c | 4 ++-- src/amd/vulkan/radv_shader_args.c | 6 ++++-- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/amd/compiler/instruction_selection/aco_select_rt_prolog.cpp b/src/amd/compiler/instruction_selection/aco_select_rt_prolog.cpp index 55d0f4c45a4..4d747ad6df0 100644 --- a/src/amd/compiler/instruction_selection/aco_select_rt_prolog.cpp +++ b/src/amd/compiler/instruction_selection/aco_select_rt_prolog.cpp @@ -33,7 +33,8 @@ select_rt_prolog(Program* program, ac_shader_config* config, * Indirect descriptor sets: s[2] * Push constants pointer: s[3] * SBT descriptors: s[4-5] - * Traversal shader address: s[6-7] + * Traversal shader address: s[6] + * Unused (for future work): s[7] * Ray launch size address: s[8-9] * Dynamic callable stack base: s[10] * Workgroup IDs (xyz): s[11], s[12], s[13] @@ -70,7 +71,8 @@ select_rt_prolog(Program* program, ac_shader_config* config, * Indirect descriptor sets: s[2] * Push constants pointer: s[3] * SBT descriptors: s[4-5] - * Traversal shader address: s[6-7] + * Traversal shader address: s[6] + * Unused (for future work): s[7] * Ray launch sizes (xyz): s[8], s[9], s[10] * Scratch offset (ac, args->ac.rt.traversal_shader_addr); - nir_store_var(&b, vars.traversal_addr, nir_pack_64_2x32(&b, traversal_addr), 1); + nir_store_var(&b, vars.traversal_addr, + nir_pack_64_2x32_split(&b, traversal_addr, nir_imm_int(&b, pdev->info.address32_hi)), 1); nir_def *shader_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_addr); shader_addr = nir_pack_64_2x32(&b, shader_addr); diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 0dfa7a20c75..ef1be68eede 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -7974,9 +7974,9 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compu radeon_begin(cs); if (pdev->info.gfx_level >= GFX12) { - gfx12_push_64bit_pointer(traversal_shader_addr_offset, traversal_va); + gfx12_push_32bit_pointer(traversal_shader_addr_offset, traversal_va, &pdev->info); } else { - radeon_emit_64bit_pointer(traversal_shader_addr_offset, traversal_va); + radeon_emit_32bit_pointer(traversal_shader_addr_offset, traversal_va, &pdev->info); } radeon_end(); } diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index cbf4db3a2e7..d9213c356c2 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -322,7 +322,8 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS); ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.push_constants); ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors); - ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr); + ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, NULL); /* unused */ for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_sizes); i++) ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.rt.launch_sizes[i]); @@ -588,7 +589,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics if (info->type == RADV_SHADER_TYPE_RT_PROLOG) { add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS); - add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr, AC_UD_CS_TRAVERSAL_SHADER_ADDR); + add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr, AC_UD_CS_TRAVERSAL_SHADER_ADDR); + add_ud_arg(args, 1, AC_ARG_CONST_ADDR, NULL, AC_UD_PS_STATE); /* unused */ add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR); add_ud_arg(args, 1, AC_ARG_VALUE, &args->ac.rt.dynamic_callable_stack_base, AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);