radv/rt: only use one user SGPR for the traversal shader addr

All shaders are allocated in the 32-bit address space, so the traversal
shader address fits in a single user SGPR. The second user SGPR is kept but
left unused, both to avoid an alignment issue and to reserve it for future work.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37133>
This commit is contained in:
Samuel Pitoiset 2025-04-29 11:12:15 +02:00 committed by Marge Bot
parent 17647e8eeb
commit decf9af472
4 changed files with 14 additions and 7 deletions

View file

@ -33,7 +33,8 @@ select_rt_prolog(Program* program, ac_shader_config* config,
* Indirect descriptor sets: s[2]
* Push constants pointer: s[3]
* SBT descriptors: s[4-5]
* Traversal shader address: s[6-7]
* Traversal shader address: s[6]
* Unused (for future work): s[7]
* Ray launch size address: s[8-9]
* Dynamic callable stack base: s[10]
* Workgroup IDs (xyz): s[11], s[12], s[13]
@ -70,7 +71,8 @@ select_rt_prolog(Program* program, ac_shader_config* config,
* Indirect descriptor sets: s[2]
* Push constants pointer: s[3]
* SBT descriptors: s[4-5]
* Traversal shader address: s[6-7]
* Traversal shader address: s[6]
* Unused (for future work): s[7]
* Ray launch sizes (xyz): s[8], s[9], s[10]
* Scratch offset (<GFX9 only): s[11]
* Ring offsets (<GFX9 only): s[12-13]

View file

@ -1962,6 +1962,8 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
bool monolithic, bool has_position_fetch,
const struct radv_ray_tracing_stage_info *traversal_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
const VkPipelineCreateFlagBits2 create_flags = vk_rt_pipeline_create_flags(pCreateInfo);
@ -2013,7 +2015,8 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
}
nir_def *traversal_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.traversal_shader_addr);
nir_store_var(&b, vars.traversal_addr, nir_pack_64_2x32(&b, traversal_addr), 1);
nir_store_var(&b, vars.traversal_addr,
nir_pack_64_2x32_split(&b, traversal_addr, nir_imm_int(&b, pdev->info.address32_hi)), 1);
nir_def *shader_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.shader_addr);
shader_addr = nir_pack_64_2x32(&b, shader_addr);

View file

@ -7974,9 +7974,9 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_compu
radeon_begin(cs);
if (pdev->info.gfx_level >= GFX12) {
gfx12_push_64bit_pointer(traversal_shader_addr_offset, traversal_va);
gfx12_push_32bit_pointer(traversal_shader_addr_offset, traversal_va, &pdev->info);
} else {
radeon_emit_64bit_pointer(traversal_shader_addr_offset, traversal_va);
radeon_emit_32bit_pointer(traversal_shader_addr_offset, traversal_va, &pdev->info);
}
radeon_end();
}

View file

@ -322,7 +322,8 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->descriptor_sets[0], AC_UD_INDIRECT_DESCRIPTOR_SETS);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.push_constants);
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors);
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_ADDR, NULL); /* unused */
for (uint32_t i = 0; i < ARRAY_SIZE(args->ac.rt.launch_sizes); i++)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, &args->ac.rt.launch_sizes[i]);
@ -588,7 +589,8 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
if (info->type == RADV_SHADER_TYPE_RT_PROLOG) {
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.sbt_descriptors, AC_UD_CS_SBT_DESCRIPTORS);
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, &args->ac.rt.traversal_shader_addr, AC_UD_CS_TRAVERSAL_SHADER_ADDR);
add_ud_arg(args, 1, AC_ARG_CONST_ADDR, NULL, AC_UD_PS_STATE); /* unused */
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.rt.launch_size_addr, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
add_ud_arg(args, 1, AC_ARG_VALUE, &args->ac.rt.dynamic_callable_stack_base,
AC_UD_CS_RAY_DYNAMIC_CALLABLE_STACK_BASE);