From af746cc2a6498fd7acbbe8fc3b632d6f58d7ac86 Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Fri, 19 Dec 2025 22:02:50 +0100 Subject: [PATCH] radv/rt: Use 64-bit keys for gfx11- This has a bit of sorting overhead, but can significantly increase BVH quality especially in big BVHs. gfx12 is faster at intersecting, so only enable for gfx11 and earlier right now. Part-of: --- src/amd/vulkan/radv_acceleration_structure.c | 24 +++++++++++++++++++- src/amd/vulkan/radv_device.h | 1 + 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/amd/vulkan/radv_acceleration_structure.c b/src/amd/vulkan/radv_acceleration_structure.c index d572aa47717..7b9b19c5407 100644 --- a/src/amd/vulkan/radv_acceleration_structure.c +++ b/src/amd/vulkan/radv_acceleration_structure.c @@ -10,6 +10,7 @@ #include "radix_sort/common/vk/barrier.h" #include "radix_sort/radix_sort_u64.h" +#include "radix_sort/radix_sort_u96.h" #include "bvh/build_interface.h" #include "bvh/bvh.h" @@ -235,6 +236,8 @@ radv_device_finish_accel_struct_build_state(struct radv_device *device) if (state->accel_struct_build.radix_sort_64) radix_sort_vk_destroy(state->accel_struct_build.radix_sort_64, _device, &state->alloc); + if (state->accel_struct_build.radix_sort_96) + radix_sort_vk_destroy(state->accel_struct_build.radix_sort_96, _device, &state->alloc); } static VkDeviceSize @@ -282,8 +285,10 @@ radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_s VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info); - if (state->build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR) + if (state->build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR) { state->config.internal_type = VK_INTERNAL_BUILD_TYPE_HPLOC; + state->config.u64_keys = pdev->info.gfx_level < GFX12; + } uint32_t encode_key = 0; if (radv_use_bvh8(pdev)) { @@ -945,6 +950,20 @@ static const struct radix_sort_vk_target_config radix_sort_64_config = { .scatter.block_rows = 14, }; +static const struct radix_sort_vk_target_config radix_sort_96_config = { + .keyval_dwords = 3, + .fill.workgroup_size_log2 = 7, + .fill.block_rows = 8, + .histogram.workgroup_size_log2 = 8, + .histogram.subgroup_size_log2 = 6, + .histogram.block_rows = 14, + .prefix.workgroup_size_log2 = 8, + .prefix.subgroup_size_log2 = 6, + .scatter.workgroup_size_log2 = 8, + .scatter.subgroup_size_log2 = 6, + .scatter.block_rows = 14, +}; + static void radv_write_buffer_cp(VkCommandBuffer commandBuffer, VkDeviceAddress addr, void *data, uint32_t size) { @@ -989,6 +1008,8 @@ radv_device_init_accel_struct_build_state(struct radv_device *device) device->meta_state.accel_struct_build.radix_sort_64 = vk_create_radix_sort_u64( radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache, radix_sort_64_config); + device->meta_state.accel_struct_build.radix_sort_96 = vk_create_radix_sort_u96( + radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache, radix_sort_96_config); device->meta_state.accel_struct_build.build_ops = (struct vk_acceleration_structure_build_ops){ .begin_debug_marker = vk_accel_struct_cmd_begin_debug_marker, @@ -1044,6 +1065,7 @@ radv_device_init_accel_struct_build_state(struct radv_device *device) build_args->propagate_cull_flags = pdev->info.gfx_level >= GFX11; build_args->emit_markers = device->sqtt.bo; build_args->radix_sort_64 = device->meta_state.accel_struct_build.radix_sort_64; + build_args->radix_sort_96 = device->meta_state.accel_struct_build.radix_sort_96; exit: mtx_unlock(&device->meta_state.mtx); diff --git a/src/amd/vulkan/radv_device.h b/src/amd/vulkan/radv_device.h index 19700cdf002..9d6451e3071 100644 --- a/src/amd/vulkan/radv_device.h +++ b/src/amd/vulkan/radv_device.h @@ -82,6 +82,7 @@ struct radv_meta_state { struct { struct radix_sort_vk *radix_sort_64; + struct radix_sort_vk *radix_sort_96; struct vk_acceleration_structure_build_ops build_ops; struct vk_acceleration_structure_build_args build_args; } accel_struct_build;