tu: Support acceleration_structure for wave64

Gen8 replaces wave128 with double dispatch wave64, and so will need
smaller subgroup sizes.

Signed-off-by: Rob Clark <rob.clark@oss.qualcomm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39167>
This commit is contained in:
Rob Clark 2026-01-12 06:34:57 -08:00 committed by Marge Bot
parent 380c79c923
commit 039e21fde8

View file

@ -352,7 +352,7 @@ const struct vk_acceleration_structure_build_ops tu_as_build_ops = {
.encode_as = { encode, header },
};
struct radix_sort_vk_target_config tu_radix_sort_config = {
const struct radix_sort_vk_target_config tu_radix_sort_config_128 = {
.keyval_dwords = 2,
.init = { .workgroup_size_log2 = 8, },
.fill = { .workgroup_size_log2 = 8, .block_rows = 8 },
@ -373,16 +373,41 @@ struct radix_sort_vk_target_config tu_radix_sort_config = {
.nonsequential_dispatch = false,
};
const struct radix_sort_vk_target_config tu_radix_sort_config_64 = {
.keyval_dwords = 2,
.init = { .workgroup_size_log2 = 8, },
.fill = { .workgroup_size_log2 = 8, .block_rows = 8 },
.histogram = {
.workgroup_size_log2 = 8,
.subgroup_size_log2 = 6,
.block_rows = 14, /* TODO tune this */
},
.prefix = {
.workgroup_size_log2 = 8,
.subgroup_size_log2 = 6,
},
.scatter = {
.workgroup_size_log2 = 8,
.subgroup_size_log2 = 6,
.block_rows = 14, /* TODO tune this */
},
.nonsequential_dispatch = false,
};
static VkResult
init_radix_sort(struct tu_device *device)
{
if (!device->radix_sort) {
mtx_lock(&device->radix_sort_mutex);
if (!device->radix_sort) {
const struct radix_sort_vk_target_config *cfg =
device->physical_device->info->props.supports_double_threadsize ?
&tu_radix_sort_config_128 :
&tu_radix_sort_config_64;
device->radix_sort =
vk_create_radix_sort_u64(tu_device_to_handle(device),
&device->vk.alloc,
VK_NULL_HANDLE, tu_radix_sort_config);
VK_NULL_HANDLE, *cfg);
if (!device->radix_sort) {
/* TODO plumb through the error here */
mtx_unlock(&device->radix_sort_mutex);
@ -439,7 +464,7 @@ tu_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t inf
tu_save_compute_state(cmd, &state);
struct vk_acceleration_structure_build_args args = {
.subgroup_size = 128,
.subgroup_size = device->physical_device->info->props.supports_double_threadsize ? 128 : 64,
.bvh_bounds_offset = offsetof(tu_accel_struct_header, aabb),
.emit_markers = false,
.radix_sort = device->radix_sort,
@ -593,7 +618,7 @@ tu_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructu
init_radix_sort(device);
struct vk_acceleration_structure_build_args args = {
.subgroup_size = 128,
.subgroup_size = device->physical_device->info->props.supports_double_threadsize ? 128 : 64,
.radix_sort = device->radix_sort,
};