From a1c2b96cd161edd7500aa7e74b3f3d5f797e79fd Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Mon, 15 Dec 2025 18:15:38 +0100 Subject: [PATCH] vulkan/radix_sort: Add support for 96-bit keys 64-bit morton codes are required for decent lbvh tlas builds since the scene bounds are usually much bigger than the area that is actually important. The changes were done without understanding the code but they seem to work. Part-of: --- src/vulkan/runtime/radix_sort/meson.build | 1 + .../runtime/radix_sort/radix_sort_u64.c | 16 +-- .../runtime/radix_sort/radix_sort_u96.c | 69 ++++++++++++ .../runtime/radix_sort/radix_sort_u96.h | 24 ++++ src/vulkan/runtime/radix_sort/radix_sort_vk.c | 12 ++ src/vulkan/runtime/radix_sort/radix_sort_vk.h | 2 +- .../runtime/radix_sort/shaders/histogram.comp | 22 +++- .../runtime/radix_sort/shaders/meson.build | 103 +++++++++++++++--- .../runtime/radix_sort/shaders/scatter.glsl | 5 +- .../radix_sort/shaders/scatter_2_even.comp | 36 ++++++ .../radix_sort/shaders/scatter_2_odd.comp | 36 ++++++ 11 files changed, 300 insertions(+), 26 deletions(-) create mode 100644 src/vulkan/runtime/radix_sort/radix_sort_u96.c create mode 100644 src/vulkan/runtime/radix_sort/radix_sort_u96.h create mode 100644 src/vulkan/runtime/radix_sort/shaders/scatter_2_even.comp create mode 100644 src/vulkan/runtime/radix_sort/shaders/scatter_2_odd.comp diff --git a/src/vulkan/runtime/radix_sort/meson.build b/src/vulkan/runtime/radix_sort/meson.build index 138c0c9369a..213fce8e1c5 100644 --- a/src/vulkan/runtime/radix_sort/meson.build +++ b/src/vulkan/runtime/radix_sort/meson.build @@ -29,6 +29,7 @@ radix_sort_files = files( 'shaders/push.h', 'radix_sort_u64.c', 'radix_sort_u64.h', + 'radix_sort_u96.c', 'radix_sort_vk_devaddr.h', 'radix_sort_vk_ext.h', 'radix_sort_vk.c', diff --git a/src/vulkan/runtime/radix_sort/radix_sort_u64.c b/src/vulkan/runtime/radix_sort/radix_sort_u64.c index 0d5f9217656..393046eda03 100644 --- a/src/vulkan/runtime/radix_sort/radix_sort_u64.c +++ 
b/src/vulkan/runtime/radix_sort/radix_sort_u64.c @@ -7,35 +7,35 @@ #include <assert.h> static const uint32_t init_spv[] = { -#include "radix_sort/shaders/init.comp.spv.h" +#include "radix_sort/shaders/u64_init.spv.h" }; static const uint32_t fill_spv[] = { -#include "radix_sort/shaders/fill.comp.spv.h" +#include "radix_sort/shaders/fill.spv.h" }; static const uint32_t histogram_spv[] = { -#include "radix_sort/shaders/histogram.comp.spv.h" +#include "radix_sort/shaders/u64_histogram.spv.h" }; static const uint32_t prefix_spv[] = { -#include "radix_sort/shaders/prefix.comp.spv.h" +#include "radix_sort/shaders/u64_prefix.spv.h" }; static const uint32_t scatter_0_even_spv[] = { -#include "radix_sort/shaders/scatter_0_even.comp.spv.h" +#include "radix_sort/shaders/u64_scatter_0_even.spv.h" }; static const uint32_t scatter_0_odd_spv[] = { -#include "radix_sort/shaders/scatter_0_odd.comp.spv.h" +#include "radix_sort/shaders/u64_scatter_0_odd.spv.h" }; static const uint32_t scatter_1_even_spv[] = { -#include "radix_sort/shaders/scatter_1_even.comp.spv.h" +#include "radix_sort/shaders/u64_scatter_1_even.spv.h" }; static const uint32_t scatter_1_odd_spv[] = { -#include "radix_sort/shaders/scatter_1_odd.comp.spv.h" +#include "radix_sort/shaders/u64_scatter_1_odd.spv.h" }; diff --git a/src/vulkan/runtime/radix_sort/radix_sort_u96.c b/src/vulkan/runtime/radix_sort/radix_sort_u96.c new file mode 100644 index 00000000000..8b0ad6f722e --- /dev/null +++ b/src/vulkan/runtime/radix_sort/radix_sort_u96.c @@ -0,0 +1,69 @@ +/* + * Copyright © 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#include "radix_sort_u96.h" +#include <assert.h> + +static const uint32_t init_spv[] = { +#include "radix_sort/shaders/u96_init.spv.h" +}; + +static const uint32_t fill_spv[] = { +#include "radix_sort/shaders/fill.spv.h" +}; + +static const uint32_t histogram_spv[] = { +#include "radix_sort/shaders/u96_histogram.spv.h" +}; + +static const uint32_t prefix_spv[] = { +#include
"radix_sort/shaders/u96_prefix.spv.h" +}; + +static const uint32_t scatter_0_even_spv[] = { +#include "radix_sort/shaders/u96_scatter_0_even.spv.h" +}; + +static const uint32_t scatter_0_odd_spv[] = { +#include "radix_sort/shaders/u96_scatter_0_odd.spv.h" +}; + +static const uint32_t scatter_1_even_spv[] = { +#include "radix_sort/shaders/u96_scatter_1_even.spv.h" +}; + +static const uint32_t scatter_1_odd_spv[] = { +#include "radix_sort/shaders/u96_scatter_1_odd.spv.h" +}; + +static const uint32_t scatter_2_even_spv[] = { +#include "radix_sort/shaders/u96_scatter_2_even.spv.h" +}; + +static const uint32_t scatter_2_odd_spv[] = { +#include "radix_sort/shaders/u96_scatter_2_odd.spv.h" +}; + +/* Create a radix sort instance for 96-bit keyvals: hands the ten u96 SPIR-V modules above to radix_sort_vk_create() and returns its result. */ +radix_sort_vk_t * +vk_create_radix_sort_u96(VkDevice device, VkAllocationCallbacks const *ac, + VkPipelineCache pc, + struct radix_sort_vk_target_config config) +{ + assert(config.keyval_dwords == 3); /* this entry point only accepts three-dword keyvals */ + /* Module order: init, fill, histogram, prefix, then one even/odd scatter pair per keyval dword (0, 1, 2). */ + const uint32_t *spv[10] = { + init_spv, fill_spv, histogram_spv, prefix_spv, + scatter_0_even_spv, scatter_0_odd_spv, scatter_1_even_spv, scatter_1_odd_spv, + scatter_2_even_spv, scatter_2_odd_spv, + }; + const uint32_t spv_sizes[10] = { /* sizes in bytes, in the same order as spv[] */ + sizeof(init_spv), sizeof(fill_spv), sizeof(histogram_spv), sizeof(prefix_spv), + sizeof(scatter_0_even_spv), sizeof(scatter_0_odd_spv), sizeof(scatter_1_even_spv), sizeof(scatter_1_odd_spv), + sizeof(scatter_2_even_spv), sizeof(scatter_2_odd_spv), + }; + return radix_sort_vk_create(device, ac, pc, spv, spv_sizes, config); +} + diff --git a/src/vulkan/runtime/radix_sort/radix_sort_u96.h b/src/vulkan/runtime/radix_sort/radix_sort_u96.h new file mode 100644 index 00000000000..b8152f41272 --- /dev/null +++ b/src/vulkan/runtime/radix_sort/radix_sort_u96.h @@ -0,0 +1,24 @@ +/* + * Copyright © 2024 Valve Corporation + * SPDX-License-Identifier: MIT + */ + +#ifndef VK_RADIX_SORT_U96 +#define VK_RADIX_SORT_U96 + +#include "radix_sort_vk.h" + +#ifdef __cplusplus +extern "C" { +#endif +/* Create a radix sort instance from the u96 shader set; config.keyval_dwords must be 3 (asserted in the implementation). */ +radix_sort_vk_t * +vk_create_radix_sort_u96(VkDevice device,
VkAllocationCallbacks const *ac, + VkPipelineCache pc, + struct radix_sort_vk_target_config config); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/vulkan/runtime/radix_sort/radix_sort_vk.c b/src/vulkan/runtime/radix_sort/radix_sort_vk.c index 31efd3d4a75..a0cca6872de 100644 --- a/src/vulkan/runtime/radix_sort/radix_sort_vk.c +++ b/src/vulkan/runtime/radix_sort/radix_sort_vk.c @@ -277,6 +277,14 @@ radix_sort_vk_create(VkDevice _device, { .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, // .offset = 0, .size = sizeof(struct rs_push_scatter) }, // scatter_1_odd + + { .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, // + .offset = 0, + .size = sizeof(struct rs_push_scatter) }, // scatter_2_even + + { .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, // + .offset = 0, + .size = sizeof(struct rs_push_scatter) }, // scatter_2_odd }; uint32_t spec_constants[] = { @@ -378,6 +386,8 @@ radix_sort_vk_create(VkDevice _device, RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[0].odd RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[1].even RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[1].odd + RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[2].even + RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[2].odd }; // @@ -409,6 +419,8 @@ radix_sort_vk_create(VkDevice _device, RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(5), // scatter[0].odd RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(6), // scatter[1].even RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(7), // scatter[1].odd + RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(8), // scatter[2].even + RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(9), // scatter[2].odd }; // diff --git a/src/vulkan/runtime/radix_sort/radix_sort_vk.h b/src/vulkan/runtime/radix_sort/radix_sort_vk.h index f52020880a8..4b8813fb639 100644 --- a/src/vulkan/runtime/radix_sort/radix_sort_vk.h +++ b/src/vulkan/runtime/radix_sort/radix_sort_vk.h @@ -71,7 +71,7 @@ typedef struct radix_sort_vk_target radix_sort_vk_target_t; // NOTE: The library currently supports uint32_t and uint64_t 
keyvals. // -#define RS_KV_DWORDS_MAX 2 +#define RS_KV_DWORDS_MAX 3 // // diff --git a/src/vulkan/runtime/radix_sort/shaders/histogram.comp b/src/vulkan/runtime/radix_sort/shaders/histogram.comp index 54ce51711bf..bf6b50ff756 100644 --- a/src/vulkan/runtime/radix_sort/shaders/histogram.comp +++ b/src/vulkan/runtime/radix_sort/shaders/histogram.comp @@ -12,6 +12,7 @@ #extension GL_GOOGLE_include_directive : require #extension GL_EXT_control_flow_attributes : require #extension GL_KHR_shader_subgroup_basic : require +#extension GL_EXT_scalar_block_layout : require // clang-format on // @@ -80,6 +81,8 @@ layout(push_constant) uniform block_push #define RS_KEYVAL_TYPE uint32_t #elif (RS_KEYVAL_DWORDS == 2) #define RS_KEYVAL_TYPE u32vec2 +#elif (RS_KEYVAL_DWORDS == 3) +#define RS_KEYVAL_TYPE u32vec3 #else #error "Unsupported RS_KEYVAL_DWORDS" #endif @@ -154,7 +157,7 @@ layout(local_size_x_id = RS_HISTOGRAM_WORKGROUP_SIZE_ID) in; // // // -layout(buffer_reference, std430) buffer buffer_rs_kv +layout(buffer_reference, scalar) buffer buffer_rs_kv { RS_KEYVAL_TYPE extent[]; }; @@ -413,8 +416,23 @@ main() RS_HISTOGRAM_PASS(1) RS_HISTOGRAM_PASS(0) +#elif (RS_KEYVAL_DWORDS == 3) + + RS_HISTOGRAM_PASS(11) + RS_HISTOGRAM_PASS(10) + RS_HISTOGRAM_PASS(9) + RS_HISTOGRAM_PASS(8) + RS_HISTOGRAM_PASS(7) + RS_HISTOGRAM_PASS(6) + RS_HISTOGRAM_PASS(5) + RS_HISTOGRAM_PASS(4) + RS_HISTOGRAM_PASS(3) + RS_HISTOGRAM_PASS(2) + RS_HISTOGRAM_PASS(1) + RS_HISTOGRAM_PASS(0) + #else -#error "Error: (RS_KEYVAL_DWORDS >= 3) not implemented." +#error "Error: (RS_KEYVAL_DWORDS >= 4) not implemented." #endif } diff --git a/src/vulkan/runtime/radix_sort/shaders/meson.build b/src/vulkan/runtime/radix_sort/shaders/meson.build index 4152735b730..4ec847cf617 100644 --- a/src/vulkan/runtime/radix_sort/shaders/meson.build +++ b/src/vulkan/runtime/radix_sort/shaders/meson.build @@ -19,14 +19,91 @@ # SOFTWARE. 
radix_sort_shaders = [ - 'init.comp', - 'fill.comp', - 'histogram.comp', - 'prefix.comp', - 'scatter_0_even.comp', - 'scatter_0_odd.comp', - 'scatter_1_even.comp', - 'scatter_1_odd.comp' + [ + 'init.comp', + 'u64_init', + ['-DRS_KEYVAL_DWORDS=2'], + ], + [ + 'init.comp', + 'u96_init', + ['-DRS_KEYVAL_DWORDS=3'], + ], + [ + 'fill.comp', + 'fill', + [], + ], + [ + 'histogram.comp', + 'u64_histogram', + ['-DRS_KEYVAL_DWORDS=2'], + ], + [ + 'histogram.comp', + 'u96_histogram', + ['-DRS_KEYVAL_DWORDS=3'], + ], + [ + 'prefix.comp', + 'u64_prefix', + ['-DRS_KEYVAL_DWORDS=2'], + ], + [ + 'prefix.comp', + 'u96_prefix', + ['-DRS_KEYVAL_DWORDS=3'], + ], + [ + 'scatter_0_even.comp', + 'u64_scatter_0_even', + ['-DRS_KEYVAL_DWORDS=2'], + ], + [ + 'scatter_0_even.comp', + 'u96_scatter_0_even', + ['-DRS_KEYVAL_DWORDS=3'], + ], + [ + 'scatter_0_odd.comp', + 'u64_scatter_0_odd', + ['-DRS_KEYVAL_DWORDS=2'], + ], + [ + 'scatter_0_odd.comp', + 'u96_scatter_0_odd', + ['-DRS_KEYVAL_DWORDS=3'], + ], + [ + 'scatter_1_even.comp', + 'u64_scatter_1_even', + ['-DRS_KEYVAL_DWORDS=2'], + ], + [ + 'scatter_1_even.comp', + 'u96_scatter_1_even', + ['-DRS_KEYVAL_DWORDS=3'], + ], + [ + 'scatter_1_odd.comp', + 'u64_scatter_1_odd', + ['-DRS_KEYVAL_DWORDS=2'], + ], + [ + 'scatter_1_odd.comp', + 'u96_scatter_1_odd', + ['-DRS_KEYVAL_DWORDS=3'], + ], + [ + 'scatter_2_even.comp', + 'u96_scatter_2_even', + ['-DRS_KEYVAL_DWORDS=3'], + ], + [ + 'scatter_2_odd.comp', + 'u96_scatter_2_odd', + ['-DRS_KEYVAL_DWORDS=3'], + ], ] shader_include_files = files( @@ -38,16 +115,14 @@ shader_include_files = files( 'config.h', ) -defines = ['-DRS_KEYVAL_DWORDS=2'] - radix_sort_spv = [] foreach s : radix_sort_shaders radix_sort_spv += custom_target( - s + '.spv.h', - input : s, - output : s + '.spv.h', + s[1] + '.spv.h', + input : s[0], + output : s[1] + '.spv.h', command : [ prog_glslang, '-V', '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@' - ] + defines + glslang_quiet + (with_mesa_debug ? 
['-g'] : []), + ] + s[2] + glslang_quiet + (with_mesa_debug ? ['-g'] : []), depend_files: shader_include_files) endforeach diff --git a/src/vulkan/runtime/radix_sort/shaders/scatter.glsl b/src/vulkan/runtime/radix_sort/shaders/scatter.glsl index 509647f940f..76c7cbf419b 100644 --- a/src/vulkan/runtime/radix_sort/shaders/scatter.glsl +++ b/src/vulkan/runtime/radix_sort/shaders/scatter.glsl @@ -13,6 +13,7 @@ #extension GL_KHR_shader_subgroup_arithmetic : require #extension GL_KHR_memory_scope_semantics : require #extension GL_KHR_shader_subgroup_ballot : require +#extension GL_EXT_scalar_block_layout : require // clang-format on // @@ -140,6 +141,8 @@ layout(push_constant) uniform block_push #define RS_KEYVAL_TYPE uint32_t #elif (RS_KEYVAL_DWORDS == 2) #define RS_KEYVAL_TYPE u32vec2 +#elif (RS_KEYVAL_DWORDS == 3) +#define RS_KEYVAL_TYPE u32vec3 #else #error "Error: Unsupported RS_KEYVAL_DWORDS" #endif @@ -272,7 +275,7 @@ layout(local_size_x_id = RS_SCATTER_WORKGROUP_SIZE_ID) in; // // // -layout(buffer_reference, std430) buffer buffer_rs_kv +layout(buffer_reference, scalar) buffer buffer_rs_kv { RS_KEYVAL_TYPE extent[]; }; diff --git a/src/vulkan/runtime/radix_sort/shaders/scatter_2_even.comp b/src/vulkan/runtime/radix_sort/shaders/scatter_2_even.comp new file mode 100644 index 00000000000..52d57786c17 --- /dev/null +++ b/src/vulkan/runtime/radix_sort/shaders/scatter_2_even.comp @@ -0,0 +1,36 @@ +// Copyright 2021 The Fuchsia Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#version 460 + +// +// +// + +// clang-format off +#define RS_SCATTER_KEYVAL_DWORD_BASE 2 + +#define RS_PARTITION_STATUS_INVALID RS_PARTITION_STATUS_EVEN_INVALID +#define RS_PARTITION_STATUS_REDUCTION RS_PARTITION_STATUS_EVEN_REDUCTION +#define RS_PARTITION_STATUS_PREFIX RS_PARTITION_STATUS_EVEN_PREFIX + +#define RS_DEVADDR_KEYVALS_IN(push_) push_.devaddr_keyvals_even +#define RS_DEVADDR_KEYVALS_OUT(push_) push_.devaddr_keyvals_odd +// clang-format on + +// +// +// + +#extension GL_GOOGLE_include_directive : require + +// +// +// + +#include "scatter.glsl" + +// +// +// diff --git a/src/vulkan/runtime/radix_sort/shaders/scatter_2_odd.comp b/src/vulkan/runtime/radix_sort/shaders/scatter_2_odd.comp new file mode 100644 index 00000000000..57806bfce3d --- /dev/null +++ b/src/vulkan/runtime/radix_sort/shaders/scatter_2_odd.comp @@ -0,0 +1,36 @@ +// Copyright 2021 The Fuchsia Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#version 460 + +// +// +// + +// clang-format off +#define RS_SCATTER_KEYVAL_DWORD_BASE 2 + +#define RS_PARTITION_STATUS_INVALID RS_PARTITION_STATUS_ODD_INVALID +#define RS_PARTITION_STATUS_REDUCTION RS_PARTITION_STATUS_ODD_REDUCTION +#define RS_PARTITION_STATUS_PREFIX RS_PARTITION_STATUS_ODD_PREFIX + +#define RS_DEVADDR_KEYVALS_IN(push_) push_.devaddr_keyvals_odd +#define RS_DEVADDR_KEYVALS_OUT(push_) push_.devaddr_keyvals_even +// clang-format on + +// +// +// + +#extension GL_GOOGLE_include_directive : require + +// +// +// + +#include "scatter.glsl" + +// +// +//