vulkan/radix_sort: Add support for 96-bit keys

64-bit Morton codes are required for decent LBVH TLAS builds, since the
scene bounds are usually much larger than the region that actually
matters.

The changes were done without understanding the code but they seem to
work.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41300>
This commit is contained in:
Konstantin Seurer 2025-12-15 18:15:38 +01:00 committed by Marge Bot
parent a2175b7ec3
commit a1c2b96cd1
11 changed files with 300 additions and 26 deletions

View file

@ -29,6 +29,7 @@ radix_sort_files = files(
'shaders/push.h',
'radix_sort_u64.c',
'radix_sort_u64.h',
'radix_sort_u96.c',
'radix_sort_vk_devaddr.h',
'radix_sort_vk_ext.h',
'radix_sort_vk.c',

View file

@ -7,35 +7,35 @@
#include <assert.h>
static const uint32_t init_spv[] = {
#include "radix_sort/shaders/init.comp.spv.h"
#include "radix_sort/shaders/u64_init.spv.h"
};
static const uint32_t fill_spv[] = {
#include "radix_sort/shaders/fill.comp.spv.h"
#include "radix_sort/shaders/fill.spv.h"
};
static const uint32_t histogram_spv[] = {
#include "radix_sort/shaders/histogram.comp.spv.h"
#include "radix_sort/shaders/u64_histogram.spv.h"
};
static const uint32_t prefix_spv[] = {
#include "radix_sort/shaders/prefix.comp.spv.h"
#include "radix_sort/shaders/u64_prefix.spv.h"
};
static const uint32_t scatter_0_even_spv[] = {
#include "radix_sort/shaders/scatter_0_even.comp.spv.h"
#include "radix_sort/shaders/u64_scatter_0_even.spv.h"
};
static const uint32_t scatter_0_odd_spv[] = {
#include "radix_sort/shaders/scatter_0_odd.comp.spv.h"
#include "radix_sort/shaders/u64_scatter_0_odd.spv.h"
};
static const uint32_t scatter_1_even_spv[] = {
#include "radix_sort/shaders/scatter_1_even.comp.spv.h"
#include "radix_sort/shaders/u64_scatter_1_even.spv.h"
};
static const uint32_t scatter_1_odd_spv[] = {
#include "radix_sort/shaders/scatter_1_odd.comp.spv.h"
#include "radix_sort/shaders/u64_scatter_1_odd.spv.h"
};

View file

@ -0,0 +1,69 @@
/*
* Copyright © 2024 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#include "radix_sort_u96.h"
#include <assert.h>
/*
 * Shader binaries for the 96-bit keyval sort, one array per pipeline. Each
 * array's contents come from a header named after the shader it holds.
 * NOTE(review): the u96_* headers are presumably the variants compiled with
 * RS_KEYVAL_DWORDS=3 -- confirm against the shader build rules.
 */
static const uint32_t init_spv[] = {
#include "radix_sort/shaders/u96_init.spv.h"
};
/* The only header here without a u96_ prefix in its name. */
static const uint32_t fill_spv[] = {
#include "radix_sort/shaders/fill.spv.h"
};
static const uint32_t histogram_spv[] = {
#include "radix_sort/shaders/u96_histogram.spv.h"
};
static const uint32_t prefix_spv[] = {
#include "radix_sort/shaders/u96_prefix.spv.h"
};
/*
 * Scatter shaders: an even/odd pair for each of the indices 0, 1 and 2.
 * NOTE(review): the index presumably selects which keyval dword the pass
 * sorts on -- confirm against the scatter shader sources.
 */
static const uint32_t scatter_0_even_spv[] = {
#include "radix_sort/shaders/u96_scatter_0_even.spv.h"
};
static const uint32_t scatter_0_odd_spv[] = {
#include "radix_sort/shaders/u96_scatter_0_odd.spv.h"
};
static const uint32_t scatter_1_even_spv[] = {
#include "radix_sort/shaders/u96_scatter_1_even.spv.h"
};
static const uint32_t scatter_1_odd_spv[] = {
#include "radix_sort/shaders/u96_scatter_1_odd.spv.h"
};
static const uint32_t scatter_2_even_spv[] = {
#include "radix_sort/shaders/u96_scatter_2_even.spv.h"
};
static const uint32_t scatter_2_odd_spv[] = {
#include "radix_sort/shaders/u96_scatter_2_odd.spv.h"
};
/*
 * Create a radix sort instance backed by the 96-bit keyval shader set.
 *
 * The ten shader binaries are handed to radix_sort_vk_create() in a fixed
 * order (init, fill, histogram, prefix, then the scatter even/odd pairs for
 * indices 0, 1 and 2) together with their sizes in bytes.
 *
 * device, ac, pc and config are forwarded unchanged. config.keyval_dwords
 * must be 3; this is only checked through assert().
 *
 * Returns whatever radix_sort_vk_create() returns.
 */
radix_sort_vk_t *
vk_create_radix_sort_u96(VkDevice device, VkAllocationCallbacks const *ac,
                         VkPipelineCache pc,
                         struct radix_sort_vk_target_config config)
{
   /* Slot of each shader in the arrays passed to radix_sort_vk_create(). */
   enum {
      U96_INIT,
      U96_FILL,
      U96_HISTOGRAM,
      U96_PREFIX,
      U96_SCATTER_0_EVEN,
      U96_SCATTER_0_ODD,
      U96_SCATTER_1_EVEN,
      U96_SCATTER_1_ODD,
      U96_SCATTER_2_EVEN,
      U96_SCATTER_2_ODD,
      U96_MODULE_COUNT
   };

   const uint32_t *modules[U96_MODULE_COUNT] = {
      [U96_INIT] = init_spv,
      [U96_FILL] = fill_spv,
      [U96_HISTOGRAM] = histogram_spv,
      [U96_PREFIX] = prefix_spv,
      [U96_SCATTER_0_EVEN] = scatter_0_even_spv,
      [U96_SCATTER_0_ODD] = scatter_0_odd_spv,
      [U96_SCATTER_1_EVEN] = scatter_1_even_spv,
      [U96_SCATTER_1_ODD] = scatter_1_odd_spv,
      [U96_SCATTER_2_EVEN] = scatter_2_even_spv,
      [U96_SCATTER_2_ODD] = scatter_2_odd_spv,
   };

   /* Byte sizes, index-aligned with modules[]. */
   const uint32_t module_sizes[U96_MODULE_COUNT] = {
      [U96_INIT] = sizeof(init_spv),
      [U96_FILL] = sizeof(fill_spv),
      [U96_HISTOGRAM] = sizeof(histogram_spv),
      [U96_PREFIX] = sizeof(prefix_spv),
      [U96_SCATTER_0_EVEN] = sizeof(scatter_0_even_spv),
      [U96_SCATTER_0_ODD] = sizeof(scatter_0_odd_spv),
      [U96_SCATTER_1_EVEN] = sizeof(scatter_1_even_spv),
      [U96_SCATTER_1_ODD] = sizeof(scatter_1_odd_spv),
      [U96_SCATTER_2_EVEN] = sizeof(scatter_2_even_spv),
      [U96_SCATTER_2_ODD] = sizeof(scatter_2_odd_spv),
   };

   assert(config.keyval_dwords == 3);

   return radix_sort_vk_create(device, ac, pc, modules, module_sizes, config);
}

View file

@ -0,0 +1,24 @@
/*
* Copyright © 2024 Valve Corporation
* SPDX-License-Identifier: MIT
*/
#ifndef VK_RADIX_SORT_U96
#define VK_RADIX_SORT_U96
#include "radix_sort_vk.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Create a radix sort instance that uses the 96-bit keyval shaders.
 *
 * device, ac and pc are passed through to radix_sort_vk_create().
 * config.keyval_dwords must be 3; the implementation checks this with
 * assert() only.
 *
 * Returns the result of radix_sort_vk_create().
 * NOTE(review): failure value and who destroys the returned object are not
 * visible from here -- see radix_sort_vk.h.
 */
radix_sort_vk_t *
vk_create_radix_sort_u96(VkDevice device, VkAllocationCallbacks const *ac,
VkPipelineCache pc,
struct radix_sort_vk_target_config config);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -277,6 +277,14 @@ radix_sort_vk_create(VkDevice _device,
{ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, //
.offset = 0,
.size = sizeof(struct rs_push_scatter) }, // scatter_1_odd
{ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, //
.offset = 0,
.size = sizeof(struct rs_push_scatter) }, // scatter_2_even
{ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, //
.offset = 0,
.size = sizeof(struct rs_push_scatter) }, // scatter_2_odd
};
uint32_t spec_constants[] = {
@ -378,6 +386,8 @@ radix_sort_vk_create(VkDevice _device,
RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[0].odd
RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[1].even
RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[1].odd
RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[2].even
RS_SUBGROUP_SIZE_CREATE_INFO_NAME(scatter), // scatter[2].odd
};
//
@ -409,6 +419,8 @@ radix_sort_vk_create(VkDevice _device,
RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(5), // scatter[0].odd
RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(6), // scatter[1].even
RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(7), // scatter[1].odd
RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(8), // scatter[2].even
RS_COMPUTE_PIPELINE_CREATE_INFO_DECL(9), // scatter[2].odd
};
//

View file

@ -71,7 +71,7 @@ typedef struct radix_sort_vk_target radix_sort_vk_target_t;
// NOTE: The library currently supports keyvals of 1, 2 or 3 dwords
// (32-bit, 64-bit or 96-bit).
//
#define RS_KV_DWORDS_MAX 2
#define RS_KV_DWORDS_MAX 3
//
//

View file

@ -12,6 +12,7 @@
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_control_flow_attributes : require
#extension GL_KHR_shader_subgroup_basic : require
#extension GL_EXT_scalar_block_layout : require
// clang-format on
//
@ -80,6 +81,8 @@ layout(push_constant) uniform block_push
#define RS_KEYVAL_TYPE uint32_t
#elif (RS_KEYVAL_DWORDS == 2)
#define RS_KEYVAL_TYPE u32vec2
#elif (RS_KEYVAL_DWORDS == 3)
#define RS_KEYVAL_TYPE u32vec3
#else
#error "Unsupported RS_KEYVAL_DWORDS"
#endif
@ -154,7 +157,7 @@ layout(local_size_x_id = RS_HISTOGRAM_WORKGROUP_SIZE_ID) in;
//
//
//
layout(buffer_reference, std430) buffer buffer_rs_kv
layout(buffer_reference, scalar) buffer buffer_rs_kv
{
RS_KEYVAL_TYPE extent[];
};
@ -413,8 +416,23 @@ main()
RS_HISTOGRAM_PASS(1)
RS_HISTOGRAM_PASS(0)
#elif (RS_KEYVAL_DWORDS == 3)
RS_HISTOGRAM_PASS(11)
RS_HISTOGRAM_PASS(10)
RS_HISTOGRAM_PASS(9)
RS_HISTOGRAM_PASS(8)
RS_HISTOGRAM_PASS(7)
RS_HISTOGRAM_PASS(6)
RS_HISTOGRAM_PASS(5)
RS_HISTOGRAM_PASS(4)
RS_HISTOGRAM_PASS(3)
RS_HISTOGRAM_PASS(2)
RS_HISTOGRAM_PASS(1)
RS_HISTOGRAM_PASS(0)
#else
#error "Error: (RS_KEYVAL_DWORDS >= 3) not implemented."
#error "Error: (RS_KEYVAL_DWORDS >= 4) not implemented."
#endif
}

View file

@ -19,14 +19,91 @@
# SOFTWARE.
radix_sort_shaders = [
'init.comp',
'fill.comp',
'histogram.comp',
'prefix.comp',
'scatter_0_even.comp',
'scatter_0_odd.comp',
'scatter_1_even.comp',
'scatter_1_odd.comp'
[
'init.comp',
'u64_init',
['-DRS_KEYVAL_DWORDS=2'],
],
[
'init.comp',
'u96_init',
['-DRS_KEYVAL_DWORDS=3'],
],
[
'fill.comp',
'fill',
[],
],
[
'histogram.comp',
'u64_histogram',
['-DRS_KEYVAL_DWORDS=2'],
],
[
'histogram.comp',
'u96_histogram',
['-DRS_KEYVAL_DWORDS=3'],
],
[
'prefix.comp',
'u64_prefix',
['-DRS_KEYVAL_DWORDS=2'],
],
[
'prefix.comp',
'u96_prefix',
['-DRS_KEYVAL_DWORDS=3'],
],
[
'scatter_0_even.comp',
'u64_scatter_0_even',
['-DRS_KEYVAL_DWORDS=2'],
],
[
'scatter_0_even.comp',
'u96_scatter_0_even',
['-DRS_KEYVAL_DWORDS=3'],
],
[
'scatter_0_odd.comp',
'u64_scatter_0_odd',
['-DRS_KEYVAL_DWORDS=2'],
],
[
'scatter_0_odd.comp',
'u96_scatter_0_odd',
['-DRS_KEYVAL_DWORDS=3'],
],
[
'scatter_1_even.comp',
'u64_scatter_1_even',
['-DRS_KEYVAL_DWORDS=2'],
],
[
'scatter_1_even.comp',
'u96_scatter_1_even',
['-DRS_KEYVAL_DWORDS=3'],
],
[
'scatter_1_odd.comp',
'u64_scatter_1_odd',
['-DRS_KEYVAL_DWORDS=2'],
],
[
'scatter_1_odd.comp',
'u96_scatter_1_odd',
['-DRS_KEYVAL_DWORDS=3'],
],
[
'scatter_2_even.comp',
'u96_scatter_2_even',
['-DRS_KEYVAL_DWORDS=3'],
],
[
'scatter_2_odd.comp',
'u96_scatter_2_odd',
['-DRS_KEYVAL_DWORDS=3'],
],
]
shader_include_files = files(
@ -38,16 +115,14 @@ shader_include_files = files(
'config.h',
)
defines = ['-DRS_KEYVAL_DWORDS=2']
radix_sort_spv = []
foreach s : radix_sort_shaders
radix_sort_spv += custom_target(
s + '.spv.h',
input : s,
output : s + '.spv.h',
s[1] + '.spv.h',
input : s[0],
output : s[1] + '.spv.h',
command : [
prog_glslang, '-V', '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@'
] + defines + glslang_quiet + (with_mesa_debug ? ['-g'] : []),
] + s[2] + glslang_quiet + (with_mesa_debug ? ['-g'] : []),
depend_files: shader_include_files)
endforeach

View file

@ -13,6 +13,7 @@
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_memory_scope_semantics : require
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_EXT_scalar_block_layout : require
// clang-format on
//
@ -140,6 +141,8 @@ layout(push_constant) uniform block_push
#define RS_KEYVAL_TYPE uint32_t
#elif (RS_KEYVAL_DWORDS == 2)
#define RS_KEYVAL_TYPE u32vec2
#elif (RS_KEYVAL_DWORDS == 3)
#define RS_KEYVAL_TYPE u32vec3
#else
#error "Error: Unsupported RS_KEYVAL_DWORDS"
#endif
@ -272,7 +275,7 @@ layout(local_size_x_id = RS_SCATTER_WORKGROUP_SIZE_ID) in;
//
//
//
layout(buffer_reference, std430) buffer buffer_rs_kv
layout(buffer_reference, scalar) buffer buffer_rs_kv
{
RS_KEYVAL_TYPE extent[];
};

View file

@ -0,0 +1,36 @@
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#version 460
//
// Scatter shader variant: dword base 2, "even" direction.
//
// This file only sets configuration macros and then includes scatter.glsl,
// which holds the actual shader code.
//
// clang-format off
// NOTE(review): presumably the index of the keyval dword this pass sorts on
// (2 = third dword, so only meaningful for 3-dword keyvals) -- confirm in
// scatter.glsl.
#define RS_SCATTER_KEYVAL_DWORD_BASE 2
// Map the generic partition status names onto the EVEN set.
#define RS_PARTITION_STATUS_INVALID RS_PARTITION_STATUS_EVEN_INVALID
#define RS_PARTITION_STATUS_REDUCTION RS_PARTITION_STATUS_EVEN_REDUCTION
#define RS_PARTITION_STATUS_PREFIX RS_PARTITION_STATUS_EVEN_PREFIX
// Input is the "even" keyval buffer address, output is the "odd" one.
#define RS_DEVADDR_KEYVALS_IN(push_) push_.devaddr_keyvals_even
#define RS_DEVADDR_KEYVALS_OUT(push_) push_.devaddr_keyvals_odd
// clang-format on
//
// Required for the #include directive below.
//
#extension GL_GOOGLE_include_directive : require
//
// Shared scatter implementation; must come after the macros above.
//
#include "scatter.glsl"
//
//
//

View file

@ -0,0 +1,36 @@
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#version 460
//
// Scatter shader variant: dword base 2, "odd" direction.
//
// This file only sets configuration macros and then includes scatter.glsl,
// which holds the actual shader code.
//
// clang-format off
// NOTE(review): presumably the index of the keyval dword this pass sorts on
// (2 = third dword, so only meaningful for 3-dword keyvals) -- confirm in
// scatter.glsl.
#define RS_SCATTER_KEYVAL_DWORD_BASE 2
// Map the generic partition status names onto the ODD set.
#define RS_PARTITION_STATUS_INVALID RS_PARTITION_STATUS_ODD_INVALID
#define RS_PARTITION_STATUS_REDUCTION RS_PARTITION_STATUS_ODD_REDUCTION
#define RS_PARTITION_STATUS_PREFIX RS_PARTITION_STATUS_ODD_PREFIX
// Input is the "odd" keyval buffer address, output is the "even" one.
#define RS_DEVADDR_KEYVALS_IN(push_) push_.devaddr_keyvals_odd
#define RS_DEVADDR_KEYVALS_OUT(push_) push_.devaddr_keyvals_even
// clang-format on
//
// Required for the #include directive below.
//
#extension GL_GOOGLE_include_directive : require
//
// Shared scatter implementation; must come after the macros above.
//
#include "scatter.glsl"
//
//
//