mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-06-17 04:28:28 +02:00
vulkan: Implement HPLOC
Reviewed-by: Natalie Vock <natalie.vock@gmx.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39116>
This commit is contained in:
parent
ab9d3528dc
commit
a6a62363df
6 changed files with 354 additions and 2 deletions
235
src/vulkan/runtime/bvh/hploc_internal.comp
Normal file
235
src/vulkan/runtime/bvh/hploc_internal.comp
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
/*
|
||||
* Copyright © 2025 Valve Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#version 460
|
||||
|
||||
#include "vk_build_interface.h"
|
||||
#include "vk_debug.h"
|
||||
|
||||
/* The workgroup is one-dimensional; its x size is supplied through the
 * specialization constant with id SUBGROUP_SIZE_ID.
 */
layout(local_size_x_id = SUBGROUP_SIZE_ID, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
/* All shader inputs arrive as a single hploc_args push-constant block named args. */
layout(push_constant) uniform CONSTS
|
||||
{
|
||||
hploc_args args;
|
||||
};
|
||||
|
||||
/*
 * Distance metric between the sorted entries at index and index + 1 of
 * args.ids.
 *
 * If the two keys differ, the result is 32 plus the position of the highest
 * bit in which they differ. If the keys are equal, the position of the highest
 * differing bit of the two indices is used instead, so entries with identical
 * keys still get distinct, smaller values.
 */
uint32_t
delta(uint32_t index)
{
   uint32_t next_index = index + 1;

   uint32_t key_a = DEREF(INDEX(key_id_pair, args.ids, index)).key;
   uint32_t key_b = DEREF(INDEX(key_id_pair, args.ids, next_index)).key;

   /* Equal keys: fall back to the indices as a tie breaker. */
   if (key_a == key_b)
      return findMSB(index ^ next_index);

   return 32 + findMSB(key_a ^ key_b);
}
|
||||
|
||||
/* Number of lower-indexed neighbors each invocation examines when looking for
 * a merge partner in main().
 */
#define SEARCH_RADIUS 16
|
||||
|
||||
/* Compacted node ids of the cluster that is currently being merged. */
shared uint32_t node_ids[SUBGROUP_SIZE];
|
||||
/* Bounds of the nodes in node_ids, stored at the same index. */
shared vk_aabb node_aabbs[SUBGROUP_SIZE];
|
||||
/* Per-slot best merge candidate proposed by higher-indexed invocations via
 * atomicMin: merged surface-area bits in the upper bits, proposing invocation
 * index in the bits covered by (SUBGROUP_SIZE - 1).
 */
shared uint32_t candidate_infos[SUBGROUP_SIZE];
|
||||
|
||||
/*
 * Builds the internal box nodes of the IR BVH bottom-up.
 *
 * Every invocation starts with the one-leaf range [global_id, global_id]. In
 * each round an active invocation picks the parent of its range (index
 * range_start - 1 or range_end, chosen with delta()) and atomically exchanges
 * its far range boundary into args.ranges[parent]. The host pre-fills that
 * buffer with 0xffffffff, so the first child to arrive reads 0xffffffff and
 * goes inactive, while the second child receives its sibling's boundary and
 * continues with the combined range.
 *
 * Whenever an active range is the root range or larger than SUBGROUP_SIZE / 2,
 * the whole subgroup cooperatively merges the nodes of that range: each node
 * searches up to SEARCH_RADIUS lower-indexed neighbors for the partner with the
 * smallest merged surface area, mutually-agreeing pairs are replaced by a new
 * vk_ir_box_node, and the survivors are compacted. The remaining node ids are
 * written back to args.ids at the start of the range.
 */
void
main(void)
{
   uint32_t global_id = gl_GlobalInvocationID.x;
   REF(vk_ir_header) header = REF(vk_ir_header)(args.header);
   uint32_t active_leaf_count = DEREF(header).active_leaf_count;

   /* Zero or one leaf: invocation 0 emits a single box node whose first child
    * is the only leaf (or VK_BVH_INVALID_NODE when there is none) and exits.
    */
   if (active_leaf_count <= 1) {
      if (global_id > 0)
         return;

      DEREF(header).ir_internal_node_count = 1;

      uint32_t child_id = VK_BVH_INVALID_NODE;
      vk_ir_node child = vk_ir_node(vk_aabb(vec3(0.0), vec3(0.0)));
      if (active_leaf_count > 0) {
         REF(key_id_pair) key_id = INDEX(key_id_pair, args.ids, global_id);
         child_id = DEREF(key_id).id;
         child = DEREF(REF(vk_ir_node)(OFFSET(args.bvh, ir_id_to_offset(child_id))));
      }

      REF(vk_ir_box_node) node = REF(vk_ir_box_node)(OFFSET(args.bvh, args.internal_node_base));
      DEREF(node).base.aabb = child.aabb;
      DEREF(node).children[0] = child_id;
      DEREF(node).children[1] = VK_BVH_INVALID_NODE;
      DEREF(node).bvh_offset = VK_UNKNOWN_BVH_OFFSET;
      if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS) && active_leaf_count > 0)
         DEREF(node).flags = fetch_child_flags(args.bvh, child_id);

      return;
   }

   /* Start at the leaf nodes which cover only one primitive => start=end. */
   uint32_t range_start = global_id;
   uint32_t range_end = global_id;

   uint32_t internal_node_count = active_leaf_count - 1;

   bool is_active = global_id < active_leaf_count;

   while (subgroupAny(is_active)) {
      uint32_t parent_index = 0xffffffff;
      if (is_active) {
         /* The parent node has either the index range_start-1 or range_end. Avoid indexing -1 or active_leaf_count. */
         bool use_right_parent = range_start == 0 || (range_end < internal_node_count && delta(range_end) < delta(range_start - 1));

         parent_index = use_right_parent ? range_end : (range_start - 1);
         if (parent_index == internal_node_count) {
            /* The range starts at 0 and ends at the last leaf: it has no parent. */
            is_active = false;
         } else {
            /* Publish the boundary of this range and fetch whatever the sibling published. */
            uint32_t prev_range = atomicExchange(DEREF(INDEX(uint32_t, args.ranges, parent_index)), use_right_parent ? range_start : range_end);
            if (prev_range == 0xffffffff) {
               /* The sibling has not arrived yet; it will continue with the combined range. */
               is_active = false;
            } else {
               if (use_right_parent)
                  range_end = prev_range;
               else
                  range_start = prev_range;
            }
         }
      }

      /* Merging phase for every invocation that has a range with more than SUBGROUP_SIZE / 2 nodes.
       * The nodes are merged until the number of nodes is below SUBGROUP_SIZE / 2 which ensures that
       * the invocation handling the parent node can load its child nodes.
       */
      const uint32_t cluster_threshold = SUBGROUP_SIZE / 2;
      uint32_t range_size = range_end - range_start + 1;
      bool range_is_root = subgroupAny(range_size == active_leaf_count);
      uint64_t cluster_mask = packUint2x32(subgroupBallot(is_active && (range_is_root || range_size > cluster_threshold)).xy);
      while (cluster_mask != 0) {
         uint32_t cluster_invoc = uint32_t(findLSB(cluster_mask));
         /* Clear the LSB. */
         cluster_mask &= cluster_mask - 1;

         /* Broadcast the range owned by cluster_invoc; its parent index is the
          * split point between the left range [start, split] and the right
          * range [split + 1, end].
          */
         uint32_t start = subgroupShuffle(range_start, cluster_invoc);
         uint32_t split = subgroupShuffle(parent_index, cluster_invoc);
         uint32_t end = subgroupShuffle(range_end, cluster_invoc);

         /* The lower half of the subgroup loads from the left range, the upper half from the right range. */
         uint32_t load_index, load_base, index_range;
         if (gl_SubgroupInvocationID >= cluster_threshold) {
            load_index = gl_SubgroupInvocationID - cluster_threshold;
            load_base = split + 1;
            index_range = end - split;
         } else {
            load_index = gl_SubgroupInvocationID;
            load_base = start;
            index_range = split + 1 - start;
         }
         uint32_t node_id_index = load_base + load_index;
         uint32_t node_id = VK_BVH_INVALID_NODE;
         if (load_index < index_range)
            node_id = DEREF(INDEX(key_id_pair, args.ids, node_id_index)).id;

         /* Compact the valid nodes to the front of the shared arrays. */
         uvec4 node_valid_mask = subgroupBallot(node_id != VK_BVH_INVALID_NODE);
         uint32_t node_prefix_sum = subgroupBallotExclusiveBitCount(node_valid_mask);
         uint32_t node_count = subgroupBallotBitCount(node_valid_mask);
         if (node_id != VK_BVH_INVALID_NODE) {
            node_ids[node_prefix_sum] = node_id;
            node_aabbs[node_prefix_sum] = DEREF(REF(vk_ir_node)(OFFSET(args.bvh, ir_id_to_offset(node_id)))).aabb;
         }

         while (node_count > (range_is_root ? 1 : cluster_threshold)) {
            node_id = VK_BVH_INVALID_NODE;
            vk_aabb node_aabb = node_aabbs[gl_SubgroupInvocationID];
            if (gl_SubgroupInvocationID < node_count) {
               candidate_infos[gl_SubgroupInvocationID] = 0xffffffff;
               uint32_t best_candidate = 0xffffffff;
               for (uint32_t i = 1; i <= SEARCH_RADIUS; i++) {
                  int32_t index = int32_t(gl_SubgroupInvocationID) - int(i);

                  /* Only touch node_aabbs[index] for neighbors that exist: a negative
                   * index would be an out-of-bounds access to the shared array.
                   */
                  if (index >= 0) {
                     vk_aabb shared_bounds;
                     shared_bounds.min = min(node_aabbs[index].min, node_aabb.min);
                     shared_bounds.max = max(node_aabbs[index].max, node_aabb.max);

                     /* Candidate encoding: surface-area bits with the bits covered by
                      * (SUBGROUP_SIZE - 1) cleared, OR'ed with the partner's index.
                      */
                     uint32_t shared_sa = (floatBitsToUint(aabb_surface_area(shared_bounds)) << 1u) & (~(SUBGROUP_SIZE - 1));

                     uint32_t candidate = shared_sa | index;
                     best_candidate = min(best_candidate, candidate);

                     /* Propose this invocation as a partner to the lower-indexed neighbor. */
                     candidate = shared_sa | gl_SubgroupInvocationID;
                     atomicMin(candidate_infos[index], candidate);
                  }
               }

               best_candidate = min(best_candidate, candidate_infos[gl_SubgroupInvocationID]);
               uint32_t best_index = best_candidate & (SUBGROUP_SIZE - 1);
               uint32_t other_node_id = node_ids[best_index];

               vk_aabb shared_bounds;
               shared_bounds.min = min(node_aabbs[best_index].min, node_aabb.min);
               shared_bounds.max = max(node_aabbs[best_index].max, node_aabb.max);

               /* There is always at least one pair of invocations that can be merged because there is a finite number of pairs and
                * one of them therefore has a minimum surface area. If more than two nodes have the exact same surface area, the
                * neighbor search prioritizes lower invocation indices.
                */
               bool merge = best_index < SUBGROUP_SIZE && subgroupShuffle(best_index, best_index) == gl_SubgroupInvocationID;

               node_id = node_ids[gl_SubgroupInvocationID];

               if (merge) {
                  if (gl_SubgroupInvocationID < best_index) {
                     /* The lower-indexed invocation of the pair allocates and writes the new box node. */
                     uint32_t dst_index = atomicAdd(DEREF(header).ir_internal_node_count, 1);
                     uint32_t dst_offset = args.internal_node_base + dst_index * SIZEOF(vk_ir_box_node);

                     node_aabb = shared_bounds;

                     REF(vk_ir_box_node) node = REF(vk_ir_box_node)(OFFSET(args.bvh, dst_offset));
                     DEREF(node).base.aabb = shared_bounds;
                     DEREF(node).children[0] = node_id;
                     DEREF(node).children[1] = other_node_id;
                     DEREF(node).bvh_offset = VK_UNKNOWN_BVH_OFFSET;
                     if (VK_BUILD_FLAG(VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS)) {
                        DEREF(node).flags = fetch_child_flags(args.bvh, node_id) & fetch_child_flags(args.bvh, other_node_id);
                     }

                     node_id = pack_ir_node_id(dst_offset, vk_ir_node_internal);
                  } else {
                     /* The higher-indexed invocation of the pair gives up its slot. */
                     node_id = VK_BVH_INVALID_NODE;
                  }
               }
            }

            /* Re-compact the surviving nodes for the next merge iteration. */
            node_count = subgroupBallotBitCount(subgroupBallot(node_id != VK_BVH_INVALID_NODE));
            uint32_t node_prefix_sum = subgroupBallotExclusiveBitCount(subgroupBallot(node_id != VK_BVH_INVALID_NODE));
            if (node_id != VK_BVH_INVALID_NODE) {
               node_ids[node_prefix_sum] = node_id;
               node_aabbs[node_prefix_sum] = node_aabb;
            }
         }

         /* Write the remaining node ids back to the start of the range and pad the rest with VK_BVH_INVALID_NODE. */
         if (gl_SubgroupInvocationID < min(end - start + 1, cluster_threshold)) {
            uint32_t dst_node = gl_SubgroupInvocationID < node_count ? node_ids[gl_SubgroupInvocationID] : VK_BVH_INVALID_NODE;
            DEREF(INDEX(key_id_pair, args.ids, start + gl_SubgroupInvocationID)).id = dst_node;
         }

         memoryBarrier(gl_ScopeDevice, gl_StorageSemanticsBuffer,
                       gl_SemanticsAcquireRelease | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible);
      }
   }
}
|
||||
|
|
@ -40,6 +40,10 @@ bvh_shaders = [
|
|||
'ploc_internal.comp',
|
||||
'ploc_internal',
|
||||
],
|
||||
[
|
||||
'hploc_internal.comp',
|
||||
'hploc_internal',
|
||||
],
|
||||
]
|
||||
|
||||
spirv_include_dir = dir_source_root + '/src/compiler/spirv'
|
||||
|
|
@ -70,6 +74,7 @@ vk_glsl_shader_extensions = [
|
|||
'GL_KHR_shader_subgroup_shuffle',
|
||||
'GL_KHR_shader_subgroup_ballot',
|
||||
'GL_KHR_shader_subgroup_clustered',
|
||||
'GL_KHR_shader_subgroup_vote',
|
||||
'GL_EXT_shader_atomic_int64',
|
||||
'GL_EXT_spirv_intrinsics',
|
||||
]
|
||||
|
|
|
|||
|
|
@ -110,4 +110,12 @@ struct ploc_args {
|
|||
uint32_t internal_node_offset;
|
||||
};
|
||||
|
||||
/* Push constants consumed by the HPLOC internal-node build shader. */
struct hploc_args {
|
||||
/* IR header: the shader reads active_leaf_count from it and updates ir_internal_node_count. */
REF(vk_ir_header) header;
|
||||
/* Base address for IR node accesses; node offsets are applied relative to it. */
VOID_REF bvh;
|
||||
/* Array of key/id pairs, one per leaf; the shader rewrites the id fields while merging. */
REF(key_id_pair) ids;
|
||||
/* One uint32_t per internal node, used with atomicExchange to pair up child ranges;
 * the host fills it with 0xffffffff before the dispatch.
 */
VOID_REF ranges;
|
||||
/* Offset relative to bvh at which the internal box nodes are written. */
uint32_t internal_node_base;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -61,6 +61,10 @@ static const uint32_t ploc_spv[] = {
|
|||
#include "bvh/ploc_internal.spv.h"
|
||||
};
|
||||
|
||||
/* SPIR-V words of the HPLOC internal-node shader, included from
 * bvh/hploc_internal.spv.h (presumably generated from hploc_internal.comp
 * by the build; verify against the meson rules).
 */
static const uint32_t hploc_spv[] = {
|
||||
#include "bvh/hploc_internal.spv.h"
|
||||
};
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
vk_common_CreateAccelerationStructureKHR(VkDevice _device,
|
||||
const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
|
||||
|
|
@ -172,10 +176,13 @@ vk_acceleration_structure_build_state_init(struct vk_acceleration_structure_buil
|
|||
uint32_t offset = 0;
|
||||
|
||||
uint32_t ploc_scratch_space = 0;
|
||||
uint32_t hploc_scratch_space = 0;
|
||||
uint32_t lbvh_node_space = 0;
|
||||
|
||||
if (state->config.internal_type == VK_INTERNAL_BUILD_TYPE_PLOC)
|
||||
ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition);
|
||||
else if (state->config.internal_type == VK_INTERNAL_BUILD_TYPE_HPLOC)
|
||||
hploc_scratch_space = sizeof(uint32_t) * internal_count;
|
||||
else
|
||||
lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count;
|
||||
|
||||
|
|
@ -199,8 +206,11 @@ vk_acceleration_structure_build_state_init(struct vk_acceleration_structure_buil
|
|||
/* Internal sorting data is not needed when PLOC/LBVH are invoked,
|
||||
* save space by aliasing them */
|
||||
state->scratch.ploc_prefix_sum_partition_offset = offset;
|
||||
offset += MAX2(requirements.internal_size, ploc_scratch_space);
|
||||
|
||||
state->scratch.lbvh_node_offset = offset;
|
||||
offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space);
|
||||
state->scratch.hploc_ranges_offset = offset;
|
||||
offset += MAX2(hploc_scratch_space, lbvh_node_space);
|
||||
|
||||
/* Make sure encode scratch space does not overlap the BVH. */
|
||||
offset = MAX2(offset, encode_scratch_end);
|
||||
|
|
@ -242,6 +252,7 @@ struct bvh_batch_state {
|
|||
bool any_updateable;
|
||||
bool any_non_updateable;
|
||||
bool any_ploc;
|
||||
bool any_hploc;
|
||||
bool any_lbvh;
|
||||
bool any_update;
|
||||
};
|
||||
|
|
@ -1069,6 +1080,72 @@ ploc_build_internal(VkCommandBuffer commandBuffer,
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
hploc_build_internal(VkCommandBuffer commandBuffer,
|
||||
struct vk_device *device, struct vk_meta_device *meta,
|
||||
const struct vk_acceleration_structure_build_args *args,
|
||||
uint32_t infoCount,
|
||||
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
|
||||
{
|
||||
VkPipeline pipeline;
|
||||
VkPipelineLayout layout;
|
||||
|
||||
uint32_t flags = 0;
|
||||
if (args->propagate_cull_flags)
|
||||
flags |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS;
|
||||
|
||||
VkResult result = vk_get_bvh_build_pipeline_spv(device, meta, VK_META_OBJECT_KEY_HPLOC, hploc_spv,
|
||||
sizeof(hploc_spv), sizeof(struct hploc_args),
|
||||
args, flags, &pipeline,
|
||||
false /* unaligned_dispatch */);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = vk_get_bvh_build_pipeline_layout(device, meta, sizeof(struct hploc_args), &layout);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
if (args->emit_markers) {
|
||||
struct vk_acceleration_structure_build_marker marker = {
|
||||
.step = VK_ACCELERATION_STRUCTURE_BUILD_STEP_HPLOC_BUILD_INTERNAL,
|
||||
};
|
||||
device->as_build_ops->begin_debug_marker(commandBuffer, &marker);
|
||||
}
|
||||
|
||||
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
|
||||
disp->CmdBindPipeline(
|
||||
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
|
||||
for (uint32_t i = 0; i < infoCount; ++i) {
|
||||
if (bvh_states[i].vk.config.internal_type != VK_INTERNAL_BUILD_TYPE_HPLOC)
|
||||
continue;
|
||||
|
||||
assert(args->subgroup_size <= 64);
|
||||
|
||||
uint64_t scratch_addr = pInfos[i].scratchData.deviceAddress;
|
||||
const struct hploc_args consts = {
|
||||
.header = scratch_addr + bvh_states[i].vk.scratch.header_offset,
|
||||
.bvh = scratch_addr + bvh_states[i].vk.scratch.ir_offset,
|
||||
.ranges = scratch_addr + bvh_states[i].vk.scratch.hploc_ranges_offset,
|
||||
.ids = scratch_addr + bvh_states[i].scratch_offset,
|
||||
.internal_node_base = bvh_states[i].vk.scratch.internal_node_offset - bvh_states[i].vk.scratch.ir_offset,
|
||||
};
|
||||
|
||||
disp->CmdPushConstants(commandBuffer, layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
|
||||
disp->CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].vk.leaf_node_count, args->subgroup_size), 1), 1, 1);
|
||||
}
|
||||
|
||||
if (args->emit_markers) {
|
||||
struct vk_acceleration_structure_build_marker marker = {
|
||||
.step = VK_ACCELERATION_STRUCTURE_BUILD_STEP_HPLOC_BUILD_INTERNAL,
|
||||
};
|
||||
device->as_build_ops->end_debug_marker(commandBuffer, &marker);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void
|
||||
vk_cmd_build_acceleration_structures(VkCommandBuffer commandBuffer,
|
||||
struct vk_device *device,
|
||||
|
|
@ -1124,6 +1201,8 @@ vk_cmd_build_acceleration_structures(VkCommandBuffer commandBuffer,
|
|||
|
||||
if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_PLOC) {
|
||||
batch_state.any_ploc = true;
|
||||
} else if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_HPLOC) {
|
||||
batch_state.any_hploc = true;
|
||||
} else if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_LBVH) {
|
||||
batch_state.any_lbvh = true;
|
||||
} else if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_UPDATE) {
|
||||
|
|
@ -1172,7 +1251,7 @@ vk_cmd_build_acceleration_structures(VkCommandBuffer commandBuffer,
|
|||
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
|
||||
}, 0, NULL, 0, NULL);
|
||||
|
||||
if (batch_state.any_lbvh || batch_state.any_ploc) {
|
||||
if (batch_state.any_lbvh || batch_state.any_ploc || batch_state.any_hploc) {
|
||||
VkResult result;
|
||||
|
||||
if (batch_state.any_non_updateable) {
|
||||
|
|
@ -1199,6 +1278,17 @@ vk_cmd_build_acceleration_structures(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
}
|
||||
|
||||
if (batch_state.any_hploc) {
|
||||
for (uint32_t i = 0; i < infoCount; ++i) {
|
||||
uint32_t internal_count = MAX2(bvh_states[i].vk.leaf_node_count, 2) - 1;
|
||||
if (bvh_states[i].vk.config.internal_type == VK_INTERNAL_BUILD_TYPE_HPLOC) {
|
||||
device->cmd_fill_buffer_addr(commandBuffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].vk.scratch.hploc_ranges_offset,
|
||||
sizeof(uint32_t) * internal_count, 0xffffffff);
|
||||
}
|
||||
}
|
||||
vk_barrier_transfer_w_to_compute_r(commandBuffer);
|
||||
}
|
||||
|
||||
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
||||
|
||||
result =
|
||||
|
|
@ -1237,6 +1327,16 @@ vk_cmd_build_acceleration_structures(VkCommandBuffer commandBuffer,
|
|||
}
|
||||
}
|
||||
|
||||
if (batch_state.any_hploc) {
|
||||
result =
|
||||
hploc_build_internal(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(cmd_buffer, result);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
||||
vk_barrier_compute_w_to_indirect_compute_r(commandBuffer);
|
||||
flushed_compute_after_init_update_scratch = true;
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ enum vk_acceleration_structure_build_step {
|
|||
VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT,
|
||||
VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL,
|
||||
VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL,
|
||||
VK_ACCELERATION_STRUCTURE_BUILD_STEP_HPLOC_BUILD_INTERNAL,
|
||||
VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE,
|
||||
VK_ACCELERATION_STRUCTURE_BUILD_STEP_UPDATE,
|
||||
};
|
||||
|
|
@ -88,6 +89,7 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(vk_acceleration_structure, base, VkAccelerationSt
|
|||
enum vk_internal_build_type {
|
||||
VK_INTERNAL_BUILD_TYPE_LBVH,
|
||||
VK_INTERNAL_BUILD_TYPE_PLOC,
|
||||
VK_INTERNAL_BUILD_TYPE_HPLOC,
|
||||
VK_INTERNAL_BUILD_TYPE_UPDATE,
|
||||
};
|
||||
|
||||
|
|
@ -111,6 +113,7 @@ struct vk_scratch_layout {
|
|||
|
||||
uint32_t ploc_prefix_sum_partition_offset;
|
||||
uint32_t lbvh_node_offset;
|
||||
uint32_t hploc_ranges_offset;
|
||||
|
||||
uint32_t ir_offset;
|
||||
uint32_t internal_node_offset;
|
||||
|
|
|
|||
|
|
@ -180,6 +180,7 @@ enum vk_meta_object_key_type {
|
|||
VK_META_OBJECT_KEY_LBVH_MAIN,
|
||||
VK_META_OBJECT_KEY_LBVH_GENERATE_IR,
|
||||
VK_META_OBJECT_KEY_PLOC,
|
||||
VK_META_OBJECT_KEY_HPLOC,
|
||||
|
||||
/* Should be used as an offset for driver-specific object types. */
|
||||
VK_META_OBJECT_KEY_DRIVER_OFFSET = 0x80000000,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue