mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-18 15:58:06 +02:00
We don't currently support building acceleration structures on the CPU
or indirect building in the common framework, and drivers using it don't
either, but drivers have to return non-NULL entrypoints for CPU building
functions if they claim to support VK_KHR_acceleration_structure. Add
stub entrypoints here so that drivers don't have to have this
boilerplate.
Fixes dEQP-VK.api.version_check.entry_points on turnip.
Fixes: 671e3a65a6 ("tu: Support VK_KHR_acceleration_structure")
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33153>
1352 lines
50 KiB
C
1352 lines
50 KiB
C
/*
|
|
* Copyright © 2021 Bas Nieuwenhuizen
|
|
* Copyright © 2023 Valve Corporation
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include "vk_acceleration_structure.h"
|
|
|
|
#include "vk_alloc.h"
|
|
#include "vk_common_entrypoints.h"
|
|
#include "vk_device.h"
|
|
#include "vk_command_buffer.h"
|
|
#include "vk_log.h"
|
|
#include "vk_meta.h"
|
|
|
|
#include "bvh/vk_build_interface.h"
|
|
#include "bvh/vk_bvh.h"
|
|
|
|
#include "radix_sort/common/vk/barrier.h"
|
|
#include "radix_sort/shaders/push.h"
|
|
|
|
#include "util/u_string.h"
|
|
|
|
static const uint32_t leaf_spv[] = {
|
|
#include "bvh/leaf.spv.h"
|
|
};
|
|
|
|
static const uint32_t leaf_always_active_spv[] = {
|
|
#include "bvh/leaf_always_active.spv.h"
|
|
};
|
|
|
|
static const uint32_t morton_spv[] = {
|
|
#include "bvh/morton.spv.h"
|
|
};
|
|
|
|
static const uint32_t lbvh_main_spv[] = {
|
|
#include "bvh/lbvh_main.spv.h"
|
|
};
|
|
|
|
static const uint32_t lbvh_generate_ir_spv[] = {
|
|
#include "bvh/lbvh_generate_ir.spv.h"
|
|
};
|
|
|
|
static const uint32_t ploc_spv[] = {
|
|
#include "bvh/ploc_internal.spv.h"
|
|
};
|
|
|
|
VkDeviceAddress
|
|
vk_acceleration_structure_get_va(struct vk_acceleration_structure *accel_struct)
|
|
{
|
|
VkBufferDeviceAddressInfo info = {
|
|
.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
|
|
.buffer = accel_struct->buffer,
|
|
};
|
|
|
|
VkDeviceAddress base_addr = accel_struct->base.device->dispatch_table.GetBufferDeviceAddress(
|
|
vk_device_to_handle(accel_struct->base.device), &info);
|
|
|
|
return base_addr + accel_struct->offset;
|
|
}
|
|
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
vk_common_CreateAccelerationStructureKHR(VkDevice _device,
|
|
const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
|
|
const VkAllocationCallbacks *pAllocator,
|
|
VkAccelerationStructureKHR *pAccelerationStructure)
|
|
{
|
|
VK_FROM_HANDLE(vk_device, device, _device);
|
|
|
|
struct vk_acceleration_structure *accel_struct = vk_object_alloc(
|
|
device, pAllocator, sizeof(struct vk_acceleration_structure),
|
|
VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR);
|
|
|
|
if (!accel_struct)
|
|
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
|
|
|
accel_struct->buffer = pCreateInfo->buffer;
|
|
accel_struct->offset = pCreateInfo->offset;
|
|
accel_struct->size = pCreateInfo->size;
|
|
|
|
if (pCreateInfo->deviceAddress &&
|
|
vk_acceleration_structure_get_va(accel_struct) != pCreateInfo->deviceAddress)
|
|
return vk_error(device, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
|
|
|
|
*pAccelerationStructure = vk_acceleration_structure_to_handle(accel_struct);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
vk_common_DestroyAccelerationStructureKHR(VkDevice _device,
|
|
VkAccelerationStructureKHR accelerationStructure,
|
|
const VkAllocationCallbacks *pAllocator)
|
|
{
|
|
VK_FROM_HANDLE(vk_device, device, _device);
|
|
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accelerationStructure);
|
|
|
|
if (!accel_struct)
|
|
return;
|
|
|
|
vk_object_free(device, pAllocator, accel_struct);
|
|
}
|
|
|
|
VKAPI_ATTR VkDeviceAddress VKAPI_CALL
|
|
vk_common_GetAccelerationStructureDeviceAddressKHR(
|
|
VkDevice _device, const VkAccelerationStructureDeviceAddressInfoKHR *pInfo)
|
|
{
|
|
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfo->accelerationStructure);
|
|
return vk_acceleration_structure_get_va(accel_struct);
|
|
}
|
|
|
|
#define KEY_ID_PAIR_SIZE 8
|
|
#define MORTON_BIT_SIZE 24
|
|
|
|
enum internal_build_type {
|
|
INTERNAL_BUILD_TYPE_LBVH,
|
|
INTERNAL_BUILD_TYPE_PLOC,
|
|
INTERNAL_BUILD_TYPE_UPDATE,
|
|
};
|
|
|
|
struct build_config {
|
|
enum internal_build_type internal_type;
|
|
bool updateable;
|
|
uint32_t encode_key[MAX_ENCODE_PASSES];
|
|
};
|
|
|
|
struct scratch_layout {
|
|
uint32_t size;
|
|
uint32_t update_size;
|
|
|
|
uint32_t header_offset;
|
|
|
|
/* Used for BUILD only. */
|
|
|
|
uint32_t sort_buffer_offset[2];
|
|
uint32_t sort_internal_offset;
|
|
|
|
uint32_t ploc_prefix_sum_partition_offset;
|
|
uint32_t lbvh_node_offset;
|
|
|
|
uint32_t ir_offset;
|
|
uint32_t internal_node_offset;
|
|
};
|
|
|
|
static struct build_config
|
|
build_config(uint32_t leaf_count,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
|
|
const struct vk_acceleration_structure_build_ops *ops)
|
|
{
|
|
struct build_config config = {0};
|
|
|
|
if (leaf_count <= 4)
|
|
config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
|
|
else if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR)
|
|
config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
|
|
else if (!(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR) &&
|
|
!(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR))
|
|
config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
|
|
else
|
|
config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
|
|
|
|
if (build_info->mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR &&
|
|
build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR &&
|
|
ops->update_as[0])
|
|
config.internal_type = INTERNAL_BUILD_TYPE_UPDATE;
|
|
|
|
if ((build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR) &&
|
|
build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR &&
|
|
ops->update_as[0])
|
|
config.updateable = true;
|
|
|
|
for (unsigned i = 0; i < ARRAY_SIZE(config.encode_key); i++) {
|
|
if (!ops->get_encode_key[i])
|
|
break;
|
|
config.encode_key[i] = ops->get_encode_key[i](leaf_count, build_info->flags);
|
|
}
|
|
|
|
return config;
|
|
}
|
|
|
|
static void
|
|
get_scratch_layout(struct vk_device *device,
|
|
uint32_t leaf_count,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
|
|
const struct vk_acceleration_structure_build_args *args,
|
|
struct scratch_layout *scratch)
|
|
{
|
|
uint32_t internal_count = MAX2(leaf_count, 2) - 1;
|
|
|
|
radix_sort_vk_memory_requirements_t requirements = {
|
|
0,
|
|
};
|
|
radix_sort_vk_get_memory_requirements(args->radix_sort, leaf_count,
|
|
&requirements);
|
|
|
|
uint32_t ir_leaf_size;
|
|
switch (vk_get_as_geometry_type(build_info)) {
|
|
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
|
ir_leaf_size = sizeof(struct vk_ir_triangle_node);
|
|
break;
|
|
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
|
ir_leaf_size = sizeof(struct vk_ir_aabb_node);
|
|
break;
|
|
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
|
|
ir_leaf_size = sizeof(struct vk_ir_instance_node);
|
|
break;
|
|
default:
|
|
unreachable("Unknown VkGeometryTypeKHR");
|
|
}
|
|
|
|
|
|
uint32_t offset = 0;
|
|
|
|
uint32_t ploc_scratch_space = 0;
|
|
uint32_t lbvh_node_space = 0;
|
|
|
|
struct build_config config = build_config(leaf_count, build_info,
|
|
device->as_build_ops);
|
|
|
|
if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC)
|
|
ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition);
|
|
else
|
|
lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count;
|
|
|
|
scratch->header_offset = offset;
|
|
offset += sizeof(struct vk_ir_header);
|
|
|
|
scratch->sort_buffer_offset[0] = offset;
|
|
offset += requirements.keyvals_size;
|
|
|
|
scratch->sort_buffer_offset[1] = offset;
|
|
offset += requirements.keyvals_size;
|
|
|
|
scratch->sort_internal_offset = offset;
|
|
/* Internal sorting data is not needed when PLOC/LBVH are invoked,
|
|
* save space by aliasing them */
|
|
scratch->ploc_prefix_sum_partition_offset = offset;
|
|
scratch->lbvh_node_offset = offset;
|
|
offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space);
|
|
|
|
scratch->ir_offset = offset;
|
|
offset += ir_leaf_size * leaf_count;
|
|
|
|
scratch->internal_node_offset = offset;
|
|
offset += sizeof(struct vk_ir_box_node) * internal_count;
|
|
|
|
scratch->size = offset;
|
|
|
|
if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR &&
|
|
device->as_build_ops->update_as[0]) {
|
|
scratch->update_size =
|
|
device->as_build_ops->get_update_scratch_size(device, leaf_count);
|
|
} else {
|
|
scratch->update_size = offset;
|
|
}
|
|
}
|
|
|
|
struct bvh_state {
|
|
uint32_t scratch_offset;
|
|
|
|
uint32_t leaf_node_count;
|
|
uint32_t internal_node_count;
|
|
uint32_t leaf_node_size;
|
|
|
|
struct scratch_layout scratch;
|
|
struct build_config config;
|
|
|
|
/* Radix sort state */
|
|
uint32_t scatter_blocks;
|
|
uint32_t count_ru_scatter;
|
|
uint32_t histo_blocks;
|
|
uint32_t count_ru_histo;
|
|
struct rs_push_scatter push_scatter;
|
|
|
|
uint32_t last_encode_pass;
|
|
};
|
|
|
|
struct bvh_batch_state {
|
|
bool any_updateable;
|
|
bool any_non_updateable;
|
|
bool any_ploc;
|
|
bool any_lbvh;
|
|
bool any_update;
|
|
};
|
|
|
|
static VkResult
|
|
get_pipeline_spv(struct vk_device *device, struct vk_meta_device *meta,
|
|
const char *name, const uint32_t *spv, uint32_t spv_size,
|
|
unsigned push_constant_size,
|
|
const struct vk_acceleration_structure_build_args *args,
|
|
VkPipeline *pipeline, VkPipelineLayout *layout)
|
|
{
|
|
size_t key_size = strlen(name);
|
|
|
|
VkResult result = vk_meta_get_pipeline_layout(
|
|
device, meta, NULL,
|
|
&(VkPushConstantRange){
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size
|
|
},
|
|
name, key_size, layout);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(meta, name, key_size);
|
|
if (pipeline_from_cache != VK_NULL_HANDLE) {
|
|
*pipeline = pipeline_from_cache;
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
VkShaderModuleCreateInfo module_info = {
|
|
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
|
.pNext = NULL,
|
|
.flags = 0,
|
|
.codeSize = spv_size,
|
|
.pCode = spv,
|
|
};
|
|
|
|
VkSpecializationMapEntry spec_map[2] = {
|
|
{
|
|
.constantID = SUBGROUP_SIZE_ID,
|
|
.offset = 0,
|
|
.size = sizeof(args->subgroup_size),
|
|
},
|
|
{
|
|
.constantID = BVH_BOUNDS_OFFSET_ID,
|
|
.offset = sizeof(args->subgroup_size),
|
|
.size = sizeof(args->bvh_bounds_offset),
|
|
},
|
|
};
|
|
|
|
uint32_t spec_constants[2] = {
|
|
args->subgroup_size,
|
|
args->bvh_bounds_offset
|
|
};
|
|
|
|
VkSpecializationInfo spec_info = {
|
|
.mapEntryCount = ARRAY_SIZE(spec_map),
|
|
.pMapEntries = spec_map,
|
|
.dataSize = sizeof(spec_constants),
|
|
.pData = spec_constants,
|
|
};
|
|
|
|
VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT rssci = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
|
.pNext = &module_info,
|
|
.requiredSubgroupSize = args->subgroup_size,
|
|
};
|
|
|
|
VkPipelineShaderStageCreateInfo shader_stage = {
|
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
|
.pNext = &rssci,
|
|
.flags = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT,
|
|
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.pName = "main",
|
|
.pSpecializationInfo = &spec_info,
|
|
};
|
|
|
|
VkComputePipelineCreateInfo pipeline_info = {
|
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
|
.stage = shader_stage,
|
|
.flags = 0,
|
|
.layout = *layout,
|
|
};
|
|
|
|
return vk_meta_create_compute_pipeline(device, meta, &pipeline_info,
|
|
name, key_size, pipeline);
|
|
}
|
|
|
|
static uint32_t
|
|
pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
|
|
{
|
|
uint32_t geometry_id_and_flags = geometry_id;
|
|
if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
|
|
geometry_id_and_flags |= VK_GEOMETRY_OPAQUE;
|
|
|
|
return geometry_id_and_flags;
|
|
}
|
|
|
|
struct vk_bvh_geometry_data
|
|
vk_fill_geometry_data(VkAccelerationStructureTypeKHR type, uint32_t first_id, uint32_t geom_index,
|
|
const VkAccelerationStructureGeometryKHR *geometry,
|
|
const VkAccelerationStructureBuildRangeInfoKHR *build_range_info)
|
|
{
|
|
struct vk_bvh_geometry_data data = {
|
|
.first_id = first_id,
|
|
.geometry_id = pack_geometry_id_and_flags(geom_index, geometry->flags),
|
|
.geometry_type = geometry->geometryType,
|
|
};
|
|
|
|
switch (geometry->geometryType) {
|
|
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
|
assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
|
|
|
|
data.data = geometry->geometry.triangles.vertexData.deviceAddress +
|
|
build_range_info->firstVertex * geometry->geometry.triangles.vertexStride;
|
|
data.indices = geometry->geometry.triangles.indexData.deviceAddress;
|
|
|
|
if (geometry->geometry.triangles.indexType == VK_INDEX_TYPE_NONE_KHR)
|
|
data.data += build_range_info->primitiveOffset;
|
|
else
|
|
data.indices += build_range_info->primitiveOffset;
|
|
|
|
data.transform = geometry->geometry.triangles.transformData.deviceAddress;
|
|
if (data.transform)
|
|
data.transform += build_range_info->transformOffset;
|
|
|
|
data.stride = geometry->geometry.triangles.vertexStride;
|
|
data.vertex_format = geometry->geometry.triangles.vertexFormat;
|
|
data.index_format = geometry->geometry.triangles.indexType;
|
|
break;
|
|
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
|
assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
|
|
|
|
data.data = geometry->geometry.aabbs.data.deviceAddress + build_range_info->primitiveOffset;
|
|
data.stride = geometry->geometry.aabbs.stride;
|
|
break;
|
|
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
|
|
assert(type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
|
|
|
|
data.data = geometry->geometry.instances.data.deviceAddress + build_range_info->primitiveOffset;
|
|
|
|
if (geometry->geometry.instances.arrayOfPointers)
|
|
data.stride = 8;
|
|
else
|
|
data.stride = sizeof(VkAccelerationStructureInstanceKHR);
|
|
break;
|
|
default:
|
|
unreachable("Unknown geometryType");
|
|
}
|
|
|
|
return data;
|
|
}
|
|
|
|
void
|
|
vk_accel_struct_cmd_begin_debug_marker(VkCommandBuffer commandBuffer,
|
|
enum vk_acceleration_structure_build_step step,
|
|
const char *format, ...)
|
|
{
|
|
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
|
|
struct vk_device *device = cmd_buffer->base.device;
|
|
|
|
va_list ap;
|
|
va_start(ap, format);
|
|
|
|
char *name;
|
|
if (vasprintf(&name, format, ap) == -1) {
|
|
va_end(ap);
|
|
return;
|
|
}
|
|
|
|
va_end(ap);
|
|
|
|
VkDebugMarkerMarkerInfoEXT marker = {
|
|
.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT,
|
|
.pMarkerName = name,
|
|
};
|
|
|
|
device->dispatch_table.CmdDebugMarkerBeginEXT(commandBuffer, &marker);
|
|
}
|
|
|
|
void
|
|
vk_accel_struct_cmd_end_debug_marker(VkCommandBuffer commandBuffer)
|
|
{
|
|
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
|
|
struct vk_device *device = cmd_buffer->base.device;
|
|
|
|
device->dispatch_table.CmdDebugMarkerEndEXT(commandBuffer);
|
|
}
|
|
|
|
static VkResult
|
|
build_leaves(VkCommandBuffer commandBuffer,
|
|
struct vk_device *device, struct vk_meta_device *meta,
|
|
const struct vk_acceleration_structure_build_args *args,
|
|
uint32_t infoCount,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
|
|
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos,
|
|
struct bvh_state *bvh_states,
|
|
bool updateable)
|
|
{
|
|
VkPipeline pipeline;
|
|
VkPipelineLayout layout;
|
|
|
|
/* Many apps are broken and will make inactive primitives active when
|
|
* updating, even though this is disallowed by the spec. To handle this,
|
|
* we use a different variant for updateable acceleration structures when
|
|
* the driver implements an update pass. This passes through inactive leaf
|
|
* nodes as if they were active, with an empty bounding box. It's then the
|
|
* driver or HW's responsibility to filter out inactive nodes.
|
|
*/
|
|
VkResult result;
|
|
if (updateable) {
|
|
result = get_pipeline_spv(device, meta, "leaves_always_active",
|
|
leaf_always_active_spv,
|
|
sizeof(leaf_always_active_spv),
|
|
sizeof(struct leaf_args), args, &pipeline, &layout);
|
|
} else {
|
|
result = get_pipeline_spv(device, meta, "leaves", leaf_spv, sizeof(leaf_spv),
|
|
sizeof(struct leaf_args), args, &pipeline, &layout);
|
|
}
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
if (args->emit_markers) {
|
|
device->as_build_ops->begin_debug_marker(commandBuffer,
|
|
VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES,
|
|
"build_leaves");
|
|
}
|
|
|
|
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
|
|
disp->CmdBindPipeline(
|
|
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
|
|
continue;
|
|
if (bvh_states[i].config.updateable != updateable)
|
|
continue;
|
|
|
|
struct leaf_args leaf_consts = {
|
|
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
|
|
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
|
|
.ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
|
|
};
|
|
|
|
for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
|
|
const VkAccelerationStructureGeometryKHR *geom =
|
|
pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
|
|
|
|
const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
|
|
|
|
if (build_range_info->primitiveCount == 0)
|
|
continue;
|
|
|
|
leaf_consts.geom_data = vk_fill_geometry_data(pInfos[i].type, bvh_states[i].leaf_node_count, j, geom, build_range_info);
|
|
|
|
disp->CmdPushConstants(commandBuffer, layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(leaf_consts), &leaf_consts);
|
|
device->cmd_dispatch_unaligned(commandBuffer, build_range_info->primitiveCount, 1, 1);
|
|
|
|
bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
|
|
}
|
|
}
|
|
|
|
if (args->emit_markers)
|
|
device->as_build_ops->end_debug_marker(commandBuffer);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
morton_generate(VkCommandBuffer commandBuffer, struct vk_device *device,
|
|
struct vk_meta_device *meta,
|
|
const struct vk_acceleration_structure_build_args *args,
|
|
uint32_t infoCount,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
|
|
struct bvh_state *bvh_states)
|
|
{
|
|
VkPipeline pipeline;
|
|
VkPipelineLayout layout;
|
|
|
|
VkResult result =
|
|
get_pipeline_spv(device, meta, "morton", morton_spv, sizeof(morton_spv),
|
|
sizeof(struct morton_args), args, &pipeline, &layout);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
if (args->emit_markers) {
|
|
device->as_build_ops->begin_debug_marker(commandBuffer,
|
|
VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE,
|
|
"morton_generate");
|
|
}
|
|
|
|
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
|
|
disp->CmdBindPipeline(
|
|
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
|
|
continue;
|
|
const struct morton_args consts = {
|
|
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
|
|
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
|
|
.ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
|
|
};
|
|
|
|
disp->CmdPushConstants(commandBuffer, layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
|
|
device->cmd_dispatch_unaligned(commandBuffer, bvh_states[i].leaf_node_count, 1, 1);
|
|
}
|
|
|
|
if (args->emit_markers)
|
|
device->as_build_ops->end_debug_marker(commandBuffer);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
morton_sort(VkCommandBuffer commandBuffer, struct vk_device *device,
|
|
const struct vk_acceleration_structure_build_args *args,
|
|
uint32_t infoCount,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
|
|
{
|
|
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
|
|
|
|
if (args->emit_markers) {
|
|
device->as_build_ops->begin_debug_marker(commandBuffer,
|
|
VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT,
|
|
"morton_sort");
|
|
}
|
|
|
|
/* Copyright 2019 The Fuchsia Authors. */
|
|
const radix_sort_vk_t *rs = args->radix_sort;
|
|
|
|
/*
|
|
* OVERVIEW
|
|
*
|
|
* 1. Pad the keyvals in `scatter_even`.
|
|
* 2. Zero the `histograms` and `partitions`.
|
|
* --- BARRIER ---
|
|
* 3. HISTOGRAM is dispatched before PREFIX.
|
|
* --- BARRIER ---
|
|
* 4. PREFIX is dispatched before the first SCATTER.
|
|
* --- BARRIER ---
|
|
* 5. One or more SCATTER dispatches.
|
|
*
|
|
* Note that the `partitions` buffer can be zeroed anytime before the first
|
|
* scatter.
|
|
*/
|
|
|
|
/* How many passes? */
|
|
uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
|
|
uint32_t keyval_bits = keyval_bytes * 8;
|
|
uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
|
|
uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (bvh_states[i].leaf_node_count)
|
|
bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
|
|
else
|
|
bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
|
|
}
|
|
|
|
/*
|
|
* PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
|
|
*
|
|
* Pad fractional blocks with max-valued keyvals.
|
|
*
|
|
* Zero the histograms and partitions buffer.
|
|
*
|
|
* This assumes the partitions follow the histograms.
|
|
*/
|
|
|
|
/* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
|
|
|
|
/* How many scatter blocks? */
|
|
uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
|
|
uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
|
|
|
|
/*
|
|
* How many histogram blocks?
|
|
*
|
|
* Note that it's OK to have more max-valued digits counted by the histogram
|
|
* than sorted by the scatters because the sort is stable.
|
|
*/
|
|
uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
|
|
uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
|
|
|
|
uint32_t pass_idx = (keyval_bytes - passes);
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (!bvh_states[i].leaf_node_count)
|
|
continue;
|
|
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
|
|
continue;
|
|
|
|
uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
|
|
uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
|
|
|
|
bvh_states[i].scatter_blocks = (bvh_states[i].leaf_node_count + scatter_block_kvs - 1) / scatter_block_kvs;
|
|
bvh_states[i].count_ru_scatter = bvh_states[i].scatter_blocks * scatter_block_kvs;
|
|
|
|
bvh_states[i].histo_blocks = (bvh_states[i].count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
|
|
bvh_states[i].count_ru_histo = bvh_states[i].histo_blocks * histo_block_kvs;
|
|
|
|
/* Fill with max values */
|
|
if (bvh_states[i].count_ru_histo > bvh_states[i].leaf_node_count) {
|
|
device->cmd_fill_buffer_addr(commandBuffer, keyvals_even_addr +
|
|
bvh_states[i].leaf_node_count * keyval_bytes,
|
|
(bvh_states[i].count_ru_histo - bvh_states[i].leaf_node_count) * keyval_bytes,
|
|
0xFFFFFFFF);
|
|
}
|
|
|
|
/*
|
|
* Zero histograms and invalidate partitions.
|
|
*
|
|
* Note that the partition invalidation only needs to be performed once
|
|
* because the even/odd scatter dispatches rely on the the previous pass to
|
|
* leave the partitions in an invalid state.
|
|
*
|
|
* Note that the last workgroup doesn't read/write a partition so it doesn't
|
|
* need to be initialized.
|
|
*/
|
|
uint32_t histo_partition_count = passes + bvh_states[i].scatter_blocks - 1;
|
|
|
|
uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
|
|
|
|
device->cmd_fill_buffer_addr(commandBuffer,
|
|
internal_addr + rs->internal.histograms.offset + fill_base,
|
|
histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)) + keyval_bytes * sizeof(uint32_t), 0);
|
|
}
|
|
|
|
/*
|
|
* Pipeline: HISTOGRAM
|
|
*
|
|
* TODO(allanmac): All subgroups should try to process approximately the same
|
|
* number of blocks in order to minimize tail effects. This was implemented
|
|
* and reverted but should be reimplemented and benchmarked later.
|
|
*/
|
|
vk_barrier_transfer_w_to_compute_r(commandBuffer);
|
|
|
|
disp->CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
rs->pipelines.named.histogram);
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (!bvh_states[i].leaf_node_count)
|
|
continue;
|
|
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
|
|
continue;
|
|
|
|
uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
|
|
uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
|
|
|
|
/* Dispatch histogram */
|
|
struct rs_push_histogram push_histogram = {
|
|
.devaddr_histograms = internal_addr + rs->internal.histograms.offset,
|
|
.devaddr_keyvals = keyvals_even_addr,
|
|
.passes = passes,
|
|
};
|
|
|
|
disp->CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
|
|
sizeof(push_histogram), &push_histogram);
|
|
|
|
disp->CmdDispatch(commandBuffer, bvh_states[i].histo_blocks, 1, 1);
|
|
}
|
|
|
|
/*
|
|
* Pipeline: PREFIX
|
|
*
|
|
* Launch one workgroup per pass.
|
|
*/
|
|
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
|
|
|
disp->CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
rs->pipelines.named.prefix);
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (!bvh_states[i].leaf_node_count)
|
|
continue;
|
|
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
|
|
continue;
|
|
|
|
uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
|
|
|
|
struct rs_push_prefix push_prefix = {
|
|
.devaddr_histograms = internal_addr + rs->internal.histograms.offset,
|
|
};
|
|
|
|
disp->CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
|
|
sizeof(push_prefix), &push_prefix);
|
|
|
|
disp->CmdDispatch(commandBuffer, passes, 1, 1);
|
|
}
|
|
|
|
/* Pipeline: SCATTER */
|
|
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
|
|
|
uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
|
|
|
|
for (uint32_t i = 0; i < infoCount; i++) {
|
|
uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
|
|
uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
|
|
uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
|
|
|
|
bvh_states[i].push_scatter = (struct rs_push_scatter){
|
|
.devaddr_keyvals_even = keyvals_even_addr,
|
|
.devaddr_keyvals_odd = keyvals_odd_addr,
|
|
.devaddr_partitions = internal_addr + rs->internal.partitions.offset,
|
|
.devaddr_histograms = internal_addr + rs->internal.histograms.offset + histogram_offset,
|
|
};
|
|
}
|
|
|
|
bool is_even = true;
|
|
|
|
while (true) {
|
|
uint32_t pass_dword = pass_idx / 4;
|
|
|
|
/* Bind new pipeline */
|
|
VkPipeline p =
|
|
is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
|
|
disp->CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
|
|
|
|
/* Update push constants that changed */
|
|
VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
|
|
: rs->pipeline_layouts.named.scatter[pass_dword].odd;
|
|
|
|
for (uint32_t i = 0; i < infoCount; i++) {
|
|
if (!bvh_states[i].leaf_node_count)
|
|
continue;
|
|
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
|
|
continue;
|
|
|
|
bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
|
|
|
|
disp->CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct rs_push_scatter),
|
|
&bvh_states[i].push_scatter);
|
|
|
|
disp->CmdDispatch(commandBuffer, bvh_states[i].scatter_blocks, 1, 1);
|
|
|
|
bvh_states[i].push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
|
|
}
|
|
|
|
/* Continue? */
|
|
if (++pass_idx >= keyval_bytes)
|
|
break;
|
|
|
|
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
|
|
|
is_even ^= true;
|
|
}
|
|
|
|
if (args->emit_markers)
|
|
device->as_build_ops->end_debug_marker(commandBuffer);
|
|
}
|
|
|
|
static VkResult
|
|
lbvh_build_internal(VkCommandBuffer commandBuffer,
|
|
struct vk_device *device, struct vk_meta_device *meta,
|
|
const struct vk_acceleration_structure_build_args *args,
|
|
uint32_t infoCount,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
|
|
{
|
|
VkPipeline pipeline;
|
|
VkPipelineLayout layout;
|
|
|
|
VkResult result =
|
|
get_pipeline_spv(device, meta, "lbvh_main", lbvh_main_spv,
|
|
sizeof(lbvh_main_spv),
|
|
sizeof(struct lbvh_main_args), args, &pipeline, &layout);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
if (args->emit_markers) {
|
|
device->as_build_ops->begin_debug_marker(commandBuffer,
|
|
VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL,
|
|
"lbvh_build_internal");
|
|
}
|
|
|
|
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
|
|
disp->CmdBindPipeline(
|
|
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
|
|
continue;
|
|
|
|
uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
|
|
uint32_t internal_node_count = MAX2(bvh_states[i].leaf_node_count, 2) - 1;
|
|
|
|
const struct lbvh_main_args consts = {
|
|
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
|
|
.src_ids = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
|
|
.node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
|
|
.id_count = bvh_states[i].leaf_node_count,
|
|
.internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
|
|
};
|
|
|
|
disp->CmdPushConstants(commandBuffer, layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
|
|
device->cmd_dispatch_unaligned(commandBuffer, internal_node_count, 1, 1);
|
|
bvh_states[i].internal_node_count = internal_node_count;
|
|
}
|
|
|
|
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
|
|
|
result =
|
|
get_pipeline_spv(device, meta, "lbvh_generate_ir", lbvh_generate_ir_spv,
|
|
sizeof(lbvh_generate_ir_spv),
|
|
sizeof(struct lbvh_generate_ir_args), args, &pipeline, &layout);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
disp->CmdBindPipeline(
|
|
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
|
|
continue;
|
|
|
|
const struct lbvh_generate_ir_args consts = {
|
|
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
|
|
.node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
|
|
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
|
|
.internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
|
|
};
|
|
|
|
disp->CmdPushConstants(commandBuffer, layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
|
|
device->cmd_dispatch_unaligned(commandBuffer, bvh_states[i].internal_node_count, 1, 1);
|
|
}
|
|
|
|
if (args->emit_markers)
|
|
device->as_build_ops->end_debug_marker(commandBuffer);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static VkResult
|
|
ploc_build_internal(VkCommandBuffer commandBuffer,
|
|
struct vk_device *device, struct vk_meta_device *meta,
|
|
const struct vk_acceleration_structure_build_args *args,
|
|
uint32_t infoCount,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
|
|
{
|
|
VkPipeline pipeline;
|
|
VkPipelineLayout layout;
|
|
|
|
VkResult result =
|
|
get_pipeline_spv(device, meta, "ploc", ploc_spv,
|
|
sizeof(ploc_spv),
|
|
sizeof(struct ploc_args), args, &pipeline, &layout);
|
|
|
|
if (result != VK_SUCCESS)
|
|
return result;
|
|
|
|
if (args->emit_markers) {
|
|
device->as_build_ops->begin_debug_marker(commandBuffer,
|
|
VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL,
|
|
"ploc_build_internal");
|
|
}
|
|
|
|
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
|
|
disp->CmdBindPipeline(
|
|
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
|
|
continue;
|
|
|
|
uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
|
|
uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
|
|
? bvh_states[i].scratch.sort_buffer_offset[1]
|
|
: bvh_states[i].scratch.sort_buffer_offset[0];
|
|
|
|
const struct ploc_args consts = {
|
|
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
|
|
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
|
|
.ids_0 = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
|
|
.ids_1 = pInfos[i].scratchData.deviceAddress + dst_scratch_offset,
|
|
.prefix_scan_partitions =
|
|
pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ploc_prefix_sum_partition_offset,
|
|
.internal_node_offset = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
|
|
};
|
|
|
|
disp->CmdPushConstants(commandBuffer, layout,
|
|
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
|
|
disp->CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].leaf_node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1);
|
|
}
|
|
|
|
if (args->emit_markers)
|
|
device->as_build_ops->end_debug_marker(commandBuffer);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
void
|
|
vk_cmd_build_acceleration_structures(VkCommandBuffer commandBuffer,
|
|
struct vk_device *device,
|
|
struct vk_meta_device *meta,
|
|
uint32_t infoCount,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
|
|
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos,
|
|
const struct vk_acceleration_structure_build_args *args)
|
|
{
|
|
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
|
|
const struct vk_acceleration_structure_build_ops *ops = device->as_build_ops;
|
|
|
|
struct bvh_batch_state batch_state = {0};
|
|
|
|
struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));
|
|
|
|
if (args->emit_markers) {
|
|
device->as_build_ops->begin_debug_marker(commandBuffer,
|
|
VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP,
|
|
"vkCmdBuildAccelerationStructuresKHR(%u)",
|
|
infoCount);
|
|
}
|
|
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
uint32_t leaf_node_count = 0;
|
|
for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
|
|
leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
|
|
}
|
|
|
|
get_scratch_layout(device, leaf_node_count, pInfos + i, args, &bvh_states[i].scratch);
|
|
|
|
struct build_config config = build_config(leaf_node_count, pInfos + i,
|
|
device->as_build_ops);
|
|
bvh_states[i].config = config;
|
|
|
|
if (config.updateable)
|
|
batch_state.any_updateable = true;
|
|
else
|
|
batch_state.any_non_updateable = true;
|
|
|
|
if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC) {
|
|
batch_state.any_ploc = true;
|
|
} else if (config.internal_type == INTERNAL_BUILD_TYPE_LBVH) {
|
|
batch_state.any_lbvh = true;
|
|
} else if (config.internal_type == INTERNAL_BUILD_TYPE_UPDATE) {
|
|
batch_state.any_update = true;
|
|
/* For updates, the leaf node pass never runs, so set leaf_node_count here. */
|
|
bvh_states[i].leaf_node_count = leaf_node_count;
|
|
} else {
|
|
unreachable("Unknown internal_build_type");
|
|
}
|
|
|
|
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE) {
|
|
/* The internal node count is updated in lbvh_build_internal for LBVH
|
|
* and from the PLOC shader for PLOC. */
|
|
struct vk_ir_header header = {
|
|
.min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff},
|
|
.max_bounds = {0x80000000, 0x80000000, 0x80000000},
|
|
.dispatch_size_y = 1,
|
|
.dispatch_size_z = 1,
|
|
.sync_data =
|
|
{
|
|
.current_phase_end_counter = TASK_INDEX_INVALID,
|
|
/* Will be updated by the first PLOC shader invocation */
|
|
.task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
|
|
},
|
|
};
|
|
|
|
device->write_buffer_cp(commandBuffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
|
|
&header, sizeof(header));
|
|
} else {
|
|
VK_FROM_HANDLE(vk_acceleration_structure, src_as, pInfos[i].srcAccelerationStructure);
|
|
VK_FROM_HANDLE(vk_acceleration_structure, dst_as, pInfos[i].dstAccelerationStructure);
|
|
|
|
ops->init_update_scratch(commandBuffer, pInfos[i].scratchData.deviceAddress,
|
|
leaf_node_count, src_as, dst_as);
|
|
}
|
|
}
|
|
|
|
/* Wait for the write_buffer_cp to land before using in compute shaders */
|
|
device->flush_buffer_write_cp(commandBuffer);
|
|
device->dispatch_table.CmdPipelineBarrier(commandBuffer,
|
|
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
|
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
|
0, /* dependencyFlags */
|
|
1,
|
|
&(VkMemoryBarrier) {
|
|
.srcAccessMask = 0,
|
|
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
|
|
}, 0, NULL, 0, NULL);
|
|
|
|
if (batch_state.any_lbvh || batch_state.any_ploc) {
|
|
VkResult result;
|
|
|
|
if (batch_state.any_non_updateable) {
|
|
result =
|
|
build_leaves(commandBuffer, device, meta, args, infoCount, pInfos,
|
|
ppBuildRangeInfos, bvh_states, false);
|
|
|
|
if (result != VK_SUCCESS) {
|
|
free(bvh_states);
|
|
vk_command_buffer_set_error(cmd_buffer, result);
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (batch_state.any_updateable) {
|
|
result =
|
|
build_leaves(commandBuffer, device, meta, args, infoCount, pInfos,
|
|
ppBuildRangeInfos, bvh_states, true);
|
|
|
|
if (result != VK_SUCCESS) {
|
|
free(bvh_states);
|
|
vk_command_buffer_set_error(cmd_buffer, result);
|
|
return;
|
|
}
|
|
}
|
|
|
|
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
|
|
|
result =
|
|
morton_generate(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states);
|
|
|
|
if (result != VK_SUCCESS) {
|
|
free(bvh_states);
|
|
vk_command_buffer_set_error(cmd_buffer, result);
|
|
return;
|
|
}
|
|
|
|
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
|
|
|
morton_sort(commandBuffer, device, args, infoCount, pInfos, bvh_states);
|
|
|
|
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
|
|
|
if (batch_state.any_lbvh) {
|
|
result =
|
|
lbvh_build_internal(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states);
|
|
|
|
if (result != VK_SUCCESS) {
|
|
free(bvh_states);
|
|
vk_command_buffer_set_error(cmd_buffer, result);
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (batch_state.any_ploc) {
|
|
result =
|
|
ploc_build_internal(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states);
|
|
|
|
if (result != VK_SUCCESS) {
|
|
vk_command_buffer_set_error(cmd_buffer, result);
|
|
return;
|
|
}
|
|
}
|
|
|
|
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
|
vk_barrier_compute_w_to_indirect_compute_r(commandBuffer);
|
|
}
|
|
|
|
if (args->emit_markers) {
|
|
device->as_build_ops->begin_debug_marker(commandBuffer,
|
|
VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE,
|
|
"encode");
|
|
}
|
|
|
|
for (unsigned pass = 0; pass < ARRAY_SIZE(ops->encode_as); pass++) {
|
|
if (!ops->encode_as[pass] && !ops->update_as[pass])
|
|
break;
|
|
|
|
bool progress;
|
|
do {
|
|
progress = false;
|
|
|
|
bool update;
|
|
uint32_t encode_key = 0;
|
|
for (uint32_t i = 0; i < infoCount; ++i) {
|
|
if (bvh_states[i].last_encode_pass == pass + 1)
|
|
continue;
|
|
|
|
if (!progress) {
|
|
update = (bvh_states[i].config.internal_type ==
|
|
INTERNAL_BUILD_TYPE_UPDATE);
|
|
if (update && !ops->update_as[pass])
|
|
continue;
|
|
if (!update && !ops->encode_as[pass])
|
|
continue;
|
|
encode_key = bvh_states[i].config.encode_key[pass];
|
|
progress = true;
|
|
if (update)
|
|
ops->update_bind_pipeline[pass](commandBuffer);
|
|
else
|
|
ops->encode_bind_pipeline[pass](commandBuffer, encode_key);
|
|
} else {
|
|
if (update != (bvh_states[i].config.internal_type ==
|
|
INTERNAL_BUILD_TYPE_UPDATE) ||
|
|
encode_key != bvh_states[i].config.encode_key[pass])
|
|
continue;
|
|
}
|
|
|
|
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
|
|
|
|
if (update) {
|
|
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfos[i].srcAccelerationStructure);
|
|
ops->update_as[pass](commandBuffer,
|
|
&pInfos[i],
|
|
ppBuildRangeInfos[i],
|
|
bvh_states[i].leaf_node_count,
|
|
src,
|
|
accel_struct);
|
|
|
|
} else {
|
|
ops->encode_as[pass](commandBuffer,
|
|
&pInfos[i],
|
|
ppBuildRangeInfos[i],
|
|
pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
|
|
pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
|
|
bvh_states[i].leaf_node_count,
|
|
encode_key,
|
|
accel_struct);
|
|
}
|
|
|
|
bvh_states[i].last_encode_pass = pass + 1;
|
|
}
|
|
} while (progress);
|
|
}
|
|
|
|
if (args->emit_markers)
|
|
device->as_build_ops->end_debug_marker(commandBuffer);
|
|
|
|
if (args->emit_markers)
|
|
device->as_build_ops->end_debug_marker(commandBuffer);
|
|
|
|
free(bvh_states);
|
|
}
|
|
|
|
void
|
|
vk_get_as_build_sizes(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
|
|
const uint32_t *pMaxPrimitiveCounts,
|
|
VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo,
|
|
const struct vk_acceleration_structure_build_args *args)
|
|
{
|
|
VK_FROM_HANDLE(vk_device, device, _device);
|
|
|
|
uint32_t leaf_count = 0;
|
|
for (uint32_t i = 0; i < pBuildInfo->geometryCount; i++)
|
|
leaf_count += pMaxPrimitiveCounts[i];
|
|
|
|
struct scratch_layout scratch;
|
|
|
|
get_scratch_layout(device, leaf_count, pBuildInfo, args, &scratch);
|
|
|
|
pSizeInfo->accelerationStructureSize =
|
|
device->as_build_ops->get_as_size(_device, pBuildInfo, leaf_count);
|
|
pSizeInfo->updateScratchSize = scratch.update_size;
|
|
pSizeInfo->buildScratchSize = scratch.size;
|
|
}
|
|
|
|
/* Return true if the common framework supports using this format for loading
|
|
* vertices. Must match the formats handled by load_vertices() on the GPU.
|
|
*/
|
|
bool
|
|
vk_acceleration_struct_vtx_format_supported(VkFormat format)
|
|
{
|
|
switch (format) {
|
|
case VK_FORMAT_R32G32_SFLOAT:
|
|
case VK_FORMAT_R32G32B32_SFLOAT:
|
|
case VK_FORMAT_R32G32B32A32_SFLOAT:
|
|
case VK_FORMAT_R16G16_SFLOAT:
|
|
case VK_FORMAT_R16G16B16_SFLOAT:
|
|
case VK_FORMAT_R16G16B16A16_SFLOAT:
|
|
case VK_FORMAT_R16G16_SNORM:
|
|
case VK_FORMAT_R16G16_UNORM:
|
|
case VK_FORMAT_R16G16B16A16_SNORM:
|
|
case VK_FORMAT_R16G16B16A16_UNORM:
|
|
case VK_FORMAT_R8G8_SNORM:
|
|
case VK_FORMAT_R8G8_UNORM:
|
|
case VK_FORMAT_R8G8B8A8_SNORM:
|
|
case VK_FORMAT_R8G8B8A8_UNORM:
|
|
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/* Stubs of optional functions for drivers that don't implment them. */
|
|
|
|
VKAPI_ATTR void VKAPI_CALL
|
|
vk_common_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer,
|
|
uint32_t infoCount,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
|
|
const VkDeviceAddress *pIndirectDeviceAddresses,
|
|
const uint32_t *pIndirectStrides,
|
|
const uint32_t *const *ppMaxPrimitiveCounts)
|
|
{
|
|
unreachable("Unimplemented");
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
vk_common_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount,
|
|
const VkAccelerationStructureKHR *pAccelerationStructures,
|
|
VkQueryType queryType,
|
|
size_t dataSize,
|
|
void *pData,
|
|
size_t stride)
|
|
{
|
|
VK_FROM_HANDLE(vk_device, device, _device);
|
|
unreachable("Unimplemented");
|
|
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
vk_common_BuildAccelerationStructuresKHR(VkDevice _device,
|
|
VkDeferredOperationKHR deferredOperation,
|
|
uint32_t infoCount,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
|
|
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
|
|
{
|
|
VK_FROM_HANDLE(vk_device, device, _device);
|
|
unreachable("Unimplemented");
|
|
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
vk_common_CopyAccelerationStructureKHR(VkDevice _device,
|
|
VkDeferredOperationKHR deferredOperation,
|
|
const VkCopyAccelerationStructureInfoKHR *pInfo)
|
|
{
|
|
VK_FROM_HANDLE(vk_device, device, _device);
|
|
unreachable("Unimplemented");
|
|
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
vk_common_CopyMemoryToAccelerationStructureKHR(VkDevice _device,
|
|
VkDeferredOperationKHR deferredOperation,
|
|
const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
|
|
{
|
|
VK_FROM_HANDLE(vk_device, device, _device);
|
|
unreachable("Unimplemented");
|
|
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
|
|
}
|
|
|
|
VKAPI_ATTR VkResult VKAPI_CALL
|
|
vk_common_CopyAccelerationStructureToMemoryKHR(VkDevice _device,
|
|
VkDeferredOperationKHR deferredOperation,
|
|
const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
|
|
{
|
|
VK_FROM_HANDLE(vk_device, device, _device);
|
|
unreachable("Unimplemented");
|
|
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
|
|
}
|
|
|