mesa/src/vulkan/runtime/vk_acceleration_structure.c
Connor Abbott 3141033ac2 vk/bvh: Add default stubs for unsupported entrypoints
We don't currently support building acceleration structures on the CPU
or indirect building in the common framework, and drivers using it don't
either, but drivers have to return non-NULL entrypoints for CPU building
functions if they claim to support VK_KHR_acceleration_structure. Add
stub entrypoints here so that drivers don't have to have this
boilerplate.

Fixes dEQP-VK.api.version_check.entry_points on turnip.

Fixes: 671e3a65a6 ("tu: Support VK_KHR_acceleration_structure")
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33153>
2025-01-23 05:16:58 +00:00

1352 lines
50 KiB
C

/*
* Copyright © 2021 Bas Nieuwenhuizen
* Copyright © 2023 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "vk_acceleration_structure.h"
#include "vk_alloc.h"
#include "vk_common_entrypoints.h"
#include "vk_device.h"
#include "vk_command_buffer.h"
#include "vk_log.h"
#include "vk_meta.h"
#include "bvh/vk_build_interface.h"
#include "bvh/vk_bvh.h"
#include "radix_sort/common/vk/barrier.h"
#include "radix_sort/shaders/push.h"
#include "util/u_string.h"
static const uint32_t leaf_spv[] = {
#include "bvh/leaf.spv.h"
};
static const uint32_t leaf_always_active_spv[] = {
#include "bvh/leaf_always_active.spv.h"
};
static const uint32_t morton_spv[] = {
#include "bvh/morton.spv.h"
};
static const uint32_t lbvh_main_spv[] = {
#include "bvh/lbvh_main.spv.h"
};
static const uint32_t lbvh_generate_ir_spv[] = {
#include "bvh/lbvh_generate_ir.spv.h"
};
static const uint32_t ploc_spv[] = {
#include "bvh/ploc_internal.spv.h"
};
VkDeviceAddress
vk_acceleration_structure_get_va(struct vk_acceleration_structure *accel_struct)
{
VkBufferDeviceAddressInfo info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
.buffer = accel_struct->buffer,
};
VkDeviceAddress base_addr = accel_struct->base.device->dispatch_table.GetBufferDeviceAddress(
vk_device_to_handle(accel_struct->base.device), &info);
return base_addr + accel_struct->offset;
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CreateAccelerationStructureKHR(VkDevice _device,
const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkAccelerationStructureKHR *pAccelerationStructure)
{
VK_FROM_HANDLE(vk_device, device, _device);
struct vk_acceleration_structure *accel_struct = vk_object_alloc(
device, pAllocator, sizeof(struct vk_acceleration_structure),
VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR);
if (!accel_struct)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
accel_struct->buffer = pCreateInfo->buffer;
accel_struct->offset = pCreateInfo->offset;
accel_struct->size = pCreateInfo->size;
if (pCreateInfo->deviceAddress &&
vk_acceleration_structure_get_va(accel_struct) != pCreateInfo->deviceAddress)
return vk_error(device, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS);
*pAccelerationStructure = vk_acceleration_structure_to_handle(accel_struct);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
vk_common_DestroyAccelerationStructureKHR(VkDevice _device,
VkAccelerationStructureKHR accelerationStructure,
const VkAllocationCallbacks *pAllocator)
{
VK_FROM_HANDLE(vk_device, device, _device);
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accelerationStructure);
if (!accel_struct)
return;
vk_object_free(device, pAllocator, accel_struct);
}
VKAPI_ATTR VkDeviceAddress VKAPI_CALL
vk_common_GetAccelerationStructureDeviceAddressKHR(
VkDevice _device, const VkAccelerationStructureDeviceAddressInfoKHR *pInfo)
{
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfo->accelerationStructure);
return vk_acceleration_structure_get_va(accel_struct);
}
#define KEY_ID_PAIR_SIZE 8
#define MORTON_BIT_SIZE 24
enum internal_build_type {
INTERNAL_BUILD_TYPE_LBVH,
INTERNAL_BUILD_TYPE_PLOC,
INTERNAL_BUILD_TYPE_UPDATE,
};
struct build_config {
enum internal_build_type internal_type;
bool updateable;
uint32_t encode_key[MAX_ENCODE_PASSES];
};
struct scratch_layout {
uint32_t size;
uint32_t update_size;
uint32_t header_offset;
/* Used for BUILD only. */
uint32_t sort_buffer_offset[2];
uint32_t sort_internal_offset;
uint32_t ploc_prefix_sum_partition_offset;
uint32_t lbvh_node_offset;
uint32_t ir_offset;
uint32_t internal_node_offset;
};
static struct build_config
build_config(uint32_t leaf_count,
const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
const struct vk_acceleration_structure_build_ops *ops)
{
struct build_config config = {0};
if (leaf_count <= 4)
config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
else if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR)
config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
else if (!(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR) &&
!(build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR))
config.internal_type = INTERNAL_BUILD_TYPE_PLOC;
else
config.internal_type = INTERNAL_BUILD_TYPE_LBVH;
if (build_info->mode == VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR &&
build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR &&
ops->update_as[0])
config.internal_type = INTERNAL_BUILD_TYPE_UPDATE;
if ((build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR) &&
build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR &&
ops->update_as[0])
config.updateable = true;
for (unsigned i = 0; i < ARRAY_SIZE(config.encode_key); i++) {
if (!ops->get_encode_key[i])
break;
config.encode_key[i] = ops->get_encode_key[i](leaf_count, build_info->flags);
}
return config;
}
static void
get_scratch_layout(struct vk_device *device,
uint32_t leaf_count,
const VkAccelerationStructureBuildGeometryInfoKHR *build_info,
const struct vk_acceleration_structure_build_args *args,
struct scratch_layout *scratch)
{
uint32_t internal_count = MAX2(leaf_count, 2) - 1;
radix_sort_vk_memory_requirements_t requirements = {
0,
};
radix_sort_vk_get_memory_requirements(args->radix_sort, leaf_count,
&requirements);
uint32_t ir_leaf_size;
switch (vk_get_as_geometry_type(build_info)) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
ir_leaf_size = sizeof(struct vk_ir_triangle_node);
break;
case VK_GEOMETRY_TYPE_AABBS_KHR:
ir_leaf_size = sizeof(struct vk_ir_aabb_node);
break;
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
ir_leaf_size = sizeof(struct vk_ir_instance_node);
break;
default:
unreachable("Unknown VkGeometryTypeKHR");
}
uint32_t offset = 0;
uint32_t ploc_scratch_space = 0;
uint32_t lbvh_node_space = 0;
struct build_config config = build_config(leaf_count, build_info,
device->as_build_ops);
if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC)
ploc_scratch_space = DIV_ROUND_UP(leaf_count, PLOC_WORKGROUP_SIZE) * sizeof(struct ploc_prefix_scan_partition);
else
lbvh_node_space = sizeof(struct lbvh_node_info) * internal_count;
scratch->header_offset = offset;
offset += sizeof(struct vk_ir_header);
scratch->sort_buffer_offset[0] = offset;
offset += requirements.keyvals_size;
scratch->sort_buffer_offset[1] = offset;
offset += requirements.keyvals_size;
scratch->sort_internal_offset = offset;
/* Internal sorting data is not needed when PLOC/LBVH are invoked,
* save space by aliasing them */
scratch->ploc_prefix_sum_partition_offset = offset;
scratch->lbvh_node_offset = offset;
offset += MAX3(requirements.internal_size, ploc_scratch_space, lbvh_node_space);
scratch->ir_offset = offset;
offset += ir_leaf_size * leaf_count;
scratch->internal_node_offset = offset;
offset += sizeof(struct vk_ir_box_node) * internal_count;
scratch->size = offset;
if (build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR &&
device->as_build_ops->update_as[0]) {
scratch->update_size =
device->as_build_ops->get_update_scratch_size(device, leaf_count);
} else {
scratch->update_size = offset;
}
}
struct bvh_state {
uint32_t scratch_offset;
uint32_t leaf_node_count;
uint32_t internal_node_count;
uint32_t leaf_node_size;
struct scratch_layout scratch;
struct build_config config;
/* Radix sort state */
uint32_t scatter_blocks;
uint32_t count_ru_scatter;
uint32_t histo_blocks;
uint32_t count_ru_histo;
struct rs_push_scatter push_scatter;
uint32_t last_encode_pass;
};
struct bvh_batch_state {
bool any_updateable;
bool any_non_updateable;
bool any_ploc;
bool any_lbvh;
bool any_update;
};
static VkResult
get_pipeline_spv(struct vk_device *device, struct vk_meta_device *meta,
const char *name, const uint32_t *spv, uint32_t spv_size,
unsigned push_constant_size,
const struct vk_acceleration_structure_build_args *args,
VkPipeline *pipeline, VkPipelineLayout *layout)
{
size_t key_size = strlen(name);
VkResult result = vk_meta_get_pipeline_layout(
device, meta, NULL,
&(VkPushConstantRange){
VK_SHADER_STAGE_COMPUTE_BIT, 0, push_constant_size
},
name, key_size, layout);
if (result != VK_SUCCESS)
return result;
VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(meta, name, key_size);
if (pipeline_from_cache != VK_NULL_HANDLE) {
*pipeline = pipeline_from_cache;
return VK_SUCCESS;
}
VkShaderModuleCreateInfo module_info = {
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
.pNext = NULL,
.flags = 0,
.codeSize = spv_size,
.pCode = spv,
};
VkSpecializationMapEntry spec_map[2] = {
{
.constantID = SUBGROUP_SIZE_ID,
.offset = 0,
.size = sizeof(args->subgroup_size),
},
{
.constantID = BVH_BOUNDS_OFFSET_ID,
.offset = sizeof(args->subgroup_size),
.size = sizeof(args->bvh_bounds_offset),
},
};
uint32_t spec_constants[2] = {
args->subgroup_size,
args->bvh_bounds_offset
};
VkSpecializationInfo spec_info = {
.mapEntryCount = ARRAY_SIZE(spec_map),
.pMapEntries = spec_map,
.dataSize = sizeof(spec_constants),
.pData = spec_constants,
};
VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT rssci = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
.pNext = &module_info,
.requiredSubgroupSize = args->subgroup_size,
};
VkPipelineShaderStageCreateInfo shader_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = &rssci,
.flags = VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.pName = "main",
.pSpecializationInfo = &spec_info,
};
VkComputePipelineCreateInfo pipeline_info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = shader_stage,
.flags = 0,
.layout = *layout,
};
return vk_meta_create_compute_pipeline(device, meta, &pipeline_info,
name, key_size, pipeline);
}
static uint32_t
pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
{
uint32_t geometry_id_and_flags = geometry_id;
if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
geometry_id_and_flags |= VK_GEOMETRY_OPAQUE;
return geometry_id_and_flags;
}
struct vk_bvh_geometry_data
vk_fill_geometry_data(VkAccelerationStructureTypeKHR type, uint32_t first_id, uint32_t geom_index,
const VkAccelerationStructureGeometryKHR *geometry,
const VkAccelerationStructureBuildRangeInfoKHR *build_range_info)
{
struct vk_bvh_geometry_data data = {
.first_id = first_id,
.geometry_id = pack_geometry_id_and_flags(geom_index, geometry->flags),
.geometry_type = geometry->geometryType,
};
switch (geometry->geometryType) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
data.data = geometry->geometry.triangles.vertexData.deviceAddress +
build_range_info->firstVertex * geometry->geometry.triangles.vertexStride;
data.indices = geometry->geometry.triangles.indexData.deviceAddress;
if (geometry->geometry.triangles.indexType == VK_INDEX_TYPE_NONE_KHR)
data.data += build_range_info->primitiveOffset;
else
data.indices += build_range_info->primitiveOffset;
data.transform = geometry->geometry.triangles.transformData.deviceAddress;
if (data.transform)
data.transform += build_range_info->transformOffset;
data.stride = geometry->geometry.triangles.vertexStride;
data.vertex_format = geometry->geometry.triangles.vertexFormat;
data.index_format = geometry->geometry.triangles.indexType;
break;
case VK_GEOMETRY_TYPE_AABBS_KHR:
assert(type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR);
data.data = geometry->geometry.aabbs.data.deviceAddress + build_range_info->primitiveOffset;
data.stride = geometry->geometry.aabbs.stride;
break;
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
assert(type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
data.data = geometry->geometry.instances.data.deviceAddress + build_range_info->primitiveOffset;
if (geometry->geometry.instances.arrayOfPointers)
data.stride = 8;
else
data.stride = sizeof(VkAccelerationStructureInstanceKHR);
break;
default:
unreachable("Unknown geometryType");
}
return data;
}
void
vk_accel_struct_cmd_begin_debug_marker(VkCommandBuffer commandBuffer,
enum vk_acceleration_structure_build_step step,
const char *format, ...)
{
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
struct vk_device *device = cmd_buffer->base.device;
va_list ap;
va_start(ap, format);
char *name;
if (vasprintf(&name, format, ap) == -1) {
va_end(ap);
return;
}
va_end(ap);
VkDebugMarkerMarkerInfoEXT marker = {
.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT,
.pMarkerName = name,
};
device->dispatch_table.CmdDebugMarkerBeginEXT(commandBuffer, &marker);
}
void
vk_accel_struct_cmd_end_debug_marker(VkCommandBuffer commandBuffer)
{
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
struct vk_device *device = cmd_buffer->base.device;
device->dispatch_table.CmdDebugMarkerEndEXT(commandBuffer);
}
static VkResult
build_leaves(VkCommandBuffer commandBuffer,
struct vk_device *device, struct vk_meta_device *meta,
const struct vk_acceleration_structure_build_args *args,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos,
struct bvh_state *bvh_states,
bool updateable)
{
VkPipeline pipeline;
VkPipelineLayout layout;
/* Many apps are broken and will make inactive primitives active when
* updating, even though this is disallowed by the spec. To handle this,
* we use a different variant for updateable acceleration structures when
* the driver implements an update pass. This passes through inactive leaf
* nodes as if they were active, with an empty bounding box. It's then the
* driver or HW's responsibility to filter out inactive nodes.
*/
VkResult result;
if (updateable) {
result = get_pipeline_spv(device, meta, "leaves_always_active",
leaf_always_active_spv,
sizeof(leaf_always_active_spv),
sizeof(struct leaf_args), args, &pipeline, &layout);
} else {
result = get_pipeline_spv(device, meta, "leaves", leaf_spv, sizeof(leaf_spv),
sizeof(struct leaf_args), args, &pipeline, &layout);
}
if (result != VK_SUCCESS)
return result;
if (args->emit_markers) {
device->as_build_ops->begin_debug_marker(commandBuffer,
VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES,
"build_leaves");
}
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
disp->CmdBindPipeline(
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
continue;
if (bvh_states[i].config.updateable != updateable)
continue;
struct leaf_args leaf_consts = {
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
.ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
};
for (unsigned j = 0; j < pInfos[i].geometryCount; ++j) {
const VkAccelerationStructureGeometryKHR *geom =
pInfos[i].pGeometries ? &pInfos[i].pGeometries[j] : pInfos[i].ppGeometries[j];
const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &ppBuildRangeInfos[i][j];
if (build_range_info->primitiveCount == 0)
continue;
leaf_consts.geom_data = vk_fill_geometry_data(pInfos[i].type, bvh_states[i].leaf_node_count, j, geom, build_range_info);
disp->CmdPushConstants(commandBuffer, layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(leaf_consts), &leaf_consts);
device->cmd_dispatch_unaligned(commandBuffer, build_range_info->primitiveCount, 1, 1);
bvh_states[i].leaf_node_count += build_range_info->primitiveCount;
}
}
if (args->emit_markers)
device->as_build_ops->end_debug_marker(commandBuffer);
return VK_SUCCESS;
}
static VkResult
morton_generate(VkCommandBuffer commandBuffer, struct vk_device *device,
struct vk_meta_device *meta,
const struct vk_acceleration_structure_build_args *args,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
struct bvh_state *bvh_states)
{
VkPipeline pipeline;
VkPipelineLayout layout;
VkResult result =
get_pipeline_spv(device, meta, "morton", morton_spv, sizeof(morton_spv),
sizeof(struct morton_args), args, &pipeline, &layout);
if (result != VK_SUCCESS)
return result;
if (args->emit_markers) {
device->as_build_ops->begin_debug_marker(commandBuffer,
VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE,
"morton_generate");
}
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
disp->CmdBindPipeline(
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
continue;
const struct morton_args consts = {
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
.ids = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0],
};
disp->CmdPushConstants(commandBuffer, layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
device->cmd_dispatch_unaligned(commandBuffer, bvh_states[i].leaf_node_count, 1, 1);
}
if (args->emit_markers)
device->as_build_ops->end_debug_marker(commandBuffer);
return VK_SUCCESS;
}
static void
morton_sort(VkCommandBuffer commandBuffer, struct vk_device *device,
const struct vk_acceleration_structure_build_args *args,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
{
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
if (args->emit_markers) {
device->as_build_ops->begin_debug_marker(commandBuffer,
VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT,
"morton_sort");
}
/* Copyright 2019 The Fuchsia Authors. */
const radix_sort_vk_t *rs = args->radix_sort;
/*
* OVERVIEW
*
* 1. Pad the keyvals in `scatter_even`.
* 2. Zero the `histograms` and `partitions`.
* --- BARRIER ---
* 3. HISTOGRAM is dispatched before PREFIX.
* --- BARRIER ---
* 4. PREFIX is dispatched before the first SCATTER.
* --- BARRIER ---
* 5. One or more SCATTER dispatches.
*
* Note that the `partitions` buffer can be zeroed anytime before the first
* scatter.
*/
/* How many passes? */
uint32_t keyval_bytes = rs->config.keyval_dwords * (uint32_t)sizeof(uint32_t);
uint32_t keyval_bits = keyval_bytes * 8;
uint32_t key_bits = MIN2(MORTON_BIT_SIZE, keyval_bits);
uint32_t passes = (key_bits + RS_RADIX_LOG2 - 1) / RS_RADIX_LOG2;
for (uint32_t i = 0; i < infoCount; ++i) {
if (bvh_states[i].leaf_node_count)
bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[passes & 1];
else
bvh_states[i].scratch_offset = bvh_states[i].scratch.sort_buffer_offset[0];
}
/*
* PAD KEYVALS AND ZERO HISTOGRAM/PARTITIONS
*
* Pad fractional blocks with max-valued keyvals.
*
* Zero the histograms and partitions buffer.
*
* This assumes the partitions follow the histograms.
*/
/* FIXME(allanmac): Consider precomputing some of these values and hang them off `rs`. */
/* How many scatter blocks? */
uint32_t scatter_wg_size = 1 << rs->config.scatter.workgroup_size_log2;
uint32_t scatter_block_kvs = scatter_wg_size * rs->config.scatter.block_rows;
/*
* How many histogram blocks?
*
* Note that it's OK to have more max-valued digits counted by the histogram
* than sorted by the scatters because the sort is stable.
*/
uint32_t histo_wg_size = 1 << rs->config.histogram.workgroup_size_log2;
uint32_t histo_block_kvs = histo_wg_size * rs->config.histogram.block_rows;
uint32_t pass_idx = (keyval_bytes - passes);
for (uint32_t i = 0; i < infoCount; ++i) {
if (!bvh_states[i].leaf_node_count)
continue;
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
continue;
uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
bvh_states[i].scatter_blocks = (bvh_states[i].leaf_node_count + scatter_block_kvs - 1) / scatter_block_kvs;
bvh_states[i].count_ru_scatter = bvh_states[i].scatter_blocks * scatter_block_kvs;
bvh_states[i].histo_blocks = (bvh_states[i].count_ru_scatter + histo_block_kvs - 1) / histo_block_kvs;
bvh_states[i].count_ru_histo = bvh_states[i].histo_blocks * histo_block_kvs;
/* Fill with max values */
if (bvh_states[i].count_ru_histo > bvh_states[i].leaf_node_count) {
device->cmd_fill_buffer_addr(commandBuffer, keyvals_even_addr +
bvh_states[i].leaf_node_count * keyval_bytes,
(bvh_states[i].count_ru_histo - bvh_states[i].leaf_node_count) * keyval_bytes,
0xFFFFFFFF);
}
/*
* Zero histograms and invalidate partitions.
*
* Note that the partition invalidation only needs to be performed once
* because the even/odd scatter dispatches rely on the the previous pass to
* leave the partitions in an invalid state.
*
* Note that the last workgroup doesn't read/write a partition so it doesn't
* need to be initialized.
*/
uint32_t histo_partition_count = passes + bvh_states[i].scatter_blocks - 1;
uint32_t fill_base = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
device->cmd_fill_buffer_addr(commandBuffer,
internal_addr + rs->internal.histograms.offset + fill_base,
histo_partition_count * (RS_RADIX_SIZE * sizeof(uint32_t)) + keyval_bytes * sizeof(uint32_t), 0);
}
/*
* Pipeline: HISTOGRAM
*
* TODO(allanmac): All subgroups should try to process approximately the same
* number of blocks in order to minimize tail effects. This was implemented
* and reverted but should be reimplemented and benchmarked later.
*/
vk_barrier_transfer_w_to_compute_r(commandBuffer);
disp->CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
rs->pipelines.named.histogram);
for (uint32_t i = 0; i < infoCount; ++i) {
if (!bvh_states[i].leaf_node_count)
continue;
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
continue;
uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
/* Dispatch histogram */
struct rs_push_histogram push_histogram = {
.devaddr_histograms = internal_addr + rs->internal.histograms.offset,
.devaddr_keyvals = keyvals_even_addr,
.passes = passes,
};
disp->CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.histogram, VK_SHADER_STAGE_COMPUTE_BIT, 0,
sizeof(push_histogram), &push_histogram);
disp->CmdDispatch(commandBuffer, bvh_states[i].histo_blocks, 1, 1);
}
/*
* Pipeline: PREFIX
*
* Launch one workgroup per pass.
*/
vk_barrier_compute_w_to_compute_r(commandBuffer);
disp->CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
rs->pipelines.named.prefix);
for (uint32_t i = 0; i < infoCount; ++i) {
if (!bvh_states[i].leaf_node_count)
continue;
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
continue;
uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
struct rs_push_prefix push_prefix = {
.devaddr_histograms = internal_addr + rs->internal.histograms.offset,
};
disp->CmdPushConstants(commandBuffer, rs->pipeline_layouts.named.prefix, VK_SHADER_STAGE_COMPUTE_BIT, 0,
sizeof(push_prefix), &push_prefix);
disp->CmdDispatch(commandBuffer, passes, 1, 1);
}
/* Pipeline: SCATTER */
vk_barrier_compute_w_to_compute_r(commandBuffer);
uint32_t histogram_offset = pass_idx * (RS_RADIX_SIZE * sizeof(uint32_t));
for (uint32_t i = 0; i < infoCount; i++) {
uint64_t keyvals_even_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[0];
uint64_t keyvals_odd_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_buffer_offset[1];
uint64_t internal_addr = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.sort_internal_offset;
bvh_states[i].push_scatter = (struct rs_push_scatter){
.devaddr_keyvals_even = keyvals_even_addr,
.devaddr_keyvals_odd = keyvals_odd_addr,
.devaddr_partitions = internal_addr + rs->internal.partitions.offset,
.devaddr_histograms = internal_addr + rs->internal.histograms.offset + histogram_offset,
};
}
bool is_even = true;
while (true) {
uint32_t pass_dword = pass_idx / 4;
/* Bind new pipeline */
VkPipeline p =
is_even ? rs->pipelines.named.scatter[pass_dword].even : rs->pipelines.named.scatter[pass_dword].odd;
disp->CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, p);
/* Update push constants that changed */
VkPipelineLayout pl = is_even ? rs->pipeline_layouts.named.scatter[pass_dword].even
: rs->pipeline_layouts.named.scatter[pass_dword].odd;
for (uint32_t i = 0; i < infoCount; i++) {
if (!bvh_states[i].leaf_node_count)
continue;
if (bvh_states[i].config.internal_type == INTERNAL_BUILD_TYPE_UPDATE)
continue;
bvh_states[i].push_scatter.pass_offset = (pass_idx & 3) * RS_RADIX_LOG2;
disp->CmdPushConstants(commandBuffer, pl, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct rs_push_scatter),
&bvh_states[i].push_scatter);
disp->CmdDispatch(commandBuffer, bvh_states[i].scatter_blocks, 1, 1);
bvh_states[i].push_scatter.devaddr_histograms += (RS_RADIX_SIZE * sizeof(uint32_t));
}
/* Continue? */
if (++pass_idx >= keyval_bytes)
break;
vk_barrier_compute_w_to_compute_r(commandBuffer);
is_even ^= true;
}
if (args->emit_markers)
device->as_build_ops->end_debug_marker(commandBuffer);
}
static VkResult
lbvh_build_internal(VkCommandBuffer commandBuffer,
struct vk_device *device, struct vk_meta_device *meta,
const struct vk_acceleration_structure_build_args *args,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
{
VkPipeline pipeline;
VkPipelineLayout layout;
VkResult result =
get_pipeline_spv(device, meta, "lbvh_main", lbvh_main_spv,
sizeof(lbvh_main_spv),
sizeof(struct lbvh_main_args), args, &pipeline, &layout);
if (result != VK_SUCCESS)
return result;
if (args->emit_markers) {
device->as_build_ops->begin_debug_marker(commandBuffer,
VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL,
"lbvh_build_internal");
}
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
disp->CmdBindPipeline(
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
continue;
uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
uint32_t internal_node_count = MAX2(bvh_states[i].leaf_node_count, 2) - 1;
const struct lbvh_main_args consts = {
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.src_ids = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
.node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
.id_count = bvh_states[i].leaf_node_count,
.internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
};
disp->CmdPushConstants(commandBuffer, layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
device->cmd_dispatch_unaligned(commandBuffer, internal_node_count, 1, 1);
bvh_states[i].internal_node_count = internal_node_count;
}
vk_barrier_compute_w_to_compute_r(commandBuffer);
result =
get_pipeline_spv(device, meta, "lbvh_generate_ir", lbvh_generate_ir_spv,
sizeof(lbvh_generate_ir_spv),
sizeof(struct lbvh_generate_ir_args), args, &pipeline, &layout);
if (result != VK_SUCCESS)
return result;
disp->CmdBindPipeline(
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_LBVH)
continue;
const struct lbvh_generate_ir_args consts = {
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.node_info = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.lbvh_node_offset,
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
.internal_node_base = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
};
disp->CmdPushConstants(commandBuffer, layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
device->cmd_dispatch_unaligned(commandBuffer, bvh_states[i].internal_node_count, 1, 1);
}
if (args->emit_markers)
device->as_build_ops->end_debug_marker(commandBuffer);
return VK_SUCCESS;
}
static VkResult
ploc_build_internal(VkCommandBuffer commandBuffer,
struct vk_device *device, struct vk_meta_device *meta,
const struct vk_acceleration_structure_build_args *args,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, struct bvh_state *bvh_states)
{
VkPipeline pipeline;
VkPipelineLayout layout;
VkResult result =
get_pipeline_spv(device, meta, "ploc", ploc_spv,
sizeof(ploc_spv),
sizeof(struct ploc_args), args, &pipeline, &layout);
if (result != VK_SUCCESS)
return result;
if (args->emit_markers) {
device->as_build_ops->begin_debug_marker(commandBuffer,
VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL,
"ploc_build_internal");
}
const struct vk_device_dispatch_table *disp = &device->dispatch_table;
disp->CmdBindPipeline(
commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (uint32_t i = 0; i < infoCount; ++i) {
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_PLOC)
continue;
uint32_t src_scratch_offset = bvh_states[i].scratch_offset;
uint32_t dst_scratch_offset = (src_scratch_offset == bvh_states[i].scratch.sort_buffer_offset[0])
? bvh_states[i].scratch.sort_buffer_offset[1]
: bvh_states[i].scratch.sort_buffer_offset[0];
const struct ploc_args consts = {
.bvh = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
.header = pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
.ids_0 = pInfos[i].scratchData.deviceAddress + src_scratch_offset,
.ids_1 = pInfos[i].scratchData.deviceAddress + dst_scratch_offset,
.prefix_scan_partitions =
pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ploc_prefix_sum_partition_offset,
.internal_node_offset = bvh_states[i].scratch.internal_node_offset - bvh_states[i].scratch.ir_offset,
};
disp->CmdPushConstants(commandBuffer, layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(consts), &consts);
disp->CmdDispatch(commandBuffer, MAX2(DIV_ROUND_UP(bvh_states[i].leaf_node_count, PLOC_WORKGROUP_SIZE), 1), 1, 1);
}
if (args->emit_markers)
device->as_build_ops->end_debug_marker(commandBuffer);
return VK_SUCCESS;
}
void
vk_cmd_build_acceleration_structures(VkCommandBuffer commandBuffer,
struct vk_device *device,
struct vk_meta_device *meta,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos,
const struct vk_acceleration_structure_build_args *args)
{
VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, commandBuffer);
const struct vk_acceleration_structure_build_ops *ops = device->as_build_ops;
struct bvh_batch_state batch_state = {0};
struct bvh_state *bvh_states = calloc(infoCount, sizeof(struct bvh_state));
if (args->emit_markers) {
device->as_build_ops->begin_debug_marker(commandBuffer,
VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP,
"vkCmdBuildAccelerationStructuresKHR(%u)",
infoCount);
}
for (uint32_t i = 0; i < infoCount; ++i) {
uint32_t leaf_node_count = 0;
for (uint32_t j = 0; j < pInfos[i].geometryCount; ++j) {
leaf_node_count += ppBuildRangeInfos[i][j].primitiveCount;
}
get_scratch_layout(device, leaf_node_count, pInfos + i, args, &bvh_states[i].scratch);
struct build_config config = build_config(leaf_node_count, pInfos + i,
device->as_build_ops);
bvh_states[i].config = config;
if (config.updateable)
batch_state.any_updateable = true;
else
batch_state.any_non_updateable = true;
if (config.internal_type == INTERNAL_BUILD_TYPE_PLOC) {
batch_state.any_ploc = true;
} else if (config.internal_type == INTERNAL_BUILD_TYPE_LBVH) {
batch_state.any_lbvh = true;
} else if (config.internal_type == INTERNAL_BUILD_TYPE_UPDATE) {
batch_state.any_update = true;
/* For updates, the leaf node pass never runs, so set leaf_node_count here. */
bvh_states[i].leaf_node_count = leaf_node_count;
} else {
unreachable("Unknown internal_build_type");
}
if (bvh_states[i].config.internal_type != INTERNAL_BUILD_TYPE_UPDATE) {
/* The internal node count is updated in lbvh_build_internal for LBVH
* and from the PLOC shader for PLOC. */
struct vk_ir_header header = {
.min_bounds = {0x7fffffff, 0x7fffffff, 0x7fffffff},
.max_bounds = {0x80000000, 0x80000000, 0x80000000},
.dispatch_size_y = 1,
.dispatch_size_z = 1,
.sync_data =
{
.current_phase_end_counter = TASK_INDEX_INVALID,
/* Will be updated by the first PLOC shader invocation */
.task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
},
};
device->write_buffer_cp(commandBuffer, pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
&header, sizeof(header));
} else {
VK_FROM_HANDLE(vk_acceleration_structure, src_as, pInfos[i].srcAccelerationStructure);
VK_FROM_HANDLE(vk_acceleration_structure, dst_as, pInfos[i].dstAccelerationStructure);
ops->init_update_scratch(commandBuffer, pInfos[i].scratchData.deviceAddress,
leaf_node_count, src_as, dst_as);
}
}
/* Wait for the write_buffer_cp to land before using in compute shaders */
device->flush_buffer_write_cp(commandBuffer);
device->dispatch_table.CmdPipelineBarrier(commandBuffer,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
0, /* dependencyFlags */
1,
&(VkMemoryBarrier) {
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
}, 0, NULL, 0, NULL);
if (batch_state.any_lbvh || batch_state.any_ploc) {
VkResult result;
if (batch_state.any_non_updateable) {
result =
build_leaves(commandBuffer, device, meta, args, infoCount, pInfos,
ppBuildRangeInfos, bvh_states, false);
if (result != VK_SUCCESS) {
free(bvh_states);
vk_command_buffer_set_error(cmd_buffer, result);
return;
}
}
if (batch_state.any_updateable) {
result =
build_leaves(commandBuffer, device, meta, args, infoCount, pInfos,
ppBuildRangeInfos, bvh_states, true);
if (result != VK_SUCCESS) {
free(bvh_states);
vk_command_buffer_set_error(cmd_buffer, result);
return;
}
}
vk_barrier_compute_w_to_compute_r(commandBuffer);
result =
morton_generate(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states);
if (result != VK_SUCCESS) {
free(bvh_states);
vk_command_buffer_set_error(cmd_buffer, result);
return;
}
vk_barrier_compute_w_to_compute_r(commandBuffer);
morton_sort(commandBuffer, device, args, infoCount, pInfos, bvh_states);
vk_barrier_compute_w_to_compute_r(commandBuffer);
if (batch_state.any_lbvh) {
result =
lbvh_build_internal(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states);
if (result != VK_SUCCESS) {
free(bvh_states);
vk_command_buffer_set_error(cmd_buffer, result);
return;
}
}
if (batch_state.any_ploc) {
result =
ploc_build_internal(commandBuffer, device, meta, args, infoCount, pInfos, bvh_states);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(cmd_buffer, result);
return;
}
}
vk_barrier_compute_w_to_compute_r(commandBuffer);
vk_barrier_compute_w_to_indirect_compute_r(commandBuffer);
}
if (args->emit_markers) {
device->as_build_ops->begin_debug_marker(commandBuffer,
VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE,
"encode");
}
for (unsigned pass = 0; pass < ARRAY_SIZE(ops->encode_as); pass++) {
if (!ops->encode_as[pass] && !ops->update_as[pass])
break;
bool progress;
do {
progress = false;
bool update;
uint32_t encode_key = 0;
for (uint32_t i = 0; i < infoCount; ++i) {
if (bvh_states[i].last_encode_pass == pass + 1)
continue;
if (!progress) {
update = (bvh_states[i].config.internal_type ==
INTERNAL_BUILD_TYPE_UPDATE);
if (update && !ops->update_as[pass])
continue;
if (!update && !ops->encode_as[pass])
continue;
encode_key = bvh_states[i].config.encode_key[pass];
progress = true;
if (update)
ops->update_bind_pipeline[pass](commandBuffer);
else
ops->encode_bind_pipeline[pass](commandBuffer, encode_key);
} else {
if (update != (bvh_states[i].config.internal_type ==
INTERNAL_BUILD_TYPE_UPDATE) ||
encode_key != bvh_states[i].config.encode_key[pass])
continue;
}
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, pInfos[i].dstAccelerationStructure);
if (update) {
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfos[i].srcAccelerationStructure);
ops->update_as[pass](commandBuffer,
&pInfos[i],
ppBuildRangeInfos[i],
bvh_states[i].leaf_node_count,
src,
accel_struct);
} else {
ops->encode_as[pass](commandBuffer,
&pInfos[i],
ppBuildRangeInfos[i],
pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.ir_offset,
pInfos[i].scratchData.deviceAddress + bvh_states[i].scratch.header_offset,
bvh_states[i].leaf_node_count,
encode_key,
accel_struct);
}
bvh_states[i].last_encode_pass = pass + 1;
}
} while (progress);
}
if (args->emit_markers)
device->as_build_ops->end_debug_marker(commandBuffer);
if (args->emit_markers)
device->as_build_ops->end_debug_marker(commandBuffer);
free(bvh_states);
}
void
vk_get_as_build_sizes(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
const uint32_t *pMaxPrimitiveCounts,
VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo,
const struct vk_acceleration_structure_build_args *args)
{
VK_FROM_HANDLE(vk_device, device, _device);
uint32_t leaf_count = 0;
for (uint32_t i = 0; i < pBuildInfo->geometryCount; i++)
leaf_count += pMaxPrimitiveCounts[i];
struct scratch_layout scratch;
get_scratch_layout(device, leaf_count, pBuildInfo, args, &scratch);
pSizeInfo->accelerationStructureSize =
device->as_build_ops->get_as_size(_device, pBuildInfo, leaf_count);
pSizeInfo->updateScratchSize = scratch.update_size;
pSizeInfo->buildScratchSize = scratch.size;
}
/* Return true if the common framework supports using this format for loading
* vertices. Must match the formats handled by load_vertices() on the GPU.
*/
bool
vk_acceleration_struct_vtx_format_supported(VkFormat format)
{
switch (format) {
case VK_FORMAT_R32G32_SFLOAT:
case VK_FORMAT_R32G32B32_SFLOAT:
case VK_FORMAT_R32G32B32A32_SFLOAT:
case VK_FORMAT_R16G16_SFLOAT:
case VK_FORMAT_R16G16B16_SFLOAT:
case VK_FORMAT_R16G16B16A16_SFLOAT:
case VK_FORMAT_R16G16_SNORM:
case VK_FORMAT_R16G16_UNORM:
case VK_FORMAT_R16G16B16A16_SNORM:
case VK_FORMAT_R16G16B16A16_UNORM:
case VK_FORMAT_R8G8_SNORM:
case VK_FORMAT_R8G8_UNORM:
case VK_FORMAT_R8G8B8A8_SNORM:
case VK_FORMAT_R8G8B8A8_UNORM:
case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
return true;
default:
return false;
}
}
/* Stubs of optional functions for drivers that don't implment them. */
VKAPI_ATTR void VKAPI_CALL
vk_common_CmdBuildAccelerationStructuresIndirectKHR(VkCommandBuffer commandBuffer,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
const VkDeviceAddress *pIndirectDeviceAddresses,
const uint32_t *pIndirectStrides,
const uint32_t *const *ppMaxPrimitiveCounts)
{
unreachable("Unimplemented");
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_WriteAccelerationStructuresPropertiesKHR(VkDevice _device, uint32_t accelerationStructureCount,
const VkAccelerationStructureKHR *pAccelerationStructures,
VkQueryType queryType,
size_t dataSize,
void *pData,
size_t stride)
{
VK_FROM_HANDLE(vk_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_BuildAccelerationStructuresKHR(VkDevice _device,
VkDeferredOperationKHR deferredOperation,
uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
VK_FROM_HANDLE(vk_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CopyAccelerationStructureKHR(VkDevice _device,
VkDeferredOperationKHR deferredOperation,
const VkCopyAccelerationStructureInfoKHR *pInfo)
{
VK_FROM_HANDLE(vk_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CopyMemoryToAccelerationStructureKHR(VkDevice _device,
VkDeferredOperationKHR deferredOperation,
const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
VK_FROM_HANDLE(vk_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}
VKAPI_ATTR VkResult VKAPI_CALL
vk_common_CopyAccelerationStructureToMemoryKHR(VkDevice _device,
VkDeferredOperationKHR deferredOperation,
const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
VK_FROM_HANDLE(vk_device, device, _device);
unreachable("Unimplemented");
return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}