mesa/src/amd/vulkan/radv_acceleration_structure.c
Konstantin Seurer 405c93c665
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run
radv: Optimize BVH4 acceleration structure updates
It is more efficient to compute the child index of the current node
inside the parent node and write the bounds when available. The previous
code could load up to 16 AABBs to compute the new ones. The new code
also only needs 1/7 of the previously used scratch memory. The new code
seems to be around 30% faster (0.5ms) in GOTG on a 6700XT.

Reviewed-by: Natalie Vock <natalie.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39139>
2026-01-05 15:24:54 +00:00

1163 lines
48 KiB
C

/*
* Copyright © 2021 Bas Nieuwenhuizen
*
* SPDX-License-Identifier: MIT
*/
#include "meta/radv_meta.h"
#include "radv_cs.h"
#include "radv_entrypoints.h"
#include "radix_sort/common/vk/barrier.h"
#include "radix_sort/radix_sort_u64.h"
#include "bvh/build_interface.h"
#include "bvh/bvh.h"
#include "vk_acceleration_structure.h"
#include "vk_common_entrypoints.h"
static const uint32_t copy_blas_addrs_gfx12_spv[] = {
#include "bvh/copy_blas_addrs_gfx12.spv.h"
};
static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
};
static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};
static const uint32_t encode_gfx12_spv[] = {
#include "bvh/encode_gfx12.spv.h"
};
static const uint32_t encode_triangles_gfx12_spv[] = {
#include "bvh/encode_triangles_gfx12.spv.h"
};
static const uint32_t header_spv[] = {
#include "bvh/header.spv.h"
};
static const uint32_t update_spv[] = {
#include "bvh/update.spv.h"
};
static const uint32_t update_gfx12_spv[] = {
#include "bvh/update_gfx12.spv.h"
};
static const uint32_t leaf_spv[] = {
#include "bvh/radv_leaf.spv.h"
};
#define RADV_OFFSET_UNUSED 0xffffffff
struct acceleration_structure_layout {
uint32_t geometry_info_offset;
uint32_t leaf_node_offsets_offset;
uint32_t bvh_offset;
uint32_t leaf_nodes_offset;
uint32_t internal_nodes_offset;
uint32_t size;
};
struct update_scratch_layout {
uint32_t geometry_data_offset;
uint32_t bounds_offsets;
uint32_t internal_ready_count_offset;
uint32_t size;
};
enum radv_encode_key_bits {
RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS = (1 << 0),
RADV_ENCODE_KEY_PAIR_COMPRESS_GFX12 = (1 << 1),
RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12 = (1 << 2),
};
static void
radv_get_acceleration_structure_layout(struct radv_device *device,
const struct vk_acceleration_structure_build_state *state,
struct acceleration_structure_layout *accel_struct)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t internal_node_max_child_count = radv_use_bvh8(pdev) ? 8 : 4;
/* There are no internal nodes with only one child node except the root node which does't matter here. */
uint32_t last_internal_node_min_child_count = 2;
/* With pair compression on GFX12, internal nodes with two triangles are always collapsed so they don't exist. the
* minimum child count therefore has to be 3.
*/
if (state->config.encode_key[0] & (RADV_ENCODE_KEY_PAIR_COMPRESS_GFX12 | RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12))
last_internal_node_min_child_count = 3;
/* See CalcAccelStructInternalNodeCount (gpurt). */
uint32_t internal_count = (state->leaf_node_count * internal_node_max_child_count) /
(last_internal_node_min_child_count * (internal_node_max_child_count - 1)) +
1;
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
uint32_t bvh_leaf_size;
if (radv_use_bvh8(pdev)) {
switch (geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
bvh_leaf_size = sizeof(struct radv_gfx12_primitive_node);
break;
case VK_GEOMETRY_TYPE_AABBS_KHR:
bvh_leaf_size = sizeof(struct radv_gfx12_primitive_node);
break;
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
bvh_leaf_size = sizeof(struct radv_gfx12_instance_node) + sizeof(struct radv_gfx12_instance_node_user_data);
break;
default:
UNREACHABLE("Unknown VkGeometryTypeKHR");
}
} else {
switch (geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
break;
case VK_GEOMETRY_TYPE_AABBS_KHR:
bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
break;
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
break;
default:
UNREACHABLE("Unknown VkGeometryTypeKHR");
}
}
uint32_t internal_node_size =
radv_use_bvh8(pdev) ? sizeof(struct radv_gfx12_box_node) : sizeof(struct radv_bvh_box32_node);
uint32_t hw_leaf_node_count = state->leaf_node_count;
if (state->config.encode_key[0] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12)
hw_leaf_node_count = DIV_ROUND_UP(hw_leaf_node_count, 2);
uint64_t bvh_size = bvh_leaf_size * hw_leaf_node_count + internal_node_size * internal_count;
uint32_t offset = 0;
offset += sizeof(struct radv_accel_struct_header);
if (device->rra_trace.accel_structs) {
accel_struct->geometry_info_offset = offset;
offset += sizeof(struct radv_accel_struct_geometry_info) * state->build_info->geometryCount;
} else {
accel_struct->geometry_info_offset = RADV_OFFSET_UNUSED;
}
/* On GFX12, we need additional space for leaf node offsets since they do not have the same
* order as the application provided data.
*/
accel_struct->leaf_node_offsets_offset = offset;
if (state->config.encode_key[0] & RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS)
offset += state->leaf_node_count * 4;
/* Parent links, which have to go directly before bvh_offset as we index them using negative
* offsets from there. */
if (!radv_use_bvh8(pdev))
offset += bvh_size / 64 * 4;
/* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
offset = align(offset, 64);
accel_struct->bvh_offset = offset;
/* root node */
offset += internal_node_size;
if (!(state->config.encode_key[0] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12)) {
accel_struct->leaf_nodes_offset = offset;
offset += bvh_leaf_size * hw_leaf_node_count;
}
accel_struct->internal_nodes_offset = offset;
/* Factor out the root node. */
offset += internal_node_size * (internal_count - 1);
if (state->config.encode_key[0] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12)
offset += bvh_leaf_size * hw_leaf_node_count;
accel_struct->size = offset;
}
static void
radv_get_update_scratch_layout(struct radv_device *device, const struct vk_acceleration_structure_build_state *state,
struct update_scratch_layout *scratch)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t internal_count = MAX2(state->leaf_node_count, 2) - 1;
uint32_t offset = 0;
if (radv_use_bvh8(pdev)) {
scratch->geometry_data_offset = offset;
offset += sizeof(struct vk_bvh_geometry_data) * state->build_info->geometryCount;
scratch->bounds_offsets = offset;
offset += sizeof(vk_aabb) * internal_count;
}
scratch->internal_ready_count_offset = offset;
offset += sizeof(uint32_t) * internal_count;
scratch->size = offset;
}
VKAPI_ATTR void VKAPI_CALL
radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
const uint32_t *pMaxPrimitiveCounts,
VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
{
VK_FROM_HANDLE(radv_device, device, _device);
STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
STATIC_ASSERT(sizeof(struct radv_bvh_aabb_node) == 64);
STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);
STATIC_ASSERT(sizeof(struct radv_gfx12_box_node) == RADV_GFX12_BVH_NODE_SIZE);
STATIC_ASSERT(sizeof(struct radv_gfx12_primitive_node) == RADV_GFX12_BVH_NODE_SIZE);
STATIC_ASSERT(sizeof(struct radv_gfx12_instance_node) == RADV_GFX12_BVH_NODE_SIZE);
STATIC_ASSERT(sizeof(struct radv_gfx12_instance_node_user_data) == RADV_GFX12_BVH_NODE_SIZE);
if (radv_device_init_accel_struct_build_state(device) != VK_SUCCESS)
return;
vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts, pSizeInfo,
&device->meta_state.accel_struct_build.build_args);
}
void
radv_device_finish_accel_struct_build_state(struct radv_device *device)
{
VkDevice _device = radv_device_to_handle(device);
struct radv_meta_state *state = &device->meta_state;
if (state->accel_struct_build.radix_sort)
radix_sort_vk_destroy(state->accel_struct_build.radix_sort, _device, &state->alloc);
}
static VkDeviceSize
radv_get_as_size(VkDevice _device, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_device, device, _device);
struct acceleration_structure_layout accel_struct;
radv_get_acceleration_structure_layout(device, state, &accel_struct);
return accel_struct.size;
}
static uint32_t
radv_get_triangle_batches_size(const struct vk_acceleration_structure_build_state *state)
{
return state->leaf_node_count * sizeof(struct radv_triangle_encode_task);
}
static VkDeviceSize
radv_get_encode_scratch_size(VkDevice _device, const struct vk_acceleration_structure_build_state *state)
{
if (state->config.encode_key[2] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12) {
uint32_t retry_batch_indices_size = state->leaf_node_count * sizeof(uint32_t);
return radv_get_triangle_batches_size(state) + retry_batch_indices_size;
}
return 0;
}
static VkDeviceSize
radv_get_update_scratch_size(VkDevice _device, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_device, device, _device);
struct update_scratch_layout scratch;
radv_get_update_scratch_layout(device, state, &scratch);
return scratch.size;
}
static void
radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_device, device, _device);
struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t encode_key = 0;
if (radv_use_bvh8(pdev)) {
/*
* Leaf nodes are not written in the order provided by the application when BVH8 encoding is used.
* The proper order leaf nodes is used...
* 1. When fetching vertex positions for triangles.
* 2. When applying/writing BLAS pointers during TLAS deserialization/serialization. The type
* compared to VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR to handle the
* VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR case when the application queries the
* acceleration structure size.
*/
if ((state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_DATA_ACCESS_KHR) ||
state->build_info->type != VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
encode_key |= RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS;
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
if (!(state->build_info->flags & (VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR |
VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_DATA_ACCESS_KHR)) &&
geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR)
encode_key |= RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12;
}
state->config.encode_key[0] = encode_key;
state->config.encode_key[1] = encode_key;
state->config.encode_key[2] = encode_key;
uint32_t update_key = 0;
if (state->build_info->srcAccelerationStructure == state->build_info->dstAccelerationStructure)
update_key |= RADV_BUILD_FLAG_UPDATE_IN_PLACE;
if (state->build_info->geometryCount == 1)
update_key |= RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY;
if (device->meta_state.accel_struct_build.build_args.propagate_cull_flags)
update_key |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS;
state->config.update_key[0] = update_key;
}
static void
radv_bvh_build_bind_pipeline(VkCommandBuffer commandBuffer, enum radv_meta_object_key_type type, const uint32_t *spirv,
uint32_t spirv_size, uint32_t push_constants_size, uint32_t flags)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
VkPipeline pipeline;
VkResult result = vk_get_bvh_build_pipeline_spv(
&device->vk, &device->meta_state.device, (enum vk_meta_object_key_type)type, spirv, spirv_size,
push_constants_size, &device->meta_state.accel_struct_build.build_args, flags, &pipeline, false);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, result);
return;
}
device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
}
static void
radv_bvh_build_set_args(VkCommandBuffer commandBuffer, const void *args, uint32_t size)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
VkPipelineLayout layout;
vk_get_bvh_build_pipeline_layout(&device->vk, &device->meta_state.device, size, &layout);
const VkPushConstantsInfoKHR pc_info = {
.sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
.layout = layout,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.offset = 0,
.size = size,
.pValues = args,
};
radv_CmdPushConstants2(commandBuffer, &pc_info);
}
static uint32_t
radv_build_flags(VkCommandBuffer commandBuffer, uint32_t key)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t flags = 0;
if (radv_use_bvh8(pdev))
flags |= RADV_BUILD_FLAG_BVH8;
if (!radv_emulate_rt(pdev)) {
/* gfx11 box intersection tests can return garbage with infs and non-standard box sorting */
if (pdev->info.gfx_level == GFX11)
flags |= RADV_BUILD_FLAG_NO_INFS;
if (pdev->info.gfx_level >= GFX11)
flags |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS;
}
if (key & RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS)
flags |= RADV_BUILD_FLAG_WRITE_LEAF_NODE_OFFSETS;
if (key & RADV_ENCODE_KEY_PAIR_COMPRESS_GFX12)
flags |= RADV_BUILD_FLAG_PAIR_COMPRESS_TRIANGLES;
if (key & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12)
flags |= RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES;
return flags;
}
static VkResult
radv_encode_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_ENCODE, encode_spv, sizeof(encode_spv),
sizeof(struct encode_args),
radv_build_flags(commandBuffer, state->config.encode_key[0]));
return VK_SUCCESS;
}
static VkResult
radv_encode_bind_pipeline_gfx12(VkCommandBuffer commandBuffer,
const struct vk_acceleration_structure_build_state *state)
{
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_ENCODE, encode_gfx12_spv,
sizeof(encode_gfx12_spv), sizeof(struct encode_gfx12_args),
radv_build_flags(commandBuffer, state->config.encode_key[0]));
return VK_SUCCESS;
}
static void
radv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
uint64_t intermediate_bvh_addr = state->build_info->scratchData.deviceAddress + state->scratch.ir_offset;
uint32_t dst_offset = layout.internal_nodes_offset - layout.bvh_offset;
radv_update_memory_cp(cmd_buffer, intermediate_header_addr + offsetof(struct vk_ir_header, dst_node_offset),
&dst_offset, sizeof(uint32_t));
if (radv_device_physical(device)->info.cp_sdma_ge_use_system_memory_scope)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
const struct encode_args args = {
.intermediate_bvh = intermediate_bvh_addr,
.output_bvh = vk_acceleration_structure_get_va(dst) + layout.bvh_offset,
.header = intermediate_header_addr,
.output_bvh_offset = layout.bvh_offset,
.leaf_node_count = state->leaf_node_count,
.geometry_type = vk_get_as_geometry_type(state->build_info),
};
radv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
struct radv_dispatch_info dispatch = {
.unaligned = true,
.ordered = true,
.blocks = {MAX2(state->leaf_node_count, 1), 1, 1},
};
radv_compute_dispatch(cmd_buffer, &dispatch);
}
static void
radv_encode_as_gfx12(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
uint64_t intermediate_bvh_addr = state->build_info->scratchData.deviceAddress + state->scratch.ir_offset;
struct vk_ir_header header = {
.sync_data =
{
.current_phase_end_counter = TASK_INDEX_INVALID,
/* Will be updated by the first PLOC shader invocation */
.task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
},
.dst_node_offset = layout.internal_nodes_offset - layout.bvh_offset,
.dst_leaf_node_offset = layout.leaf_nodes_offset - layout.bvh_offset,
.driver_internal[RADV_IR_HEADER_ENCODE_TRIANGLES_INVOCATIONS_X] = 0,
.driver_internal[RADV_IR_HEADER_ENCODE_TRIANGLES_INVOCATIONS_Y] = 1,
.driver_internal[RADV_IR_HEADER_ENCODE_TRIANGLES_INVOCATIONS_Z] = 1,
.driver_internal[RADV_IR_HEADER_ENCODE_TRIANGLES_RETRY_INVOCATIONS_X] = 0,
.driver_internal[RADV_IR_HEADER_ENCODE_TRIANGLES_RETRY_INVOCATIONS_Y] = 1,
.driver_internal[RADV_IR_HEADER_ENCODE_TRIANGLES_RETRY_INVOCATIONS_Z] = 1,
};
uint32_t header_update_size =
offsetof(struct vk_ir_header, driver_internal) - offsetof(struct vk_ir_header, sync_data);
if (state->config.encode_key[2] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12)
header_update_size = sizeof(struct vk_ir_header) - offsetof(struct vk_ir_header, sync_data);
const uint8_t *update_data = ((const uint8_t *)&header + offsetof(struct vk_ir_header, sync_data));
radv_update_memory_cp(cmd_buffer, intermediate_header_addr + offsetof(struct vk_ir_header, sync_data), update_data,
header_update_size);
if (radv_device_physical(device)->info.cp_sdma_ge_use_system_memory_scope)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
const struct encode_gfx12_args args = {
.intermediate_bvh = intermediate_bvh_addr,
.output_base = vk_acceleration_structure_get_va(dst),
.header = intermediate_header_addr,
.output_bvh_offset = layout.bvh_offset,
.leaf_node_offsets_offset = layout.leaf_node_offsets_offset,
.leaf_node_count = state->leaf_node_count,
.geometry_type = vk_get_as_geometry_type(state->build_info),
};
radv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
uint32_t internal_count = MAX2(state->leaf_node_count, 2) - 1;
struct radv_dispatch_info dispatch = {
.ordered = true,
.blocks = {DIV_ROUND_UP(internal_count * 8, 64), 1, 1},
};
radv_compute_dispatch(cmd_buffer, &dispatch);
}
static VkResult
radv_encode_triangles_bind_pipeline_gfx12(VkCommandBuffer commandBuffer,
const struct vk_acceleration_structure_build_state *state)
{
bool compress_triangles = state->config.encode_key[2] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12;
if (!compress_triangles)
return VK_SUCCESS;
/* Wait for internal encoding to finish. */
vk_barrier_compute_w_to_compute_r(commandBuffer);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_ENCODE_TRIANGLES_GFX12,
encode_triangles_gfx12_spv, sizeof(encode_triangles_gfx12_spv),
sizeof(struct encode_triangles_gfx12_args), 0);
return VK_SUCCESS;
}
static void
radv_encode_triangles_gfx12(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
bool compress_triangles = state->config.encode_key[2] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12;
if (!compress_triangles)
return;
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
uint64_t intermediate_bvh_addr = state->build_info->scratchData.deviceAddress + state->scratch.ir_offset;
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
const struct encode_triangles_gfx12_args args = {
.intermediate_bvh = intermediate_bvh_addr,
.output_base = vk_acceleration_structure_get_va(dst),
.header = intermediate_header_addr,
.output_bvh_offset = layout.bvh_offset,
.leaf_node_offsets_offset = layout.leaf_node_offsets_offset,
.batches_size = radv_get_triangle_batches_size(state),
};
radv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
struct radv_dispatch_info dispatch = {
.unaligned = true,
.indirect_va = intermediate_header_addr +
offsetof(struct vk_ir_header, driver_internal[RADV_IR_HEADER_ENCODE_TRIANGLES_INVOCATIONS_X]),
};
radv_compute_dispatch(cmd_buffer, &dispatch);
}
static VkResult
radv_encode_triangles_retry_bind_pipeline_gfx12(VkCommandBuffer commandBuffer,
const struct vk_acceleration_structure_build_state *state)
{
bool compress_triangles = state->config.encode_key[2] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12;
if (!compress_triangles)
return VK_SUCCESS;
/* Wait for the first triangle compression pass to finish. */
vk_barrier_compute_w_to_compute_r(commandBuffer);
vk_barrier_compute_w_to_indirect_compute_r(commandBuffer);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_ENCODE_TRIANGLES_GFX12,
encode_triangles_gfx12_spv, sizeof(encode_triangles_gfx12_spv),
sizeof(struct encode_triangles_gfx12_args),
RADV_BUILD_FLAG_BATCH_COMPRESS_TRIANGLES_RETRY);
return VK_SUCCESS;
}
static void
radv_encode_triangles_retry_gfx12(VkCommandBuffer commandBuffer,
const struct vk_acceleration_structure_build_state *state)
{
bool compress_triangles = state->config.encode_key[2] & RADV_ENCODE_KEY_BATCH_COMPRESS_GFX12;
if (!compress_triangles)
return;
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
uint64_t intermediate_bvh_addr = state->build_info->scratchData.deviceAddress + state->scratch.ir_offset;
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
const struct encode_triangles_gfx12_args args = {
.intermediate_bvh = intermediate_bvh_addr,
.output_base = vk_acceleration_structure_get_va(dst),
.header = intermediate_header_addr,
.output_bvh_offset = layout.bvh_offset,
.leaf_node_offsets_offset = layout.leaf_node_offsets_offset,
.batches_size = radv_get_triangle_batches_size(state),
};
radv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
struct radv_dispatch_info dispatch = {
.unaligned = true,
.indirect_va =
intermediate_header_addr +
offsetof(struct vk_ir_header, driver_internal[RADV_IR_HEADER_ENCODE_TRIANGLES_RETRY_INVOCATIONS_X]),
};
radv_compute_dispatch(cmd_buffer, &dispatch);
}
static VkResult
radv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
/* Wait for encoding to finish. */
vk_barrier_compute_w_to_compute_r(commandBuffer);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_HEADER, header_spv, sizeof(header_spv),
sizeof(struct header_args), 0);
return VK_SUCCESS;
}
static void
radv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
uint64_t instance_count =
state->build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? state->leaf_node_count : 0;
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
base = offsetof(struct radv_accel_struct_header, geometry_type);
struct header_args args = {
.src = intermediate_header_addr,
.dst = vk_acceleration_structure_get_va(dst),
.bvh_offset = layout.bvh_offset,
.internal_nodes_offset = layout.internal_nodes_offset - layout.bvh_offset,
.instance_count = instance_count,
};
radv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
radv_unaligned_dispatch(cmd_buffer, 1, 1, 1);
struct radv_accel_struct_header header;
header.instance_offset = layout.bvh_offset + sizeof(struct radv_bvh_box32_node);
header.instance_count = instance_count;
header.leaf_node_offsets_offset = layout.leaf_node_offsets_offset;
header.compacted_size = layout.size;
header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
header.copy_dispatch_size[1] = 1;
header.copy_dispatch_size[2] = 1;
header.serialization_size =
header.compacted_size +
align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128);
header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) -
sizeof(uint64_t) * header.instance_count;
header.build_flags = state->build_info->flags;
header.geometry_type = vk_get_as_geometry_type(state->build_info);
header.geometry_count = state->build_info->geometryCount;
radv_update_memory_cp(cmd_buffer, vk_acceleration_structure_get_va(dst) + base, (const char *)&header + base,
sizeof(header) - base);
if (layout.geometry_info_offset != RADV_OFFSET_UNUSED) {
uint64_t geometry_infos_size = state->build_info->geometryCount * sizeof(struct radv_accel_struct_geometry_info);
struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size);
if (!geometry_infos)
return;
for (uint32_t i = 0; i < state->build_info->geometryCount; i++) {
const VkAccelerationStructureGeometryKHR *geometry =
state->build_info->pGeometries ? &state->build_info->pGeometries[i] : state->build_info->ppGeometries[i];
geometry_infos[i].type = geometry->geometryType;
geometry_infos[i].flags = geometry->flags;
geometry_infos[i].primitive_count = state->build_range_infos[i].primitiveCount;
}
radv_CmdUpdateBuffer(commandBuffer, vk_buffer_to_handle(dst->buffer), dst->offset + layout.geometry_info_offset,
geometry_infos_size, geometry_infos);
free(geometry_infos);
}
}
static void
radv_init_update_scratch(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t scratch = state->build_info->scratchData.deviceAddress;
struct update_scratch_layout layout;
radv_get_update_scratch_layout(device, state, &layout);
/* Prepare ready counts for internal nodes */
radv_fill_memory(cmd_buffer, scratch + layout.internal_ready_count_offset,
layout.size - layout.internal_ready_count_offset, 0x0, RADV_COPY_FLAGS_DEVICE_LOCAL);
/* geometryCount == 1 passes the data as push constant. */
if (radv_use_bvh8(pdev) && !(state->config.update_key[0] & RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY)) {
uint32_t data_size = sizeof(struct vk_bvh_geometry_data) * state->build_info->geometryCount;
struct vk_bvh_geometry_data *data = malloc(data_size);
if (!data) {
vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
return;
}
uint32_t first_id = 0;
for (uint32_t i = 0; i < state->build_info->geometryCount; i++) {
const VkAccelerationStructureGeometryKHR *geom =
state->build_info->pGeometries ? &state->build_info->pGeometries[i] : state->build_info->ppGeometries[i];
const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &state->build_range_infos[i];
data[i] = vk_fill_geometry_data(state->build_info->type, first_id, i, geom, build_range_info);
first_id += build_range_info->primitiveCount;
}
radv_update_memory(cmd_buffer, scratch + layout.geometry_data_offset, data_size, data,
RADV_COPY_FLAGS_DEVICE_LOCAL);
free(data);
}
}
static void
radv_update_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state,
bool flushed_cp_after_init_update_scratch, bool flushed_compute_after_init_update_scratch)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Wait for update scratch initialization to finish.. */
if (!flushed_compute_after_init_update_scratch)
vk_barrier_compute_w_to_compute_r(commandBuffer);
if (!flushed_cp_after_init_update_scratch) {
if (radv_device_physical(device)->info.cp_sdma_ge_use_system_memory_scope)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
}
uint32_t flags = state->config.update_key[0];
if (radv_use_bvh8(pdev)) {
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_UPDATE, update_gfx12_spv,
sizeof(update_gfx12_spv), sizeof(struct update_args), flags);
} else {
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_UPDATE, update_spv, sizeof(update_spv),
sizeof(struct update_args), flags);
}
}
static uint32_t
pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
{
uint32_t geometry_id_and_flags = geometry_id;
if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
geometry_id_and_flags |= RADV_GEOMETRY_OPAQUE;
return geometry_id_and_flags;
}
static void
radv_update_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
if (src != dst) {
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
/* Copy header/metadata */
const uint64_t src_va = vk_acceleration_structure_get_va(src);
const uint64_t dst_va = vk_acceleration_structure_get_va(dst);
radv_copy_memory(cmd_buffer, src_va, dst_va, layout.bvh_offset, RADV_COPY_FLAGS_DEVICE_LOCAL,
RADV_COPY_FLAGS_DEVICE_LOCAL);
}
struct update_scratch_layout layout;
radv_get_update_scratch_layout(device, state, &layout);
struct update_args update_consts = {
.src = vk_acceleration_structure_get_va(src),
.dst = vk_acceleration_structure_get_va(dst),
.internal_ready_count = state->build_info->scratchData.deviceAddress + layout.internal_ready_count_offset,
.leaf_node_count = state->leaf_node_count,
};
uint32_t first_id = 0;
for (uint32_t i = 0; i < state->build_info->geometryCount; i++) {
const VkAccelerationStructureGeometryKHR *geom =
state->build_info->pGeometries ? &state->build_info->pGeometries[i] : state->build_info->ppGeometries[i];
const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &state->build_range_infos[i];
update_consts.geom_data = vk_fill_geometry_data(state->build_info->type, first_id, i, geom, build_range_info);
radv_bvh_build_set_args(commandBuffer, &update_consts, sizeof(update_consts));
radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
first_id += build_range_info->primitiveCount;
}
}
static void
radv_update_as_gfx12(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
if (src != dst) {
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
/* Copy header/metadata */
const uint64_t src_va = vk_acceleration_structure_get_va(src);
const uint64_t dst_va = vk_acceleration_structure_get_va(dst);
radv_copy_memory(cmd_buffer, src_va, dst_va, layout.bvh_offset, RADV_COPY_FLAGS_DEVICE_LOCAL,
RADV_COPY_FLAGS_DEVICE_LOCAL);
}
struct update_scratch_layout layout;
radv_get_update_scratch_layout(device, state, &layout);
struct update_gfx12_args update_consts = {
.src = vk_acceleration_structure_get_va(src),
.dst = vk_acceleration_structure_get_va(dst),
.geom_data = state->build_info->scratchData.deviceAddress + layout.geometry_data_offset,
.bounds = state->build_info->scratchData.deviceAddress + layout.bounds_offsets,
.internal_ready_count = state->build_info->scratchData.deviceAddress + layout.internal_ready_count_offset,
.leaf_node_count = state->leaf_node_count,
};
if (state->config.update_key[0] & RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY) {
const VkAccelerationStructureGeometryKHR *geom =
state->build_info->pGeometries ? &state->build_info->pGeometries[0] : state->build_info->ppGeometries[0];
update_consts.geom_data0 = vk_fill_geometry_data(state->build_info->type, 0, 0, geom, state->build_range_infos);
}
radv_bvh_build_set_args(commandBuffer, &update_consts, sizeof(update_consts));
struct radv_dispatch_info dispatch = {
.ordered = true,
.unaligned = true,
.indirect_va =
vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, update_dispatch_size[0]),
};
radv_compute_dispatch(cmd_buffer, &dispatch);
}
static const struct radix_sort_vk_target_config radix_sort_config = {
.keyval_dwords = 2,
.fill.workgroup_size_log2 = 7,
.fill.block_rows = 8,
.histogram.workgroup_size_log2 = 8,
.histogram.subgroup_size_log2 = 6,
.histogram.block_rows = 14,
.prefix.workgroup_size_log2 = 8,
.prefix.subgroup_size_log2 = 6,
.scatter.workgroup_size_log2 = 8,
.scatter.subgroup_size_log2 = 6,
.scatter.block_rows = 14,
};
static void
radv_write_buffer_cp(VkCommandBuffer commandBuffer, VkDeviceAddress addr, void *data, uint32_t size)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_update_memory_cp(cmd_buffer, addr, data, size);
}
static void
radv_flush_buffer_write_cp(VkCommandBuffer commandBuffer)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.cp_sdma_ge_use_system_memory_scope)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
}
static void
radv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_unaligned_dispatch(cmd_buffer, x, y, z);
}
static void
radv_cmd_fill_buffer_addr(VkCommandBuffer commandBuffer, VkDeviceAddress addr, VkDeviceSize size, uint32_t data)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_fill_memory(cmd_buffer, addr, size, data, RADV_COPY_FLAGS_DEVICE_LOCAL);
}
VkResult
radv_device_init_accel_struct_build_state(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
mtx_lock(&device->meta_state.mtx);
if (device->meta_state.accel_struct_build.radix_sort)
goto exit;
device->meta_state.accel_struct_build.radix_sort = vk_create_radix_sort_u64(
radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache, radix_sort_config);
device->meta_state.accel_struct_build.build_ops = (struct vk_acceleration_structure_build_ops){
.begin_debug_marker = vk_accel_struct_cmd_begin_debug_marker,
.end_debug_marker = vk_accel_struct_cmd_end_debug_marker,
.get_build_config = radv_get_build_config,
.get_as_size = radv_get_as_size,
.get_update_scratch_size = radv_get_update_scratch_size,
.init_update_scratch = radv_init_update_scratch,
.update_bind_pipeline[0] = radv_update_bind_pipeline,
};
if (radv_use_bvh8(pdev)) {
device->meta_state.accel_struct_build.build_ops.update_as[0] = radv_update_as_gfx12;
device->meta_state.accel_struct_build.build_ops.get_encode_scratch_size = radv_get_encode_scratch_size;
device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[0] = radv_encode_bind_pipeline_gfx12;
device->meta_state.accel_struct_build.build_ops.encode_as[0] = radv_encode_as_gfx12;
device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[1] =
radv_encode_triangles_bind_pipeline_gfx12;
device->meta_state.accel_struct_build.build_ops.encode_as[1] = radv_encode_triangles_gfx12;
device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[2] =
radv_encode_triangles_retry_bind_pipeline_gfx12;
device->meta_state.accel_struct_build.build_ops.encode_as[2] = radv_encode_triangles_retry_gfx12;
device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[3] = radv_init_header_bind_pipeline;
device->meta_state.accel_struct_build.build_ops.encode_as[3] = radv_init_header;
} else {
device->meta_state.accel_struct_build.build_ops.update_as[0] = radv_update_as;
device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[0] = radv_encode_bind_pipeline;
device->meta_state.accel_struct_build.build_ops.encode_as[0] = radv_encode_as;
device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[1] = radv_init_header_bind_pipeline;
device->meta_state.accel_struct_build.build_ops.encode_as[1] = radv_init_header;
device->meta_state.accel_struct_build.build_ops.leaf_spirv_override = leaf_spv;
device->meta_state.accel_struct_build.build_ops.leaf_spirv_override_size = sizeof(leaf_spv);
}
device->vk.as_build_ops = &device->meta_state.accel_struct_build.build_ops;
device->vk.write_buffer_cp = radv_write_buffer_cp;
device->vk.flush_buffer_write_cp = radv_flush_buffer_write_cp;
device->vk.cmd_dispatch_unaligned = radv_cmd_dispatch_unaligned;
device->vk.cmd_fill_buffer_addr = radv_cmd_fill_buffer_addr;
struct vk_acceleration_structure_build_args *build_args = &device->meta_state.accel_struct_build.build_args;
build_args->subgroup_size = 64;
build_args->bvh_bounds_offset = offsetof(struct radv_accel_struct_header, aabb);
build_args->root_flags_offset = offsetof(struct radv_accel_struct_header, root_flags);
build_args->propagate_cull_flags = pdev->info.gfx_level >= GFX11;
build_args->emit_markers = device->sqtt.bo;
build_args->radix_sort = device->meta_state.accel_struct_build.radix_sort;
exit:
mtx_unlock(&device->meta_state.mtx);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_meta_saved_state saved_state;
VkResult result = radv_device_init_accel_struct_build_state(device);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, result);
return;
}
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
cmd_buffer->state.current_event_type = EventInternalUnknown;
vk_cmd_build_acceleration_structures(commandBuffer, &device->vk, &device->meta_state.device, infoCount, pInfos,
ppBuildRangeInfos, &device->meta_state.accel_struct_build.build_args);
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
struct radv_meta_saved_state saved_state;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY, copy_spv, sizeof(copy_spv),
sizeof(struct copy_args), radv_build_flags(commandBuffer, 0) & RADV_BUILD_FLAG_BVH8);
struct copy_args consts = {
.src_addr = vk_acceleration_structure_get_va(src),
.dst_addr = vk_acceleration_structure_get_va(dst),
.mode = RADV_COPY_MODE_COPY,
};
radv_bvh_build_set_args(commandBuffer, &consts, sizeof(consts));
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, 0, NULL, NULL);
radv_CmdDispatchIndirect(commandBuffer, vk_buffer_to_handle(src->buffer),
src->offset + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
VK_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 &&
memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0;
*pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
: VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_saved_state saved_state;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY, copy_spv, sizeof(copy_spv),
sizeof(struct copy_args), radv_build_flags(commandBuffer, 0) & RADV_BUILD_FLAG_BVH8);
const struct copy_args consts = {
.src_addr = pInfo->src.deviceAddress,
.dst_addr = vk_acceleration_structure_get_va(dst),
.mode = RADV_COPY_MODE_DESERIALIZE,
};
radv_bvh_build_set_args(commandBuffer, &consts, sizeof(consts));
radv_CmdDispatchBase(commandBuffer, 0, 0, 0, 512, 1, 1);
if (radv_use_bvh8(pdev)) {
/* Wait for the main copy dispatch to finish. */
vk_barrier_compute_w_to_compute_r(commandBuffer);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY_BLAS_ADDRS_GFX12,
copy_blas_addrs_gfx12_spv, sizeof(copy_blas_addrs_gfx12_spv),
sizeof(struct copy_args), 0);
radv_CmdDispatchBase(commandBuffer, 0, 0, 0, 256, 1, 1);
}
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_saved_state saved_state;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY, copy_spv, sizeof(copy_spv),
sizeof(struct copy_args), radv_build_flags(commandBuffer, 0) & RADV_BUILD_FLAG_BVH8);
const struct copy_args consts = {
.src_addr = vk_acceleration_structure_get_va(src),
.dst_addr = pInfo->dst.deviceAddress,
.mode = RADV_COPY_MODE_SERIALIZE,
};
radv_bvh_build_set_args(commandBuffer, &consts, sizeof(consts));
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, 0, NULL, NULL);
radv_CmdDispatchIndirect(commandBuffer, vk_buffer_to_handle(src->buffer),
src->offset + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
if (radv_use_bvh8(pdev)) {
/* Wait for the main copy dispatch to finish. */
vk_barrier_compute_w_to_compute_r(commandBuffer);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY_BLAS_ADDRS_GFX12,
copy_blas_addrs_gfx12_spv, sizeof(copy_blas_addrs_gfx12_spv),
sizeof(struct copy_args), 0);
radv_CmdDispatchBase(commandBuffer, 0, 0, 0, 256, 1, 1);
}
radv_meta_restore(&saved_state, cmd_buffer);
/* Set the header of the serialized data. */
uint8_t header_data[2 * VK_UUID_SIZE];
memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE);
memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE);
radv_update_memory_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
}