mesa/src/amd/vulkan/radv_acceleration_structure.c
Konstantin Seurer c4b18c689f
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run
radv: Emit compressed primitive nodes on GFX12
Emits two triangles per node whenever possible. The nir code will
revisit the triangle node to handle the second triangle only if both
triangles are interescted by the ray.

Reviewed-by: Natalie Vock <natalie.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35734>
2025-08-07 20:23:15 +00:00

1017 lines
42 KiB
C

/*
* Copyright © 2021 Bas Nieuwenhuizen
*
* SPDX-License-Identifier: MIT
*/
#include "meta/radv_meta.h"
#include "radv_cs.h"
#include "radv_entrypoints.h"
#include "radix_sort/radix_sort_u64.h"
#include "bvh/build_interface.h"
#include "bvh/bvh.h"
#include "vk_acceleration_structure.h"
#include "vk_common_entrypoints.h"
static const uint32_t copy_blas_addrs_gfx12_spv[] = {
#include "bvh/copy_blas_addrs_gfx12.spv.h"
};
static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
};
static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
};
static const uint32_t encode_gfx12_spv[] = {
#include "bvh/encode_gfx12.spv.h"
};
static const uint32_t header_spv[] = {
#include "bvh/header.spv.h"
};
static const uint32_t update_spv[] = {
#include "bvh/update.spv.h"
};
static const uint32_t update_gfx12_spv[] = {
#include "bvh/update_gfx12.spv.h"
};
static const uint32_t leaf_spv[] = {
#include "bvh/radv_leaf.spv.h"
};
#define RADV_OFFSET_UNUSED 0xffffffff
struct acceleration_structure_layout {
uint32_t geometry_info_offset;
uint32_t leaf_node_offsets_offset;
uint32_t bvh_offset;
uint32_t leaf_nodes_offset;
uint32_t internal_nodes_offset;
uint32_t size;
};
struct update_scratch_layout {
uint32_t geometry_data_offset;
uint32_t bounds_offsets;
uint32_t internal_ready_count_offset;
uint32_t size;
};
enum radv_encode_key_bits {
RADV_ENCODE_KEY_COMPACT = (1 << 0),
RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS = (1 << 1),
RADV_ENCODE_KEY_PAIR_COMPRESS_GFX12 = (1 << 2),
};
static void
radv_get_acceleration_structure_layout(struct radv_device *device,
const struct vk_acceleration_structure_build_state *state,
struct acceleration_structure_layout *accel_struct)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t internal_count = MAX2(state->leaf_node_count, 2) - 1;
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
uint32_t bvh_leaf_size;
uint32_t bvh_node_size_gcd;
if (radv_use_bvh8(pdev)) {
switch (geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
bvh_leaf_size = sizeof(struct radv_gfx12_primitive_node);
break;
case VK_GEOMETRY_TYPE_AABBS_KHR:
bvh_leaf_size = sizeof(struct radv_gfx12_primitive_node);
break;
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
bvh_leaf_size = sizeof(struct radv_gfx12_instance_node) + sizeof(struct radv_gfx12_instance_node_user_data);
break;
default:
UNREACHABLE("Unknown VkGeometryTypeKHR");
}
bvh_node_size_gcd = RADV_GFX12_BVH_NODE_SIZE;
} else {
switch (geometry_type) {
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
bvh_leaf_size = sizeof(struct radv_bvh_triangle_node);
break;
case VK_GEOMETRY_TYPE_AABBS_KHR:
bvh_leaf_size = sizeof(struct radv_bvh_aabb_node);
break;
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
bvh_leaf_size = sizeof(struct radv_bvh_instance_node);
break;
default:
UNREACHABLE("Unknown VkGeometryTypeKHR");
}
bvh_node_size_gcd = 64;
}
uint32_t internal_node_size =
radv_use_bvh8(pdev) ? sizeof(struct radv_gfx12_box_node) : sizeof(struct radv_bvh_box32_node);
uint64_t bvh_size = bvh_leaf_size * state->leaf_node_count + internal_node_size * internal_count;
uint32_t offset = 0;
offset += sizeof(struct radv_accel_struct_header);
if (device->rra_trace.accel_structs) {
accel_struct->geometry_info_offset = offset;
offset += sizeof(struct radv_accel_struct_geometry_info) * state->build_info->geometryCount;
} else {
accel_struct->geometry_info_offset = RADV_OFFSET_UNUSED;
}
/* On GFX12, we need additional space for leaf node offsets since they do not have the same
* order as the application provided data.
*/
accel_struct->leaf_node_offsets_offset = offset;
if (state->config.encode_key[0] & RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS)
offset += state->leaf_node_count * 4;
/* Parent links, which have to go directly before bvh_offset as we index them using negative
* offsets from there. */
offset += bvh_size / bvh_node_size_gcd * 4;
/* The BVH and hence bvh_offset needs 64 byte alignment for RT nodes. */
offset = ALIGN(offset, 64);
accel_struct->bvh_offset = offset;
/* root node */
offset += internal_node_size;
accel_struct->leaf_nodes_offset = offset;
offset += bvh_leaf_size * state->leaf_node_count;
accel_struct->internal_nodes_offset = offset;
/* Factor out the root node. */
offset += internal_node_size * (internal_count - 1);
accel_struct->size = offset;
}
static void
radv_get_update_scratch_layout(struct radv_device *device, const struct vk_acceleration_structure_build_state *state,
struct update_scratch_layout *scratch)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t internal_count = MAX2(state->leaf_node_count, 2) - 1;
uint32_t offset = 0;
if (radv_use_bvh8(pdev)) {
scratch->geometry_data_offset = offset;
offset += sizeof(struct vk_bvh_geometry_data) * state->build_info->geometryCount;
scratch->bounds_offsets = offset;
offset += sizeof(vk_aabb) * internal_count;
} else {
scratch->bounds_offsets = offset;
offset += sizeof(vk_aabb) * state->leaf_node_count;
}
scratch->internal_ready_count_offset = offset;
offset += sizeof(uint32_t) * internal_count;
scratch->size = offset;
}
VKAPI_ATTR void VKAPI_CALL
radv_GetAccelerationStructureBuildSizesKHR(VkDevice _device, VkAccelerationStructureBuildTypeKHR buildType,
const VkAccelerationStructureBuildGeometryInfoKHR *pBuildInfo,
const uint32_t *pMaxPrimitiveCounts,
VkAccelerationStructureBuildSizesInfoKHR *pSizeInfo)
{
VK_FROM_HANDLE(radv_device, device, _device);
STATIC_ASSERT(sizeof(struct radv_bvh_triangle_node) == 64);
STATIC_ASSERT(sizeof(struct radv_bvh_aabb_node) == 64);
STATIC_ASSERT(sizeof(struct radv_bvh_instance_node) == 128);
STATIC_ASSERT(sizeof(struct radv_bvh_box16_node) == 64);
STATIC_ASSERT(sizeof(struct radv_bvh_box32_node) == 128);
STATIC_ASSERT(sizeof(struct radv_gfx12_box_node) == RADV_GFX12_BVH_NODE_SIZE);
STATIC_ASSERT(sizeof(struct radv_gfx12_primitive_node) == RADV_GFX12_BVH_NODE_SIZE);
STATIC_ASSERT(sizeof(struct radv_gfx12_instance_node) == RADV_GFX12_BVH_NODE_SIZE);
STATIC_ASSERT(sizeof(struct radv_gfx12_instance_node_user_data) == RADV_GFX12_BVH_NODE_SIZE);
if (radv_device_init_accel_struct_build_state(device) != VK_SUCCESS)
return;
vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts, pSizeInfo,
&device->meta_state.accel_struct_build.build_args);
}
void
radv_device_finish_accel_struct_build_state(struct radv_device *device)
{
VkDevice _device = radv_device_to_handle(device);
struct radv_meta_state *state = &device->meta_state;
if (state->accel_struct_build.radix_sort)
radix_sort_vk_destroy(state->accel_struct_build.radix_sort, _device, &state->alloc);
}
static VkDeviceSize
radv_get_as_size(VkDevice _device, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_device, device, _device);
struct acceleration_structure_layout accel_struct;
radv_get_acceleration_structure_layout(device, state, &accel_struct);
return accel_struct.size;
}
static VkDeviceSize
radv_get_update_scratch_size(VkDevice _device, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_device, device, _device);
struct update_scratch_layout scratch;
radv_get_update_scratch_layout(device, state, &scratch);
return scratch.size;
}
static void
radv_get_build_config(VkDevice _device, struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_device, device, _device);
struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t encode_key = 0;
if (radv_use_bvh8(pdev)) {
encode_key |= RADV_ENCODE_KEY_COMPACT;
/*
* Leaf nodes are not written in the order provided by the application when BVH8 encoding is used.
* The proper order leaf nodes is used...
* 1. When fetching vertex positions for triangles.
* 2. When applying/writing BLAS pointers during TLAS deserialization/serialization. The type
* compared to VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR to handle the
* VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR case when the application queries the
* acceleration structure size.
*/
if ((state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_DATA_ACCESS_KHR) ||
state->build_info->type != VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR)
encode_key |= RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS;
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
if (!(state->build_info->flags & (VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR |
VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_DATA_ACCESS_KHR)) &&
geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR)
encode_key |= RADV_ENCODE_KEY_PAIR_COMPRESS_GFX12;
}
if (state->build_info->flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR)
encode_key |= RADV_ENCODE_KEY_COMPACT;
state->config.encode_key[0] = encode_key;
state->config.encode_key[1] = encode_key;
uint32_t update_key = 0;
if (state->build_info->srcAccelerationStructure == state->build_info->dstAccelerationStructure)
update_key |= RADV_BUILD_FLAG_UPDATE_IN_PLACE;
if (state->build_info->geometryCount == 1)
update_key |= RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY;
state->config.update_key[0] = update_key;
}
static void
radv_bvh_build_bind_pipeline(VkCommandBuffer commandBuffer, enum radv_meta_object_key_type type, const uint32_t *spirv,
uint32_t spirv_size, uint32_t push_constants_size, uint32_t flags)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
VkPipeline pipeline;
VkResult result = vk_get_bvh_build_pipeline_spv(
&device->vk, &device->meta_state.device, (enum vk_meta_object_key_type)type, spirv, spirv_size,
push_constants_size, &device->meta_state.accel_struct_build.build_args, flags, &pipeline);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, result);
return;
}
device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
}
static void
radv_bvh_build_set_args(VkCommandBuffer commandBuffer, const void *args, uint32_t size)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
VkPipelineLayout layout;
vk_get_bvh_build_pipeline_layout(&device->vk, &device->meta_state.device, size, &layout);
const VkPushConstantsInfoKHR pc_info = {
.sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
.layout = layout,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.offset = 0,
.size = size,
.pValues = args,
};
radv_CmdPushConstants2(commandBuffer, &pc_info);
}
static uint32_t
radv_build_flags(VkCommandBuffer commandBuffer, uint32_t key)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
uint32_t flags = 0;
if (key & RADV_ENCODE_KEY_COMPACT)
flags |= RADV_BUILD_FLAG_COMPACT;
if (radv_use_bvh8(pdev))
flags |= RADV_BUILD_FLAG_BVH8;
if (!radv_emulate_rt(pdev)) {
/* gfx11 box intersection tests can return garbage with infs and non-standard box sorting */
if (pdev->info.gfx_level == GFX11)
flags |= RADV_BUILD_FLAG_NO_INFS;
if (pdev->info.gfx_level >= GFX11)
flags |= VK_BUILD_FLAG_PROPAGATE_CULL_FLAGS;
}
if (key & RADV_ENCODE_KEY_WRITE_LEAF_NODE_OFFSETS)
flags |= RADV_BUILD_FLAG_WRITE_LEAF_NODE_OFFSETS;
if (key & RADV_ENCODE_KEY_PAIR_COMPRESS_GFX12)
flags |= RADV_BUILD_FLAG_PAIR_COMPRESS_TRIANGLES;
return flags;
}
static VkResult
radv_encode_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_ENCODE, encode_spv, sizeof(encode_spv),
sizeof(struct encode_args),
radv_build_flags(commandBuffer, state->config.encode_key[0]));
return VK_SUCCESS;
}
static VkResult
radv_encode_bind_pipeline_gfx12(VkCommandBuffer commandBuffer,
const struct vk_acceleration_structure_build_state *state)
{
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_ENCODE, encode_gfx12_spv,
sizeof(encode_gfx12_spv), sizeof(struct encode_gfx12_args),
radv_build_flags(commandBuffer, state->config.encode_key[0]));
return VK_SUCCESS;
}
static void
radv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
uint64_t intermediate_bvh_addr = state->build_info->scratchData.deviceAddress + state->scratch.ir_offset;
if (state->config.encode_key[0] & RADV_ENCODE_KEY_COMPACT) {
uint32_t dst_offset = layout.internal_nodes_offset - layout.bvh_offset;
radv_update_memory_cp(cmd_buffer, intermediate_header_addr + offsetof(struct vk_ir_header, dst_node_offset),
&dst_offset, sizeof(uint32_t));
if (radv_device_physical(device)->info.cp_sdma_ge_use_system_memory_scope)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
}
const struct encode_args args = {
.intermediate_bvh = intermediate_bvh_addr,
.output_bvh = vk_acceleration_structure_get_va(dst) + layout.bvh_offset,
.header = intermediate_header_addr,
.output_bvh_offset = layout.bvh_offset,
.leaf_node_count = state->leaf_node_count,
.geometry_type = vk_get_as_geometry_type(state->build_info),
};
radv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
struct radv_dispatch_info dispatch = {
.unaligned = true,
.ordered = true,
.blocks = {MAX2(state->leaf_node_count, 1), 1, 1},
};
radv_compute_dispatch(cmd_buffer, &dispatch);
}
static void
radv_encode_as_gfx12(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
uint64_t intermediate_bvh_addr = state->build_info->scratchData.deviceAddress + state->scratch.ir_offset;
struct vk_ir_header header = {
.sync_data =
{
.current_phase_end_counter = TASK_INDEX_INVALID,
/* Will be updated by the first PLOC shader invocation */
.task_counts = {TASK_INDEX_INVALID, TASK_INDEX_INVALID},
},
.dst_node_offset = layout.internal_nodes_offset - layout.bvh_offset,
.dst_leaf_node_offset = layout.leaf_nodes_offset - layout.bvh_offset,
};
const uint8_t *update_data = ((const uint8_t *)&header + offsetof(struct vk_ir_header, sync_data));
radv_update_memory_cp(cmd_buffer, intermediate_header_addr + offsetof(struct vk_ir_header, sync_data), update_data,
sizeof(struct vk_ir_header) - offsetof(struct vk_ir_header, sync_data));
if (radv_device_physical(device)->info.cp_sdma_ge_use_system_memory_scope)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
const struct encode_gfx12_args args = {
.intermediate_bvh = intermediate_bvh_addr,
.output_base = vk_acceleration_structure_get_va(dst),
.header = intermediate_header_addr,
.output_bvh_offset = layout.bvh_offset,
.leaf_node_offsets_offset = layout.leaf_node_offsets_offset,
.leaf_node_count = state->leaf_node_count,
.geometry_type = vk_get_as_geometry_type(state->build_info),
};
radv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
uint32_t internal_count = MAX2(state->leaf_node_count, 2) - 1;
struct radv_dispatch_info dispatch = {
.ordered = true,
.blocks = {DIV_ROUND_UP(internal_count * 8, 64), 1, 1},
};
radv_compute_dispatch(cmd_buffer, &dispatch);
}
static VkResult
radv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
if (!(state->config.encode_key[1] & RADV_ENCODE_KEY_COMPACT))
return VK_SUCCESS;
/* Wait for encoding to finish. */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, 0, NULL, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_READ_BIT, 0, NULL, NULL);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_HEADER, header_spv, sizeof(header_spv),
sizeof(struct header_args), 0);
return VK_SUCCESS;
}
static void
radv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
size_t base = offsetof(struct radv_accel_struct_header, compacted_size);
uint64_t instance_count =
state->build_info->type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR ? state->leaf_node_count : 0;
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
if (state->config.encode_key[1] & RADV_ENCODE_KEY_COMPACT) {
base = offsetof(struct radv_accel_struct_header, geometry_type);
struct header_args args = {
.src = intermediate_header_addr,
.dst = vk_acceleration_structure_get_va(dst),
.bvh_offset = layout.bvh_offset,
.internal_nodes_offset = layout.internal_nodes_offset - layout.bvh_offset,
.instance_count = instance_count,
};
radv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
radv_unaligned_dispatch(cmd_buffer, 1, 1, 1);
}
struct radv_accel_struct_header header;
header.instance_offset = layout.bvh_offset + sizeof(struct radv_bvh_box32_node);
header.instance_count = instance_count;
header.leaf_node_offsets_offset = layout.leaf_node_offsets_offset;
header.compacted_size = layout.size;
header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
header.copy_dispatch_size[1] = 1;
header.copy_dispatch_size[2] = 1;
header.serialization_size =
header.compacted_size +
align(sizeof(struct radv_accel_struct_serialization_header) + sizeof(uint64_t) * header.instance_count, 128);
header.size = header.serialization_size - sizeof(struct radv_accel_struct_serialization_header) -
sizeof(uint64_t) * header.instance_count;
header.build_flags = state->build_info->flags;
header.geometry_type = vk_get_as_geometry_type(state->build_info);
header.geometry_count = state->build_info->geometryCount;
radv_update_memory_cp(cmd_buffer, vk_acceleration_structure_get_va(dst) + base, (const char *)&header + base,
sizeof(header) - base);
if (layout.geometry_info_offset != RADV_OFFSET_UNUSED) {
uint64_t geometry_infos_size = state->build_info->geometryCount * sizeof(struct radv_accel_struct_geometry_info);
struct radv_accel_struct_geometry_info *geometry_infos = malloc(geometry_infos_size);
if (!geometry_infos)
return;
for (uint32_t i = 0; i < state->build_info->geometryCount; i++) {
const VkAccelerationStructureGeometryKHR *geometry =
state->build_info->pGeometries ? &state->build_info->pGeometries[i] : state->build_info->ppGeometries[i];
geometry_infos[i].type = geometry->geometryType;
geometry_infos[i].flags = geometry->flags;
geometry_infos[i].primitive_count = state->build_range_infos[i].primitiveCount;
}
radv_CmdUpdateBuffer(commandBuffer, vk_buffer_to_handle(dst->buffer), dst->offset + layout.geometry_info_offset,
geometry_infos_size, geometry_infos);
free(geometry_infos);
}
}
static void
radv_init_update_scratch(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t scratch = state->build_info->scratchData.deviceAddress;
struct update_scratch_layout layout;
radv_get_update_scratch_layout(device, state, &layout);
/* Prepare ready counts for internal nodes */
radv_fill_memory(cmd_buffer, scratch + layout.internal_ready_count_offset,
layout.size - layout.internal_ready_count_offset, 0x0, RADV_COPY_FLAGS_DEVICE_LOCAL);
/* geometryCount == 1 passes the data as push constant. */
if (radv_use_bvh8(pdev) && !(state->config.update_key[0] & RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY)) {
uint32_t data_size = sizeof(struct vk_bvh_geometry_data) * state->build_info->geometryCount;
struct vk_bvh_geometry_data *data = malloc(data_size);
if (!data) {
vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
return;
}
uint32_t first_id = 0;
for (uint32_t i = 0; i < state->build_info->geometryCount; i++) {
const VkAccelerationStructureGeometryKHR *geom =
state->build_info->pGeometries ? &state->build_info->pGeometries[i] : state->build_info->ppGeometries[i];
const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &state->build_range_infos[i];
data[i] = vk_fill_geometry_data(state->build_info->type, first_id, i, geom, build_range_info);
first_id += build_range_info->primitiveCount;
}
radv_update_memory(cmd_buffer, scratch + layout.geometry_data_offset, data_size, data,
RADV_COPY_FLAGS_DEVICE_LOCAL);
free(data);
}
}
static void
radv_update_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Wait for update scratch initialization to finish.. */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, 0, NULL, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_READ_BIT, 0, NULL, NULL);
if (radv_device_physical(device)->info.cp_sdma_ge_use_system_memory_scope)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
uint32_t flags = state->config.update_key[0];
if (radv_use_bvh8(pdev)) {
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_UPDATE, update_gfx12_spv,
sizeof(update_gfx12_spv), sizeof(struct update_args), flags);
} else {
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_UPDATE, update_spv, sizeof(update_spv),
sizeof(struct update_args), flags);
}
}
static uint32_t
pack_geometry_id_and_flags(uint32_t geometry_id, uint32_t flags)
{
uint32_t geometry_id_and_flags = geometry_id;
if (flags & VK_GEOMETRY_OPAQUE_BIT_KHR)
geometry_id_and_flags |= RADV_GEOMETRY_OPAQUE;
return geometry_id_and_flags;
}
static void
radv_update_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
if (src != dst) {
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
/* Copy header/metadata */
const uint64_t src_va = vk_acceleration_structure_get_va(src);
const uint64_t dst_va = vk_acceleration_structure_get_va(dst);
radv_copy_memory(cmd_buffer, src_va, dst_va, layout.bvh_offset, RADV_COPY_FLAGS_DEVICE_LOCAL,
RADV_COPY_FLAGS_DEVICE_LOCAL);
}
struct update_scratch_layout layout;
radv_get_update_scratch_layout(device, state, &layout);
struct update_args update_consts = {
.src = vk_acceleration_structure_get_va(src),
.dst = vk_acceleration_structure_get_va(dst),
.leaf_bounds = state->build_info->scratchData.deviceAddress,
.internal_ready_count = state->build_info->scratchData.deviceAddress + layout.internal_ready_count_offset,
.leaf_node_count = state->leaf_node_count,
};
uint32_t first_id = 0;
for (uint32_t i = 0; i < state->build_info->geometryCount; i++) {
const VkAccelerationStructureGeometryKHR *geom =
state->build_info->pGeometries ? &state->build_info->pGeometries[i] : state->build_info->ppGeometries[i];
const VkAccelerationStructureBuildRangeInfoKHR *build_range_info = &state->build_range_infos[i];
update_consts.geom_data = vk_fill_geometry_data(state->build_info->type, first_id, i, geom, build_range_info);
radv_bvh_build_set_args(commandBuffer, &update_consts, sizeof(update_consts));
radv_unaligned_dispatch(cmd_buffer, build_range_info->primitiveCount, 1, 1);
first_id += build_range_info->primitiveCount;
}
}
static void
radv_update_as_gfx12(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, src, state->build_info->srcAccelerationStructure);
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
if (src != dst) {
struct acceleration_structure_layout layout;
radv_get_acceleration_structure_layout(device, state, &layout);
/* Copy header/metadata */
const uint64_t src_va = vk_acceleration_structure_get_va(src);
const uint64_t dst_va = vk_acceleration_structure_get_va(dst);
radv_copy_memory(cmd_buffer, src_va, dst_va, layout.bvh_offset, RADV_COPY_FLAGS_DEVICE_LOCAL,
RADV_COPY_FLAGS_DEVICE_LOCAL);
}
struct update_scratch_layout layout;
radv_get_update_scratch_layout(device, state, &layout);
struct update_gfx12_args update_consts = {
.src = vk_acceleration_structure_get_va(src),
.dst = vk_acceleration_structure_get_va(dst),
.geom_data = state->build_info->scratchData.deviceAddress + layout.geometry_data_offset,
.bounds = state->build_info->scratchData.deviceAddress + layout.bounds_offsets,
.internal_ready_count = state->build_info->scratchData.deviceAddress + layout.internal_ready_count_offset,
.leaf_node_count = state->leaf_node_count,
};
if (state->config.update_key[0] & RADV_BUILD_FLAG_UPDATE_SINGLE_GEOMETRY) {
const VkAccelerationStructureGeometryKHR *geom =
state->build_info->pGeometries ? &state->build_info->pGeometries[0] : state->build_info->ppGeometries[0];
update_consts.geom_data0 = vk_fill_geometry_data(state->build_info->type, 0, 0, geom, state->build_range_infos);
}
radv_bvh_build_set_args(commandBuffer, &update_consts, sizeof(update_consts));
struct radv_dispatch_info dispatch = {
.ordered = true,
.unaligned = true,
.indirect_va =
vk_acceleration_structure_get_va(src) + offsetof(struct radv_accel_struct_header, update_dispatch_size[0]),
};
radv_compute_dispatch(cmd_buffer, &dispatch);
}
static const struct radix_sort_vk_target_config radix_sort_config = {
.keyval_dwords = 2,
.fill.workgroup_size_log2 = 7,
.fill.block_rows = 8,
.histogram.workgroup_size_log2 = 8,
.histogram.subgroup_size_log2 = 6,
.histogram.block_rows = 14,
.prefix.workgroup_size_log2 = 8,
.prefix.subgroup_size_log2 = 6,
.scatter.workgroup_size_log2 = 8,
.scatter.subgroup_size_log2 = 6,
.scatter.block_rows = 14,
};
static void
radv_write_buffer_cp(VkCommandBuffer commandBuffer, VkDeviceAddress addr, void *data, uint32_t size)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_update_memory_cp(cmd_buffer, addr, data, size);
}
static void
radv_flush_buffer_write_cp(VkCommandBuffer commandBuffer)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.cp_sdma_ge_use_system_memory_scope)
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2;
}
static void
radv_cmd_dispatch_unaligned(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_unaligned_dispatch(cmd_buffer, x, y, z);
}
static void
radv_cmd_fill_buffer_addr(VkCommandBuffer commandBuffer, VkDeviceAddress addr, VkDeviceSize size, uint32_t data)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_fill_memory(cmd_buffer, addr, size, data, RADV_COPY_FLAGS_DEVICE_LOCAL);
}
VkResult
radv_device_init_accel_struct_build_state(struct radv_device *device)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
mtx_lock(&device->meta_state.mtx);
if (device->meta_state.accel_struct_build.radix_sort)
goto exit;
device->meta_state.accel_struct_build.radix_sort = vk_create_radix_sort_u64(
radv_device_to_handle(device), &device->meta_state.alloc, device->meta_state.cache, radix_sort_config);
device->meta_state.accel_struct_build.build_ops = (struct vk_acceleration_structure_build_ops){
.begin_debug_marker = vk_accel_struct_cmd_begin_debug_marker,
.end_debug_marker = vk_accel_struct_cmd_end_debug_marker,
.get_build_config = radv_get_build_config,
.get_as_size = radv_get_as_size,
.get_update_scratch_size = radv_get_update_scratch_size,
.encode_bind_pipeline[1] = radv_init_header_bind_pipeline,
.encode_as[1] = radv_init_header,
.init_update_scratch = radv_init_update_scratch,
.update_bind_pipeline[0] = radv_update_bind_pipeline,
};
if (radv_use_bvh8(pdev)) {
device->meta_state.accel_struct_build.build_ops.update_as[0] = radv_update_as_gfx12;
device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[0] = radv_encode_bind_pipeline_gfx12;
device->meta_state.accel_struct_build.build_ops.encode_as[0] = radv_encode_as_gfx12;
} else {
device->meta_state.accel_struct_build.build_ops.update_as[0] = radv_update_as;
device->meta_state.accel_struct_build.build_ops.encode_bind_pipeline[0] = radv_encode_bind_pipeline;
device->meta_state.accel_struct_build.build_ops.encode_as[0] = radv_encode_as;
device->meta_state.accel_struct_build.build_ops.leaf_spirv_override = leaf_spv;
device->meta_state.accel_struct_build.build_ops.leaf_spirv_override_size = sizeof(leaf_spv);
}
device->vk.as_build_ops = &device->meta_state.accel_struct_build.build_ops;
device->vk.write_buffer_cp = radv_write_buffer_cp;
device->vk.flush_buffer_write_cp = radv_flush_buffer_write_cp;
device->vk.cmd_dispatch_unaligned = radv_cmd_dispatch_unaligned;
device->vk.cmd_fill_buffer_addr = radv_cmd_fill_buffer_addr;
struct vk_acceleration_structure_build_args *build_args = &device->meta_state.accel_struct_build.build_args;
build_args->subgroup_size = 64;
build_args->bvh_bounds_offset = offsetof(struct radv_accel_struct_header, aabb);
build_args->root_flags_offset = offsetof(struct radv_accel_struct_header, root_flags);
build_args->propagate_cull_flags = pdev->info.gfx_level >= GFX11;
build_args->emit_markers = device->sqtt.bo;
build_args->radix_sort = device->meta_state.accel_struct_build.radix_sort;
exit:
mtx_unlock(&device->meta_state.mtx);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_meta_saved_state saved_state;
VkResult result = radv_device_init_accel_struct_build_state(device);
if (result != VK_SUCCESS) {
vk_command_buffer_set_error(&cmd_buffer->vk, result);
return;
}
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
cmd_buffer->state.current_event_type = EventInternalUnknown;
vk_cmd_build_acceleration_structures(commandBuffer, &device->vk, &device->meta_state.device, infoCount, pInfos,
ppBuildRangeInfos, &device->meta_state.accel_struct_build.build_args);
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
struct radv_meta_saved_state saved_state;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY, copy_spv, sizeof(copy_spv),
sizeof(struct copy_args), radv_build_flags(commandBuffer, 0) & RADV_BUILD_FLAG_BVH8);
struct copy_args consts = {
.src_addr = vk_acceleration_structure_get_va(src),
.dst_addr = vk_acceleration_structure_get_va(dst),
.mode = RADV_COPY_MODE_COPY,
};
radv_bvh_build_set_args(commandBuffer, &consts, sizeof(consts));
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, 0, NULL, NULL);
radv_CmdDispatchIndirect(commandBuffer, vk_buffer_to_handle(src->buffer),
src->offset + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceAccelerationStructureCompatibilityKHR(VkDevice _device,
const VkAccelerationStructureVersionInfoKHR *pVersionInfo,
VkAccelerationStructureCompatibilityKHR *pCompatibility)
{
VK_FROM_HANDLE(radv_device, device, _device);
const struct radv_physical_device *pdev = radv_device_physical(device);
bool compat = memcmp(pVersionInfo->pVersionData, pdev->driver_uuid, VK_UUID_SIZE) == 0 &&
memcmp(pVersionInfo->pVersionData + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE) == 0;
*pCompatibility = compat ? VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR
: VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_saved_state saved_state;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY, copy_spv, sizeof(copy_spv),
sizeof(struct copy_args), radv_build_flags(commandBuffer, 0) & RADV_BUILD_FLAG_BVH8);
const struct copy_args consts = {
.src_addr = pInfo->src.deviceAddress,
.dst_addr = vk_acceleration_structure_get_va(dst),
.mode = RADV_COPY_MODE_DESERIALIZE,
};
radv_bvh_build_set_args(commandBuffer, &consts, sizeof(consts));
radv_CmdDispatchBase(commandBuffer, 0, 0, 0, 512, 1, 1);
if (radv_use_bvh8(pdev)) {
/* Wait for the main copy dispatch to finish. */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, 0, NULL, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_READ_BIT, 0, NULL, NULL);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY_BLAS_ADDRS_GFX12,
copy_blas_addrs_gfx12_spv, sizeof(copy_blas_addrs_gfx12_spv),
sizeof(struct copy_args), 0);
radv_CmdDispatchBase(commandBuffer, 0, 0, 0, 256, 1, 1);
}
radv_meta_restore(&saved_state, cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
radv_CmdCopyAccelerationStructureToMemoryKHR(VkCommandBuffer commandBuffer,
const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_meta_saved_state saved_state;
radv_meta_save(&saved_state, cmd_buffer,
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_CONSTANTS);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY, copy_spv, sizeof(copy_spv),
sizeof(struct copy_args), radv_build_flags(commandBuffer, 0) & RADV_BUILD_FLAG_BVH8);
const struct copy_args consts = {
.src_addr = vk_acceleration_structure_get_va(src),
.dst_addr = pInfo->dst.deviceAddress,
.mode = RADV_COPY_MODE_SERIALIZE,
};
radv_bvh_build_set_args(commandBuffer, &consts, sizeof(consts));
cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT,
VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT, 0, NULL, NULL);
radv_CmdDispatchIndirect(commandBuffer, vk_buffer_to_handle(src->buffer),
src->offset + offsetof(struct radv_accel_struct_header, copy_dispatch_size));
if (radv_use_bvh8(pdev)) {
/* Wait for the main copy dispatch to finish. */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_WRITE_BIT, 0, NULL, NULL) |
radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
VK_ACCESS_2_SHADER_READ_BIT, 0, NULL, NULL);
radv_bvh_build_bind_pipeline(commandBuffer, RADV_META_OBJECT_KEY_BVH_COPY_BLAS_ADDRS_GFX12,
copy_blas_addrs_gfx12_spv, sizeof(copy_blas_addrs_gfx12_spv),
sizeof(struct copy_args), 0);
radv_CmdDispatchBase(commandBuffer, 0, 0, 0, 256, 1, 1);
}
radv_meta_restore(&saved_state, cmd_buffer);
/* Set the header of the serialized data. */
uint8_t header_data[2 * VK_UUID_SIZE];
memcpy(header_data, pdev->driver_uuid, VK_UUID_SIZE);
memcpy(header_data + VK_UUID_SIZE, pdev->cache_uuid, VK_UUID_SIZE);
radv_update_memory_cp(cmd_buffer, pInfo->dst.deviceAddress, header_data, sizeof(header_data));
}