mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-20 15:38:19 +02:00
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40387>
784 lines
29 KiB
C
784 lines
29 KiB
C
/* Copyright © 2024 Intel Corporation
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "anv_private.h"
|
|
|
|
#include <math.h>
|
|
|
|
#include "util/u_debug.h"
|
|
#include "util/half_float.h"
|
|
#include "util/u_atomic.h"
|
|
|
|
#include "genxml/gen_macros.h"
|
|
#include "genxml/genX_pack.h"
|
|
#include "genxml/genX_rt_pack.h"
|
|
|
|
#include "ds/intel_tracepoints.h"
|
|
|
|
#include "bvh/anv_build_interface.h"
|
|
#include "vk_acceleration_structure.h"
|
|
#include "radix_sort/radix_sort_u64.h"
|
|
#include "radix_sort/common/vk/barrier.h"
|
|
|
|
#include "vk_common_entrypoints.h"
|
|
#include "genX_mi_builder.h"
|
|
|
|
#if GFX_VERx10 >= 125
|
|
|
|
/* Id to track bvh_dump */
|
|
static uint32_t blas_id = 0;
|
|
static uint32_t tlas_id = 0;
|
|
|
|
static void
|
|
begin_debug_marker(VkCommandBuffer commandBuffer,
|
|
struct vk_acceleration_structure_build_marker *marker)
|
|
{
|
|
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
|
|
switch (marker->step) {
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
|
|
trace_intel_begin_as_build(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
|
|
trace_intel_begin_as_build_leaves(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
|
|
trace_intel_begin_as_morton_generate(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
|
|
trace_intel_begin_as_morton_sort(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
|
|
trace_intel_begin_as_lbvh_build_internal(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
|
|
trace_intel_begin_as_ploc_build_internal(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
|
|
trace_intel_begin_as_encode(&cmd_buffer->trace);
|
|
break;
|
|
default:
|
|
UNREACHABLE("Invalid build step");
|
|
}
|
|
}
|
|
|
|
static void
|
|
end_debug_marker(VkCommandBuffer commandBuffer,
|
|
struct vk_acceleration_structure_build_marker *marker)
|
|
{
|
|
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
|
|
switch (marker->step) {
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_TOP:
|
|
trace_intel_end_as_build(&cmd_buffer->trace,
|
|
marker->top.tlas_count,
|
|
marker->top.blas_count);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_BUILD_LEAVES:
|
|
trace_intel_end_as_build_leaves(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_GENERATE:
|
|
trace_intel_end_as_morton_generate(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_MORTON_SORT:
|
|
trace_intel_end_as_morton_sort(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_LBVH_BUILD_INTERNAL:
|
|
trace_intel_end_as_lbvh_build_internal(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_PLOC_BUILD_INTERNAL:
|
|
trace_intel_end_as_ploc_build_internal(&cmd_buffer->trace);
|
|
break;
|
|
case VK_ACCELERATION_STRUCTURE_BUILD_STEP_ENCODE:
|
|
trace_intel_end_as_encode(&cmd_buffer->trace,
|
|
marker->encode.pass,
|
|
marker->encode.key,
|
|
marker->encode.leaf_node_count,
|
|
marker->encode.internal_node_count);
|
|
break;
|
|
default:
|
|
UNREACHABLE("Invalid build step");
|
|
}
|
|
}
|
|
|
|
static void
|
|
add_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
|
|
VkDeviceAddress src,
|
|
uint64_t dump_size,
|
|
VkGeometryTypeKHR geometry_type,
|
|
enum bvh_dump_type dump_type)
|
|
{
|
|
assert(dump_size % 4 == 0);
|
|
assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
|
|
|
|
struct anv_device *device = cmd_buffer->device;
|
|
struct anv_bo *bo = NULL;
|
|
|
|
VkResult result = anv_device_alloc_bo(device, "bvh_dump", dump_size,
|
|
ANV_BO_ALLOC_MAPPED |
|
|
ANV_BO_ALLOC_HOST_CACHED_COHERENT, 0,
|
|
&bo);
|
|
if (result != VK_SUCCESS) {
|
|
printf("Failed to allocate bvh for dump\n");
|
|
vk_command_buffer_set_error(&cmd_buffer->vk, result);
|
|
return;
|
|
}
|
|
|
|
struct anv_bvh_dump *bvh_dump = malloc(sizeof(struct anv_bvh_dump));
|
|
|
|
bvh_dump->bo = bo;
|
|
bvh_dump->bvh_id = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ?
|
|
tlas_id : blas_id;
|
|
bvh_dump->dump_size = dump_size;
|
|
bvh_dump->geometry_type = geometry_type;
|
|
bvh_dump->dump_type = dump_type;
|
|
|
|
struct anv_address dst_addr = { .bo = bvh_dump->bo, .offset = 0 };
|
|
struct anv_address src_addr = anv_address_from_u64(src);
|
|
|
|
vk_barrier_compute_w_to_compute_r(vk_command_buffer_to_handle(&cmd_buffer->vk));
|
|
anv_cmd_copy_addr(cmd_buffer, src_addr, dst_addr, bvh_dump->dump_size);
|
|
|
|
/* Add host barrier to read BVH data. */
|
|
vk_barrier_compute_w_to_host_r(vk_command_buffer_to_handle(&cmd_buffer->vk));
|
|
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
|
|
|
list_addtail(&bvh_dump->link, &cmd_buffer->bvh_dumps);
|
|
}
|
|
|
|
static void
|
|
debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
|
|
VkDeviceAddress header_addr,
|
|
uint64_t bvh_anv_size,
|
|
VkDeviceAddress intermediate_header_addr,
|
|
VkDeviceAddress intermediate_as_addr,
|
|
uint32_t leaf_count,
|
|
VkGeometryTypeKHR geometry_type)
|
|
{
|
|
if (INTEL_DEBUG(DEBUG_BVH_BLAS) &&
|
|
geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
|
add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type,
|
|
BVH_ANV);
|
|
}
|
|
|
|
if (INTEL_DEBUG(DEBUG_BVH_TLAS) &&
|
|
geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
|
add_bvh_dump(cmd_buffer, header_addr, bvh_anv_size, geometry_type,
|
|
BVH_ANV);
|
|
}
|
|
|
|
if (INTEL_DEBUG(DEBUG_BVH_BLAS_IR_HDR) &&
|
|
geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
|
add_bvh_dump(cmd_buffer, intermediate_header_addr,
|
|
sizeof(struct vk_ir_header), geometry_type, BVH_IR_HDR);
|
|
}
|
|
|
|
if (INTEL_DEBUG(DEBUG_BVH_TLAS_IR_HDR) &&
|
|
geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
|
add_bvh_dump(cmd_buffer, intermediate_header_addr,
|
|
sizeof(struct vk_ir_header), geometry_type, BVH_IR_HDR);
|
|
}
|
|
|
|
uint32_t internal_node_count = MAX2(leaf_count, 2) - 1;
|
|
uint64_t internal_node_total_size = sizeof(struct vk_ir_box_node) *
|
|
internal_node_count;
|
|
|
|
if (INTEL_DEBUG(DEBUG_BVH_BLAS_IR_AS) &&
|
|
geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
|
uint64_t leaf_total_size;
|
|
|
|
switch (geometry_type) {
|
|
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
|
leaf_total_size = sizeof(struct vk_ir_triangle_node) * leaf_count;
|
|
break;
|
|
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
|
leaf_total_size = sizeof(struct vk_ir_aabb_node) * leaf_count;
|
|
break;
|
|
default:
|
|
UNREACHABLE("invalid geometry type");
|
|
}
|
|
|
|
add_bvh_dump(cmd_buffer, intermediate_as_addr, internal_node_total_size +
|
|
leaf_total_size, geometry_type, BVH_IR_AS);
|
|
}
|
|
|
|
if (INTEL_DEBUG(DEBUG_BVH_TLAS_IR_AS) &&
|
|
geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
|
uint64_t leaf_total_size = sizeof(struct vk_ir_instance_node) *
|
|
leaf_count;
|
|
add_bvh_dump(cmd_buffer, intermediate_as_addr, internal_node_total_size +
|
|
leaf_total_size, geometry_type, BVH_IR_AS);
|
|
}
|
|
|
|
|
|
if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
|
tlas_id++;
|
|
} else {
|
|
blas_id++;
|
|
}
|
|
}
|
|
|
|
#define STRINGIFY_HELPER(x) #x
|
|
#define STRINGIFY(x) STRINGIFY_HELPER(x)
|
|
|
|
#define ENCODE_SPV_PATH STRINGIFY(bvh/genX(encode).spv.h)
|
|
#define HEADER_SPV_PATH STRINGIFY(bvh/genX(header).spv.h)
|
|
#define COPY_SPV_PATH STRINGIFY(bvh/genX(copy).spv.h)
|
|
|
|
static const uint32_t encode_spv[] = {
|
|
#include ENCODE_SPV_PATH
|
|
};
|
|
|
|
static const uint32_t header_spv[] = {
|
|
#include HEADER_SPV_PATH
|
|
};
|
|
|
|
static const uint32_t copy_spv[] = {
|
|
#include COPY_SPV_PATH
|
|
};
|
|
|
|
static void
|
|
get_bvh_layout(VkGeometryTypeKHR geometry_type, uint32_t leaf_count,
|
|
struct bvh_layout *layout)
|
|
{
|
|
uint32_t internal_count = MAX2(leaf_count, 2) - 1;
|
|
|
|
uint64_t offset = ANV_RT_BVH_HEADER_SIZE;
|
|
|
|
/* This is where internal_nodes/leaves start to be encoded.
|
|
*
|
|
* NOTE: Root node offset is fixed to 256 so make sure you don't add
|
|
* anything above this offset.
|
|
*/
|
|
layout->bvh_offset = offset;
|
|
|
|
offset += internal_count * ANV_RT_INTERNAL_NODE_SIZE;
|
|
|
|
switch (geometry_type) {
|
|
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
|
/* Currently we encode one triangle within one quad leaf */
|
|
offset += leaf_count * ANV_RT_QUAD_LEAF_SIZE;
|
|
break;
|
|
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
|
offset += leaf_count * ANV_RT_PROCEDURAL_LEAF_SIZE;
|
|
break;
|
|
case VK_GEOMETRY_TYPE_INSTANCES_KHR:
|
|
offset += leaf_count * ANV_RT_INSTANCE_LEAF_SIZE;
|
|
break;
|
|
default:
|
|
UNREACHABLE("Unknown VkGeometryTypeKHR");
|
|
}
|
|
|
|
offset = align64(offset, 64);
|
|
layout->instance_leaves_offset = offset;
|
|
|
|
/* For a TLAS, we store the address of anv_instance_leaf after header
|
|
* This is for quick access in the copy.comp
|
|
*/
|
|
if (geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR) {
|
|
offset += leaf_count * sizeof(uint64_t);
|
|
}
|
|
|
|
layout->size = align64(offset, 64);
|
|
}
|
|
|
|
static VkDeviceSize
|
|
anv_get_as_size(VkDevice device, const struct vk_acceleration_structure_build_state *state)
|
|
{
|
|
struct bvh_layout layout;
|
|
get_bvh_layout(vk_get_as_geometry_type(state->build_info), state->leaf_node_count, &layout);
|
|
return layout.size;
|
|
}
|
|
|
|
static void
|
|
anv_get_build_config(VkDevice device, struct vk_acceleration_structure_build_state *state)
|
|
{
|
|
VkBuildAccelerationStructureFlagsKHR flags = state->build_info->flags;
|
|
|
|
/* TODO: ANV does not yet have support for AS updates without doing a full
|
|
* rebuild, this means that AS updates can cause their size to increase.
|
|
*
|
|
* The Vulkan spec says that the maximum size required for updating a
|
|
* compacted AS will be the "compacted size" that can be queried from it
|
|
* after the initial build, so in order for apps to behave we must report
|
|
* the compacted size of an updatable AS as the maximum possible size for
|
|
* any AS that could also be built from the same number of leaf nodes.
|
|
*/
|
|
state->config.encode_key[1] =
|
|
((flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR) &&
|
|
!(flags & VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR)) ? 1 : 0;
|
|
}
|
|
|
|
static void
|
|
anv_bvh_build_bind_pipeline(VkCommandBuffer commandBuffer,
|
|
enum anv_object_key_bvh_type type,
|
|
const uint32_t *spirv, uint32_t spirv_size,
|
|
uint32_t push_constant_size, uint32_t flags)
|
|
{
|
|
VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
struct anv_device *device = cmd_buffer->device;
|
|
|
|
VkPipeline pipeline;
|
|
VkResult result = vk_get_bvh_build_pipeline_spv(&device->vk,
|
|
&device->meta_device,
|
|
(enum anv_object_key_bvh_type)type,
|
|
spirv, spirv_size, push_constant_size,
|
|
&device->accel_struct_build.build_args,
|
|
flags, &pipeline, false);
|
|
if (result != VK_SUCCESS) {
|
|
vk_command_buffer_set_error(&cmd_buffer->vk, result);
|
|
return;
|
|
}
|
|
|
|
device->vk.dispatch_table.CmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE,
|
|
pipeline);
|
|
}
|
|
|
|
static void
|
|
anv_bvh_build_set_args(VkCommandBuffer commandBuffer, const void *args,
|
|
uint32_t size)
|
|
{
|
|
VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
struct anv_device *device = cmd_buffer->device;
|
|
VkPipelineLayout layout;
|
|
vk_get_bvh_build_pipeline_layout(&device->vk, &device->meta_device, size,
|
|
&layout);
|
|
|
|
VkPushConstantsInfo push_info = {
|
|
.sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO,
|
|
.layout = layout,
|
|
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
|
.offset = 0,
|
|
.size = size,
|
|
.pValues = args,
|
|
};
|
|
|
|
anv_CmdPushConstants2(commandBuffer, &push_info);
|
|
}
|
|
|
|
static VkResult
|
|
anv_encode_prepare(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
|
|
{
|
|
anv_bvh_build_bind_pipeline(commandBuffer,
|
|
ANV_OBJECT_KEY_BVH_ENCODE,
|
|
encode_spv, sizeof(encode_spv),
|
|
sizeof(struct encode_args), 0);
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
/* Helper to zero out the output BVH. */
|
|
static void
|
|
anv_clear_out_bvh(struct anv_cmd_buffer *cmd_buffer,
|
|
VkDeviceAddress output_bvh_addr, uint64_t bvh_size)
|
|
{
|
|
assert(bvh_size % 4 == 0);
|
|
struct anv_address anv_bvh_addr = anv_address_from_u64(output_bvh_addr);
|
|
|
|
anv_cmd_buffer_fill_area(cmd_buffer, anv_bvh_addr, bvh_size, 0 /* data */);
|
|
|
|
vk_barrier_compute_w_to_compute_r(vk_command_buffer_to_handle(&cmd_buffer->vk));
|
|
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
|
}
|
|
|
|
|
|
static void
|
|
anv_encode_as(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
|
|
{
|
|
VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
|
|
|
|
struct bvh_layout bvh_layout;
|
|
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
|
|
get_bvh_layout(geometry_type, state->leaf_node_count, &bvh_layout);
|
|
|
|
if (INTEL_DEBUG(DEBUG_BVH_NO_BUILD)) {
|
|
/* Zero out the whole BVH when we run with BVH_NO_BUILD debug option. */
|
|
anv_clear_out_bvh(cmd_buffer,
|
|
vk_acceleration_structure_get_va(dst) + bvh_layout.bvh_offset,
|
|
bvh_layout.size);
|
|
return;
|
|
}
|
|
|
|
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
|
|
uint64_t intermediate_bvh_addr = state->build_info->scratchData.deviceAddress + state->scratch.ir_offset;
|
|
|
|
STATIC_ASSERT(sizeof(struct anv_accel_struct_header) == ANV_RT_BVH_HEADER_SIZE);
|
|
STATIC_ASSERT(sizeof(struct anv_instance_leaf) == ANV_RT_INSTANCE_LEAF_SIZE);
|
|
STATIC_ASSERT(sizeof(struct anv_quad_leaf_node) == ANV_RT_QUAD_LEAF_SIZE);
|
|
STATIC_ASSERT(sizeof(struct anv_procedural_leaf_node) == ANV_RT_PROCEDURAL_LEAF_SIZE);
|
|
STATIC_ASSERT(sizeof(struct anv_internal_node) == ANV_RT_INTERNAL_NODE_SIZE);
|
|
|
|
const struct encode_args args = {
|
|
.intermediate_bvh = intermediate_bvh_addr,
|
|
.output_bvh = vk_acceleration_structure_get_va(dst) +
|
|
bvh_layout.bvh_offset,
|
|
.header = intermediate_header_addr,
|
|
.output_bvh_offset = bvh_layout.bvh_offset,
|
|
.leaf_node_count = state->leaf_node_count,
|
|
.geometry_type = geometry_type,
|
|
.instance_leaves_addr = vk_acceleration_structure_get_va(dst) +
|
|
bvh_layout.instance_leaves_offset,
|
|
};
|
|
anv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
|
|
|
|
anv_genX(cmd_buffer->device->info, cmd_dispatch_unaligned)
|
|
(commandBuffer, MAX2(state->leaf_node_count, 1), 1, 1);
|
|
}
|
|
|
|
static VkResult
|
|
anv_init_header_bind_pipeline(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
|
|
{
|
|
/* Add a barrier to ensure the writes from encode.comp is ready to be
|
|
* read by header.comp
|
|
*/
|
|
vk_barrier_compute_w_to_compute_r(commandBuffer);
|
|
|
|
anv_bvh_build_bind_pipeline(commandBuffer,
|
|
ANV_OBJECT_KEY_BVH_HEADER,
|
|
header_spv, sizeof(header_spv),
|
|
sizeof(struct header_args), 0);
|
|
|
|
return VK_SUCCESS;
|
|
}
|
|
|
|
static void
|
|
anv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_structure_build_state *state)
|
|
{
|
|
VK_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
VK_FROM_HANDLE(vk_acceleration_structure, dst, state->build_info->dstAccelerationStructure);
|
|
|
|
uint64_t intermediate_header_addr = state->build_info->scratchData.deviceAddress + state->scratch.header_offset;
|
|
uint64_t intermediate_bvh_addr = state->build_info->scratchData.deviceAddress + state->scratch.ir_offset;
|
|
|
|
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
|
|
|
|
struct bvh_layout bvh_layout;
|
|
get_bvh_layout(geometry_type, state->leaf_node_count, &bvh_layout);
|
|
|
|
VkDeviceAddress header_addr = vk_acceleration_structure_get_va(dst);
|
|
|
|
uint32_t instance_count = geometry_type == VK_GEOMETRY_TYPE_INSTANCES_KHR ?
|
|
state->leaf_node_count : 0;
|
|
|
|
struct header_args args = {
|
|
.src = intermediate_header_addr,
|
|
.dst = vk_acceleration_structure_get_va(dst),
|
|
.bvh_offset = bvh_layout.bvh_offset,
|
|
.instance_count = instance_count,
|
|
.instance_leaves_offset = bvh_layout.instance_leaves_offset,
|
|
.is_compacted = (state->config.encode_key[1] == 1),
|
|
.bvh_size = bvh_layout.size,
|
|
};
|
|
|
|
anv_bvh_build_set_args(commandBuffer, &args, sizeof(args));
|
|
vk_common_CmdDispatch(commandBuffer, 1, 1, 1);
|
|
|
|
if (INTEL_DEBUG_BVH_ANY) {
|
|
debug_record_as_to_bvh_dump(cmd_buffer, header_addr, bvh_layout.size,
|
|
intermediate_header_addr, intermediate_bvh_addr,
|
|
state->leaf_node_count, geometry_type);
|
|
}
|
|
}
|
|
|
|
static const struct vk_acceleration_structure_build_ops anv_build_ops = {
|
|
.begin_debug_marker = begin_debug_marker,
|
|
.end_debug_marker = end_debug_marker,
|
|
.get_as_size = anv_get_as_size,
|
|
.get_build_config = anv_get_build_config,
|
|
.encode_prepare = { anv_encode_prepare, anv_init_header_bind_pipeline },
|
|
.encode_as = { anv_encode_as, anv_init_header },
|
|
};
|
|
|
|
static VkResult
|
|
anv_device_init_accel_struct_build_state(struct anv_device *device)
|
|
{
|
|
VkResult result = VK_SUCCESS;
|
|
simple_mtx_lock(&device->accel_struct_build.mutex);
|
|
|
|
if (device->accel_struct_build.radix_sort)
|
|
goto exit;
|
|
|
|
const struct radix_sort_vk_target_config radix_sort_config = {
|
|
.keyval_dwords = 2,
|
|
.init = { .workgroup_size_log2 = 8, },
|
|
.fill = { .workgroup_size_log2 = 8, .block_rows = 8 },
|
|
.histogram = {
|
|
.workgroup_size_log2 = 8,
|
|
.subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
|
|
.block_rows = 14,
|
|
},
|
|
.prefix = {
|
|
.workgroup_size_log2 = 8,
|
|
.subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
|
|
},
|
|
.scatter = {
|
|
.workgroup_size_log2 = 8,
|
|
.subgroup_size_log2 = device->info->ver >= 20 ? 4 : 3,
|
|
.block_rows = 14,
|
|
},
|
|
};
|
|
|
|
device->accel_struct_build.radix_sort =
|
|
vk_create_radix_sort_u64(anv_device_to_handle(device),
|
|
&device->vk.alloc,
|
|
VK_NULL_HANDLE, radix_sort_config);
|
|
|
|
device->vk.as_build_ops = &anv_build_ops;
|
|
device->vk.write_buffer_cp = anv_cmd_write_buffer_cp;
|
|
device->vk.flush_buffer_write_cp = anv_cmd_flush_buffer_write_cp;
|
|
device->vk.cmd_dispatch_unaligned = anv_cmd_dispatch_unaligned;
|
|
device->vk.cmd_fill_buffer_addr = anv_cmd_fill_buffer_addr;
|
|
|
|
device->accel_struct_build.build_args =
|
|
(struct vk_acceleration_structure_build_args) {
|
|
.emit_markers = u_trace_enabled(&device->ds.trace_context),
|
|
.subgroup_size = device->info->ver >= 20 ? 16 : 8,
|
|
.radix_sort_64 = device->accel_struct_build.radix_sort,
|
|
/* See struct anv_accel_struct_header from anv_bvh.h
|
|
*
|
|
* Root pointer starts at offset 0 and bound box start at offset 8.
|
|
*/
|
|
.bvh_bounds_offset = 8,
|
|
};
|
|
|
|
exit:
|
|
simple_mtx_unlock(&device->accel_struct_build.mutex);
|
|
return result;
|
|
}
|
|
|
|
void
|
|
genX(GetAccelerationStructureBuildSizesKHR)(
|
|
VkDevice _device,
|
|
VkAccelerationStructureBuildTypeKHR buildType,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR* pBuildInfo,
|
|
const uint32_t* pMaxPrimitiveCounts,
|
|
VkAccelerationStructureBuildSizesInfoKHR* pSizeInfo)
|
|
{
|
|
ANV_FROM_HANDLE(anv_device, device, _device);
|
|
if (anv_device_init_accel_struct_build_state(device) != VK_SUCCESS)
|
|
return;
|
|
|
|
vk_get_as_build_sizes(_device, buildType, pBuildInfo, pMaxPrimitiveCounts,
|
|
pSizeInfo, &device->accel_struct_build.build_args);
|
|
}
|
|
|
|
void
|
|
genX(GetDeviceAccelerationStructureCompatibilityKHR)(
|
|
VkDevice _device,
|
|
const VkAccelerationStructureVersionInfoKHR* pVersionInfo,
|
|
VkAccelerationStructureCompatibilityKHR* pCompatibility)
|
|
{
|
|
ANV_FROM_HANDLE(anv_device, device, _device);
|
|
struct vk_accel_struct_serialization_header* ser_header =
|
|
(struct vk_accel_struct_serialization_header*)(pVersionInfo->pVersionData);
|
|
|
|
if (memcmp(ser_header->accel_struct_compat,
|
|
device->physical->rt_uuid,
|
|
sizeof(device->physical->rt_uuid)) == 0) {
|
|
*pCompatibility = VK_ACCELERATION_STRUCTURE_COMPATIBILITY_COMPATIBLE_KHR;
|
|
} else {
|
|
*pCompatibility =
|
|
VK_ACCELERATION_STRUCTURE_COMPATIBILITY_INCOMPATIBLE_KHR;
|
|
}
|
|
}
|
|
|
|
void
|
|
genX(CmdBuildAccelerationStructuresKHR)(
|
|
VkCommandBuffer commandBuffer,
|
|
uint32_t infoCount,
|
|
const VkAccelerationStructureBuildGeometryInfoKHR* pInfos,
|
|
const VkAccelerationStructureBuildRangeInfoKHR* const* ppBuildRangeInfos)
|
|
{
|
|
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
|
|
struct anv_device *device = cmd_buffer->device;
|
|
|
|
VkResult result = anv_device_init_accel_struct_build_state(device);
|
|
if (result != VK_SUCCESS) {
|
|
vk_command_buffer_set_error(&cmd_buffer->vk, result);
|
|
return;
|
|
}
|
|
|
|
struct anv_cmd_saved_state saved;
|
|
anv_cmd_buffer_save_state(cmd_buffer,
|
|
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
|
|
ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
|
|
ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);
|
|
|
|
/* Apply any outstanding accumulated PC bits before we proceed on building
|
|
* Acceleration Structure.
|
|
*
|
|
* 2 reasons for this :
|
|
* - some of the data accessed by the build might need to be flushed as a
|
|
* result of a previous barrier
|
|
* - the scratch buffer might get reused between builds
|
|
*/
|
|
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
|
|
|
vk_cmd_build_acceleration_structures(commandBuffer, &device->vk,
|
|
&device->meta_device, infoCount,
|
|
pInfos, ppBuildRangeInfos,
|
|
&device->accel_struct_build.build_args);
|
|
|
|
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
|
|
}
|
|
|
|
void
|
|
genX(CmdCopyAccelerationStructureKHR)(
|
|
VkCommandBuffer commandBuffer,
|
|
const VkCopyAccelerationStructureInfoKHR* pInfo)
|
|
{
|
|
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
|
|
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
|
|
|
|
trace_intel_begin_as_copy(&cmd_buffer->trace);
|
|
|
|
struct anv_cmd_saved_state saved;
|
|
anv_cmd_buffer_save_state(cmd_buffer,
|
|
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
|
|
ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
|
|
ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);
|
|
|
|
anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_COPY,
|
|
copy_spv, sizeof(copy_spv),
|
|
sizeof(struct copy_args), 0);
|
|
struct copy_args consts = {
|
|
.src_addr = vk_acceleration_structure_get_va(src),
|
|
.dst_addr = vk_acceleration_structure_get_va(dst),
|
|
.mode = ANV_COPY_MODE_COPY,
|
|
};
|
|
anv_bvh_build_set_args(commandBuffer, &consts, sizeof(consts));
|
|
|
|
/* L1/L2 caches flushes should have been dealt with by pipeline barriers.
|
|
* Unfortunately some platforms require L3 flush because CS (reading the
|
|
* dispatch paramters) is not L3 coherent.
|
|
*/
|
|
if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
|
|
anv_add_pending_pipe_bits(cmd_buffer,
|
|
VK_PIPELINE_STAGE_2_TRANSFER_BIT,
|
|
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
|
|
ANV_PIPE_DATA_CACHE_FLUSH_BIT,
|
|
"bvh size read for dispatch");
|
|
}
|
|
|
|
anv_genX(cmd_buffer->device->info, CmdDispatchIndirect2KHR)(
|
|
commandBuffer,
|
|
&(VkDispatchIndirect2InfoKHR) {
|
|
.sType = VK_STRUCTURE_TYPE_DISPATCH_INDIRECT_2_INFO_KHR,
|
|
.addressRange = {
|
|
.address = vk_acceleration_structure_get_va(src) +
|
|
offsetof(struct anv_accel_struct_header,
|
|
copy_dispatch_size),
|
|
.size = src->size,
|
|
},
|
|
});
|
|
|
|
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
|
|
|
|
trace_intel_end_as_copy(&cmd_buffer->trace);
|
|
}
|
|
|
|
void
|
|
genX(CmdCopyAccelerationStructureToMemoryKHR)(
|
|
VkCommandBuffer commandBuffer,
|
|
const VkCopyAccelerationStructureToMemoryInfoKHR* pInfo)
|
|
{
|
|
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
|
|
struct anv_device *device = cmd_buffer->device;
|
|
|
|
trace_intel_begin_as_copy(&cmd_buffer->trace);
|
|
|
|
struct anv_cmd_saved_state saved;
|
|
anv_cmd_buffer_save_state(cmd_buffer,
|
|
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
|
|
ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
|
|
ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);
|
|
|
|
anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_COPY,
|
|
copy_spv, sizeof(copy_spv),
|
|
sizeof(struct copy_args), 0);
|
|
struct copy_args consts = {
|
|
.src_addr = vk_acceleration_structure_get_va(src),
|
|
.dst_addr = pInfo->dst.deviceAddress,
|
|
.mode = ANV_COPY_MODE_SERIALIZE,
|
|
};
|
|
|
|
memcpy(consts.driver_uuid, device->physical->driver_uuid, VK_UUID_SIZE);
|
|
memcpy(consts.accel_struct_compat, device->physical->rt_uuid, VK_UUID_SIZE);
|
|
anv_bvh_build_set_args(commandBuffer, &consts, sizeof(consts));
|
|
|
|
/* L1/L2 caches flushes should have been dealt with by pipeline barriers.
|
|
* Unfortunately some platforms require L3 flush because CS (reading the
|
|
* dispatch paramters) is not L3 coherent.
|
|
*/
|
|
if (!ANV_DEVINFO_HAS_COHERENT_L3_CS(cmd_buffer->device->info)) {
|
|
anv_add_pending_pipe_bits(cmd_buffer,
|
|
VK_PIPELINE_STAGE_2_TRANSFER_BIT,
|
|
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
|
|
ANV_PIPE_DATA_CACHE_FLUSH_BIT,
|
|
"bvh size read for dispatch");
|
|
}
|
|
|
|
anv_genX(device->info, CmdDispatchIndirect2KHR)(
|
|
commandBuffer,
|
|
&(VkDispatchIndirect2InfoKHR) {
|
|
.sType = VK_STRUCTURE_TYPE_DISPATCH_INDIRECT_2_INFO_KHR,
|
|
.addressRange = {
|
|
.address = vk_acceleration_structure_get_va(src) +
|
|
offsetof(struct anv_accel_struct_header,
|
|
copy_dispatch_size),
|
|
.size = src->size,
|
|
},
|
|
});
|
|
|
|
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
|
|
|
|
trace_intel_end_as_copy(&cmd_buffer->trace);
|
|
}
|
|
|
|
void
|
|
genX(CmdCopyMemoryToAccelerationStructureKHR)(
|
|
VkCommandBuffer commandBuffer,
|
|
const VkCopyMemoryToAccelerationStructureInfoKHR* pInfo)
|
|
{
|
|
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
|
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
|
|
|
|
trace_intel_begin_as_copy(&cmd_buffer->trace);
|
|
|
|
struct anv_cmd_saved_state saved;
|
|
anv_cmd_buffer_save_state(cmd_buffer,
|
|
ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
|
|
ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_ALL |
|
|
ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, &saved);
|
|
|
|
anv_bvh_build_bind_pipeline(commandBuffer, ANV_OBJECT_KEY_BVH_COPY,
|
|
copy_spv, sizeof(copy_spv),
|
|
sizeof(struct copy_args), 0);
|
|
|
|
const struct copy_args consts = {
|
|
.src_addr = pInfo->src.deviceAddress,
|
|
.dst_addr = vk_acceleration_structure_get_va(dst),
|
|
.mode = ANV_COPY_MODE_DESERIALIZE,
|
|
};
|
|
anv_bvh_build_set_args(commandBuffer, &consts, sizeof(consts));
|
|
|
|
vk_common_CmdDispatch(commandBuffer, 512, 1, 1);
|
|
anv_cmd_buffer_restore_state(cmd_buffer, &saved);
|
|
|
|
trace_intel_end_as_copy(&cmd_buffer->trace);
|
|
}
|
|
|
|
void
|
|
genX(DestroyAccelerationStructureKHR)(
|
|
VkDevice _device,
|
|
VkAccelerationStructureKHR accelerationStructure,
|
|
const VkAllocationCallbacks* pAllocator)
|
|
{
|
|
vk_common_DestroyAccelerationStructureKHR(_device, accelerationStructure,
|
|
pAllocator);
|
|
}
|
|
#endif
|