mesa/src/intel/vulkan/bvh/copy.comp
Konstantin Seurer ea51a67996 vulkan/bvh: Enable glsl extensions in meson
Having a list of all enabled/used extensions in meson allows us to get
rid of a lot of boilerplate in every bvh build shader.

Reviewed-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35326>
2025-09-16 20:18:01 +00:00

140 lines
5.8 KiB
Text

/* Copyright © 2022 Bas Nieuwenhuizen
* Copyright © 2024 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#version 460
/* One-dimensional work group of 128 invocations. The copy loop in main()
 * strides by the total number of invocations in the dispatch, so this size
 * and the stride computation there must agree.
 */
layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;
/* Presumably declares copy_args, the ANV_COPY_MODE_* values and the
 * header/leaf types used below -- confirm against the header.
 */
#include "anv_build_interface.h"
/* Push constants for the copy. main() reads args.src_addr, args.dst_addr,
 * args.mode, args.driver_uuid and args.accel_struct_compat.
 */
layout(push_constant) uniform CONSTS {
copy_args args;
};
// Layout of serialized data
/**************************************|
| vk_accel_struct_serialization_header |
|--------------------------------------|
| For a TLAS, all handles to the BLAS |
| within this TLAS. |
| For a BLAS, nothing. |
|--------------------------------------|
| Driver-specific part. |
| For Intel, this starts with |
| anv_accel_struct_header as drawn |
| in anv_bvh.h |
|**************************************/
/*
* Explanation of BLAS handles:
* According to the spec of vkCmdCopyAccelerationStructureToMemoryKHR,
* for a TLAS, the handles of all BLAS/instances within this TLAS are
* tightly stored after vk_accel_struct_serialization_header, making this
* serialized-memory a semi-opaque object. The application might be able
* to swap/replace these handles with other handles. In fact this is what
* dEQP-VK.ray_tracing_pipeline.acceleration_structures.header_bottom_address.*
* is doing.
*
* Therefore, if the application updates the handles, we need to replace
* the old handles in anv_instance_leaf with the new ones. To reach each
* anv_instance_leaf without traversing the TLAS, a pointer to every
* anv_instance_leaf is stored right after anv_accel_struct_header, so
* we can locate the leaves in the TLAS directly.
*
* Although, the fact that the application can swap/replace new handles
* of BLAS without rebuilding the TLAS sounds a bit odd.
*/
/*
 * Copies an acceleration structure 8 bytes at a time. args.mode selects the
 * flavour of the copy:
 *
 *  - ANV_COPY_MODE_COPY: bytes go from args.src_addr to args.dst_addr with no
 *    fix-ups.
 *  - ANV_COPY_MODE_SERIALIZE: invocation 0 fills the
 *    vk_accel_struct_serialization_header at args.dst_addr, the BVH is copied
 *    behind that header and the handle table, and the bvh_ptr of every
 *    instance leaf is written into the handle table.
 *  - ANV_COPY_MODE_DESERIALIZE: the BVH is read from behind the serialization
 *    header and handle table at args.src_addr, copied to args.dst_addr, and
 *    every instance leaf gets its bvh_ptr and start node pointer rewritten
 *    from the handle table.
 *
 * Each invocation starts at byte offset gl_GlobalInvocationID.x * 8 and
 * advances by 8 bytes times the total number of invocations until
 * compacted_size bytes have been covered.
 */
void
main(void)
{
   uint32_t global_id = gl_GlobalInvocationID.x;

   /* Total number of invocations along x. The work-group size comes from the
    * layout declaration rather than a repeated literal, and the byte stride
    * is computed in 64 bits so it cannot wrap before it is added to the
    * 64-bit offset.
    */
   uint64_t lanes = uint64_t(gl_NumWorkGroups.x) * uint64_t(gl_WorkGroupSize.x);
   uint64_t stride = lanes * 8;

   uint64_t copy_src_addr = args.src_addr;
   uint64_t copy_dst_addr = args.dst_addr;

   if (args.mode == ANV_COPY_MODE_DESERIALIZE) {
      /* The BVH sits behind the serialization header and the handle table. */
      copy_src_addr += SIZEOF(vk_accel_struct_serialization_header) +
                       DEREF(REF(vk_accel_struct_serialization_header)(args.src_addr)).instance_count * SIZEOF(uint64_t);
   }

   REF(anv_accel_struct_header) header = REF(anv_accel_struct_header)(copy_src_addr);

   /* Handle table that follows the serialization header (one uint64_t per
    * instance). Points into the source here; redirected to the destination
    * when serializing.
    */
   uint64_t instance_base = args.src_addr + SIZEOF(vk_accel_struct_serialization_header);

   /* The addresses of the instance leaves are stored right after the BVH
    * header: [instance_offset, instance_end) is that table, as byte offsets
    * from the start of the BVH. instance_end stays 0 when there are no
    * instances.
    */
   uint64_t handle_table_size = DEREF(header).instance_count * SIZEOF(uint64_t);
   uint64_t instance_offset = SIZEOF(anv_accel_struct_header);
   uint64_t instance_end = handle_table_size;
   if (instance_end > 0)
      instance_end += instance_offset;

   if (args.mode == ANV_COPY_MODE_SERIALIZE) {
      /* copy_src_addr == args.src_addr in this mode, so `header` is the
       * source BVH header and handle_table_size is the size of the table we
       * are about to emit.
       */
      copy_dst_addr += SIZEOF(vk_accel_struct_serialization_header) + handle_table_size;

      /* A single invocation writes the serialization header. */
      if (global_id == 0) {
         REF(vk_accel_struct_serialization_header) ser_header =
            REF(vk_accel_struct_serialization_header)(args.dst_addr);
         DEREF(ser_header).serialization_size = DEREF(header).serialization_size;
         DEREF(ser_header).deserialization_size = DEREF(header).compacted_size;
         DEREF(ser_header).instance_count = DEREF(header).instance_count;

         for (uint32_t i = 0; i < VK_UUID_SIZE; i++) {
            DEREF(ser_header).driver_uuid[i] = args.driver_uuid[i];
         }
         for (uint32_t i = 0; i < VK_UUID_SIZE; i++) {
            DEREF(ser_header).accel_struct_compat[i] = args.accel_struct_compat[i];
         }
      }

      instance_base = args.dst_addr + SIZEOF(vk_accel_struct_serialization_header);
   } else if (args.mode == ANV_COPY_MODE_COPY) {
      /* Plain copies never touch instance leaves: empty the fix-up range. */
      instance_end = 0;
   }

   uint64_t size = DEREF(header).compacted_size;

   for (uint64_t offset = uint64_t(global_id) * 8; offset < size; offset += stride) {
      /* Copy 8 bytes per iteration and keep the value around: inside the
       * leaf-address table it is the address of an instance leaf.
       */
      uint64_t qword = DEREF(REF(uint64_t)(copy_src_addr + offset));
      DEREF(REF(uint64_t)(copy_dst_addr + offset)) = qword;

      /* Only entries of the leaf-address table need a fix-up. */
      if (!(offset < instance_end && offset >= instance_offset &&
            (offset - instance_offset) % SIZEOF(uint64_t) == 0))
         continue;

      /* The fix-up is done by the invocation that copied the table entry.
       * NOTE(review): the leaf itself is copied by whichever invocation owns
       * the leaf's offset; confirm the writes below cannot race with that
       * copy.
       */
      uint64_t idx = (offset - instance_offset) / SIZEOF(uint64_t);
      REF(anv_instance_leaf) instance_leaf = REF(anv_instance_leaf)(qword);

      if (args.mode == ANV_COPY_MODE_SERIALIZE) {
         /* Store the leaf's BLAS pointer into the handle table after ser_header. */
         uint64_t blas_ptr = DEREF(instance_leaf).part1.bvh_ptr;
         DEREF(INDEX(uint64_t, instance_base, idx)) = blas_ptr;
      } else { /* ANV_COPY_MODE_DESERIALIZE */
         /* Replace the leaf's bvh_ptr with the handle stored after ser_header. */
         uint64_t blas_ptr = DEREF(INDEX(uint64_t, instance_base, idx));
         DEREF(instance_leaf).part1.bvh_ptr = blas_ptr;

         /* Set the startNodePtr to blas_ptr + ANV_RT_BVH_HEADER_SIZE. */
         uint64_t new_startNodePtr = blas_ptr + ANV_RT_BVH_HEADER_SIZE;

#if GFX_VERx10 >= 300
         DEREF(instance_leaf).part0.QW_startNodePtr = new_startNodePtr;
#else
         /* Replace only the low 48 bits; the upper 16 bits of the qword are
          * preserved.
          */
         uint64_t mask = 0x0000fffffffffffful;
         DEREF(instance_leaf).part0.QW_startNodePtr =
            (DEREF(instance_leaf).part0.QW_startNodePtr & ~mask) | (new_startNodePtr & mask);
#endif
      }
   }
}