mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-01 08:08:06 +02:00
radv: Remove acceleration structure host builds
This code path is unmaintained and barely used. Signed-off-by: Konstantin Seurer <konstantin.seurer@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17028>
This commit is contained in:
parent
d0e2013633
commit
4f5da7939c
2 changed files with 23 additions and 671 deletions
|
|
@ -23,8 +23,6 @@
|
|||
#include "radv_acceleration_structure.h"
|
||||
#include "radv_private.h"
|
||||
|
||||
#include "util/format/format_utils.h"
|
||||
#include "util/half_float.h"
|
||||
#include "nir_builder.h"
|
||||
#include "radv_cs.h"
|
||||
#include "radv_meta.h"
|
||||
|
|
@ -192,588 +190,8 @@ radv_WriteAccelerationStructuresPropertiesKHR(
|
|||
const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType,
|
||||
size_t dataSize, void *pData, size_t stride)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
char *data_out = (char *)pData;
|
||||
|
||||
for (uint32_t i = 0; i < accelerationStructureCount; ++i) {
|
||||
RADV_FROM_HANDLE(radv_acceleration_structure, accel, pAccelerationStructures[i]);
|
||||
const char *base_ptr = (const char *)device->ws->buffer_map(accel->bo);
|
||||
if (!base_ptr)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
const struct radv_accel_struct_header *header = (const void *)(base_ptr + accel->mem_offset);
|
||||
if (stride * i + sizeof(VkDeviceSize) <= dataSize) {
|
||||
uint64_t value;
|
||||
switch (queryType) {
|
||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR:
|
||||
value = header->compacted_size;
|
||||
break;
|
||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR:
|
||||
value = header->serialization_size;
|
||||
break;
|
||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR:
|
||||
value = header->instance_count;
|
||||
break;
|
||||
case VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR:
|
||||
value = header->size;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandled acceleration structure query");
|
||||
}
|
||||
*(VkDeviceSize *)(data_out + stride * i) = value;
|
||||
}
|
||||
device->ws->buffer_unmap(accel->bo);
|
||||
}
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* Shared cursor state threaded through the host-side leaf-node builders
 * (build_triangles/build_aabbs/build_instances) while they append nodes to
 * the acceleration structure and record the resulting node ids. */
struct radv_bvh_build_ctx {
   /* Next slot in the scratch buffer where the id of each emitted leaf node
    * is recorded for the later bottom-up internal-node build. */
   uint32_t *write_scratch;
   /* Start of the acceleration structure data; node ids are derived from
    * offsets relative to this pointer. */
   char *base;
   /* Write cursor for the next node; advanced by the node size (64 or 128
    * bytes) as leaves are emitted. */
   char *curr_ptr;
};
|
||||
|
||||
/* Emits one 64-byte triangle leaf node per primitive of a triangle geometry
 * into the BVH, recording each node's id in the scratch buffer.
 *
 * ctx:         build cursor/scratch state (advanced by 64 bytes per primitive).
 * geom:        the triangles geometry (vertex/index/transform host pointers).
 * range:       primitive count and offsets for this build.
 * geometry_id: index of this geometry, packed into each leaf node.
 */
static void
build_triangles(struct radv_bvh_build_ctx *ctx, const VkAccelerationStructureGeometryKHR *geom,
                const VkAccelerationStructureBuildRangeInfoKHR *range, unsigned geometry_id)
{
   const VkAccelerationStructureGeometryTrianglesDataKHR *tri_data = &geom->geometry.triangles;
   VkTransformMatrixKHR matrix;
   const char *index_data = (const char *)tri_data->indexData.hostAddress;
   const char *v_data_base = (const char *)tri_data->vertexData.hostAddress;

   /* Per the Vulkan spec, primitiveOffset applies to vertex data for
    * non-indexed geometry and to index data otherwise. */
   if (tri_data->indexType == VK_INDEX_TYPE_NONE_KHR)
      v_data_base += range->primitiveOffset;
   else
      index_data += range->primitiveOffset;

   /* Optional per-geometry transform; identity when absent. */
   if (tri_data->transformData.hostAddress) {
      matrix = *(const VkTransformMatrixKHR *)((const char *)tri_data->transformData.hostAddress +
                                               range->transformOffset);
   } else {
      matrix = (VkTransformMatrixKHR){
         .matrix = {{1.0, 0.0, 0.0, 0.0}, {0.0, 1.0, 0.0, 0.0}, {0.0, 0.0, 1.0, 0.0}}};
   }

   for (uint32_t p = 0; p < range->primitiveCount; ++p, ctx->curr_ptr += 64) {
      struct radv_bvh_triangle_node *node = (void *)ctx->curr_ptr;
      uint32_t node_offset = ctx->curr_ptr - ctx->base;
      /* Node ids address 64-byte nodes in 8-byte units; triangle type tag is 0. */
      uint32_t node_id = node_offset >> 3;
      *ctx->write_scratch++ = node_id;

      for (unsigned v = 0; v < 3; ++v) {
         uint32_t v_index = range->firstVertex;
         /* Resolve the vertex index; index_data is advanced as indices are
          * consumed, so statement order here is load-bearing. */
         switch (tri_data->indexType) {
         case VK_INDEX_TYPE_NONE_KHR:
            v_index += p * 3 + v;
            break;
         case VK_INDEX_TYPE_UINT8_EXT:
            v_index += *(const uint8_t *)index_data;
            index_data += 1;
            break;
         case VK_INDEX_TYPE_UINT16:
            v_index += *(const uint16_t *)index_data;
            index_data += 2;
            break;
         case VK_INDEX_TYPE_UINT32:
            v_index += *(const uint32_t *)index_data;
            index_data += 4;
            break;
         case VK_INDEX_TYPE_MAX_ENUM:
            unreachable("Unhandled VK_INDEX_TYPE_MAX_ENUM");
            break;
         }

         const char *v_data = v_data_base + v_index * tri_data->vertexStride;
         float coords[4];
         /* Decode the vertex position to float4; missing components default
          * to (0, 0, 0, 1). */
         switch (tri_data->vertexFormat) {
         case VK_FORMAT_R32G32_SFLOAT:
            coords[0] = *(const float *)(v_data + 0);
            coords[1] = *(const float *)(v_data + 4);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R32G32B32_SFLOAT:
            coords[0] = *(const float *)(v_data + 0);
            coords[1] = *(const float *)(v_data + 4);
            coords[2] = *(const float *)(v_data + 8);
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R32G32B32A32_SFLOAT:
            coords[0] = *(const float *)(v_data + 0);
            coords[1] = *(const float *)(v_data + 4);
            coords[2] = *(const float *)(v_data + 8);
            coords[3] = *(const float *)(v_data + 12);
            break;
         case VK_FORMAT_R16G16_SFLOAT:
            coords[0] = _mesa_half_to_float(*(const uint16_t *)(v_data + 0));
            coords[1] = _mesa_half_to_float(*(const uint16_t *)(v_data + 2));
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R16G16B16_SFLOAT:
            coords[0] = _mesa_half_to_float(*(const uint16_t *)(v_data + 0));
            coords[1] = _mesa_half_to_float(*(const uint16_t *)(v_data + 2));
            coords[2] = _mesa_half_to_float(*(const uint16_t *)(v_data + 4));
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R16G16B16A16_SFLOAT:
            coords[0] = _mesa_half_to_float(*(const uint16_t *)(v_data + 0));
            coords[1] = _mesa_half_to_float(*(const uint16_t *)(v_data + 2));
            coords[2] = _mesa_half_to_float(*(const uint16_t *)(v_data + 4));
            coords[3] = _mesa_half_to_float(*(const uint16_t *)(v_data + 6));
            break;
         case VK_FORMAT_R16G16_SNORM:
            coords[0] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 0), 16);
            coords[1] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 2), 16);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R16G16_UNORM:
            coords[0] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 0), 16);
            coords[1] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 2), 16);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R16G16B16A16_SNORM:
            coords[0] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 0), 16);
            coords[1] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 2), 16);
            coords[2] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 4), 16);
            coords[3] = _mesa_snorm_to_float(*(const int16_t *)(v_data + 6), 16);
            break;
         case VK_FORMAT_R16G16B16A16_UNORM:
            coords[0] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 0), 16);
            coords[1] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 2), 16);
            coords[2] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 4), 16);
            coords[3] = _mesa_unorm_to_float(*(const uint16_t *)(v_data + 6), 16);
            break;
         case VK_FORMAT_R8G8_SNORM:
            coords[0] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 0), 8);
            coords[1] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 1), 8);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R8G8_UNORM:
            coords[0] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 0), 8);
            coords[1] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 1), 8);
            coords[2] = 0.0f;
            coords[3] = 1.0f;
            break;
         case VK_FORMAT_R8G8B8A8_SNORM:
            coords[0] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 0), 8);
            coords[1] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 1), 8);
            coords[2] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 2), 8);
            coords[3] = _mesa_snorm_to_float(*(const int8_t *)(v_data + 3), 8);
            break;
         case VK_FORMAT_R8G8B8A8_UNORM:
            coords[0] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 0), 8);
            coords[1] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 1), 8);
            coords[2] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 2), 8);
            coords[3] = _mesa_unorm_to_float(*(const uint8_t *)(v_data + 3), 8);
            break;
         case VK_FORMAT_A2B10G10R10_UNORM_PACK32: {
            uint32_t val = *(const uint32_t *)v_data;
            coords[0] = _mesa_unorm_to_float((val >> 0) & 0x3FF, 10);
            coords[1] = _mesa_unorm_to_float((val >> 10) & 0x3FF, 10);
            coords[2] = _mesa_unorm_to_float((val >> 20) & 0x3FF, 10);
            coords[3] = _mesa_unorm_to_float((val >> 30) & 0x3, 2);
         } break;
         default:
            unreachable("Unhandled vertex format in BVH build");
         }

         /* Apply the 3x4 geometry transform (affine: row j dotted with the
          * homogeneous position). */
         for (unsigned j = 0; j < 3; ++j) {
            float r = 0;
            for (unsigned k = 0; k < 4; ++k)
               r += matrix.matrix[j][k] * coords[k];
            node->coords[v][j] = r;
         }

         node->triangle_id = p;
         node->geometry_id_and_flags = geometry_id | (geom->flags << 28);

         /* Seems to be needed for the barycentric I/J outputs; otherwise
          * I = J = 0 (original comment was garbled here — intent uncertain). */
         node->id = 9;
      }
   }
}
|
||||
|
||||
/* Emits one 128-byte instance leaf node per primitive of an instances
 * geometry, recording node ids in the scratch buffer.
 *
 * For each instance this stores the object-to-world and (inverted)
 * world-to-object matrices, maps the referenced BLAS to read its header for
 * the root offset and AABB, and transforms that AABB into world space.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if a referenced BLAS cannot be mapped;
 * VK_SUCCESS otherwise. Instances with a null BLAS reference are skipped
 * (their 128-byte slot is still consumed by the cursor increment).
 */
static VkResult
build_instances(struct radv_device *device, struct radv_bvh_build_ctx *ctx,
                const VkAccelerationStructureGeometryKHR *geom,
                const VkAccelerationStructureBuildRangeInfoKHR *range)
{
   const VkAccelerationStructureGeometryInstancesDataKHR *inst_data = &geom->geometry.instances;

   for (uint32_t p = 0; p < range->primitiveCount; ++p, ctx->curr_ptr += 128) {
      const char *instance_data =
         (const char *)inst_data->data.hostAddress + range->primitiveOffset;
      /* arrayOfPointers selects between an array of pointers to instances
       * and a packed array of instance structs. */
      const VkAccelerationStructureInstanceKHR *instance =
         inst_data->arrayOfPointers
            ? (((const VkAccelerationStructureInstanceKHR *const *)instance_data)[p])
            : &((const VkAccelerationStructureInstanceKHR *)instance_data)[p];
      if (!instance->accelerationStructureReference) {
         continue;
      }

      struct radv_bvh_instance_node *node = (void *)ctx->curr_ptr;
      uint32_t node_offset = ctx->curr_ptr - ctx->base;
      /* Node id: offset in 8-byte units, tagged with the instance node type. */
      uint32_t node_id = (node_offset >> 3) | radv_bvh_node_instance;
      *ctx->write_scratch++ = node_id;

      /* Expand the 3x4 instance transform to 4x4 so it can be inverted;
       * note transform[12..14]=0, transform[15]=1 fill the last column of
       * the column-appended layout used by util_invert_mat4x4. */
      float transform[16], inv_transform[16];
      memcpy(transform, &instance->transform.matrix, sizeof(instance->transform.matrix));
      transform[12] = transform[13] = transform[14] = 0.0f;
      transform[15] = 1.0f;

      util_invert_mat4x4(inv_transform, transform);
      memcpy(node->wto_matrix, inv_transform, sizeof(node->wto_matrix));
      /* Keep the original translation column in the world-to-object slots
       * expected by the hardware node layout. */
      node->wto_matrix[3] = transform[3];
      node->wto_matrix[7] = transform[7];
      node->wto_matrix[11] = transform[11];
      node->custom_instance_and_mask = instance->instanceCustomIndex | (instance->mask << 24);
      node->sbt_offset_and_flags =
         instance->instanceShaderBindingTableRecordOffset | (instance->flags << 24);
      node->instance_id = p;

      /* Object-to-world rotation part, stored transposed. */
      for (unsigned i = 0; i < 3; ++i)
         for (unsigned j = 0; j < 3; ++j)
            node->otw_matrix[i * 3 + j] = instance->transform.matrix[j][i];

      RADV_FROM_HANDLE(radv_acceleration_structure, src_accel_struct,
                       (VkAccelerationStructureKHR)instance->accelerationStructureReference);
      const void *src_base = device->ws->buffer_map(src_accel_struct->bo);
      if (!src_base)
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

      src_base = (const char *)src_base + src_accel_struct->mem_offset;
      const struct radv_accel_struct_header *src_header = src_base;
      node->base_ptr = radv_accel_struct_get_va(src_accel_struct) | src_header->root_node_offset;

      /* Transform the BLAS AABB into world space: translation plus, per
       * axis, the min/max over the scaled source extents. */
      for (unsigned j = 0; j < 3; ++j) {
         node->aabb[0][j] = instance->transform.matrix[j][3];
         node->aabb[1][j] = instance->transform.matrix[j][3];
         for (unsigned k = 0; k < 3; ++k) {
            node->aabb[0][j] += MIN2(instance->transform.matrix[j][k] * src_header->aabb[0][k],
                                     instance->transform.matrix[j][k] * src_header->aabb[1][k]);
            node->aabb[1][j] += MAX2(instance->transform.matrix[j][k] * src_header->aabb[0][k],
                                     instance->transform.matrix[j][k] * src_header->aabb[1][k]);
         }
      }
      device->ws->buffer_unmap(src_accel_struct->bo);
   }
   return VK_SUCCESS;
}
|
||||
|
||||
static void
|
||||
build_aabbs(struct radv_bvh_build_ctx *ctx, const VkAccelerationStructureGeometryKHR *geom,
|
||||
const VkAccelerationStructureBuildRangeInfoKHR *range, unsigned geometry_id)
|
||||
{
|
||||
const VkAccelerationStructureGeometryAabbsDataKHR *aabb_data = &geom->geometry.aabbs;
|
||||
|
||||
for (uint32_t p = 0; p < range->primitiveCount; ++p, ctx->curr_ptr += 64) {
|
||||
struct radv_bvh_aabb_node *node = (void *)ctx->curr_ptr;
|
||||
uint32_t node_offset = ctx->curr_ptr - ctx->base;
|
||||
uint32_t node_id = (node_offset >> 3) | radv_bvh_node_aabb;
|
||||
*ctx->write_scratch++ = node_id;
|
||||
|
||||
const VkAabbPositionsKHR *aabb =
|
||||
(const VkAabbPositionsKHR *)((const char *)aabb_data->data.hostAddress +
|
||||
range->primitiveOffset + p * aabb_data->stride);
|
||||
|
||||
node->aabb[0][0] = aabb->minX;
|
||||
node->aabb[0][1] = aabb->minY;
|
||||
node->aabb[0][2] = aabb->minZ;
|
||||
node->aabb[1][0] = aabb->maxX;
|
||||
node->aabb[1][1] = aabb->maxY;
|
||||
node->aabb[1][2] = aabb->maxZ;
|
||||
node->primitive_id = p;
|
||||
node->geometry_id_and_flags = geometry_id;
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
leaf_node_count(const VkAccelerationStructureBuildGeometryInfoKHR *info,
|
||||
const VkAccelerationStructureBuildRangeInfoKHR *ranges)
|
||||
{
|
||||
uint32_t count = 0;
|
||||
for (uint32_t i = 0; i < info->geometryCount; ++i) {
|
||||
count += ranges[i].primitiveCount;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
static void
|
||||
compute_bounds(const char *base_ptr, uint32_t node_id, float *bounds)
|
||||
{
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
bounds[i] = INFINITY;
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
bounds[3 + i] = -INFINITY;
|
||||
|
||||
switch (node_id & 7) {
|
||||
case radv_bvh_node_triangle: {
|
||||
const struct radv_bvh_triangle_node *node = (const void *)(base_ptr + (node_id / 8 * 64));
|
||||
for (unsigned v = 0; v < 3; ++v) {
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
bounds[j] = MIN2(bounds[j], node->coords[v][j]);
|
||||
bounds[3 + j] = MAX2(bounds[3 + j], node->coords[v][j]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case radv_bvh_node_internal: {
|
||||
const struct radv_bvh_box32_node *node = (const void *)(base_ptr + (node_id / 8 * 64));
|
||||
for (unsigned c2 = 0; c2 < 4; ++c2) {
|
||||
if (isnan(node->coords[c2][0][0]))
|
||||
continue;
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
bounds[j] = MIN2(bounds[j], node->coords[c2][0][j]);
|
||||
bounds[3 + j] = MAX2(bounds[3 + j], node->coords[c2][1][j]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case radv_bvh_node_instance: {
|
||||
const struct radv_bvh_instance_node *node = (const void *)(base_ptr + (node_id / 8 * 64));
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
bounds[j] = MIN2(bounds[j], node->aabb[0][j]);
|
||||
bounds[3 + j] = MAX2(bounds[3 + j], node->aabb[1][j]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case radv_bvh_node_aabb: {
|
||||
const struct radv_bvh_aabb_node *node = (const void *)(base_ptr + (node_id / 8 * 64));
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
bounds[j] = MIN2(bounds[j], node->aabb[0][j]);
|
||||
bounds[3 + j] = MAX2(bounds[3 + j], node->aabb[1][j]);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* A node id paired with its sort key for the BVH reordering pass. */
struct bvh_opt_entry {
   uint64_t key;
   uint32_t node_id;
};

/* qsort comparator for bvh_opt_entry: ascending by key, with node_id as a
 * tie-breaker so the sort order is fully deterministic. */
static int
bvh_opt_compare(const void *_a, const void *_b)
{
   const struct bvh_opt_entry *lhs = _a;
   const struct bvh_opt_entry *rhs = _b;

   if (lhs->key != rhs->key)
      return lhs->key < rhs->key ? -1 : 1;
   if (lhs->node_id != rhs->node_id)
      return lhs->node_id < rhs->node_id ? -1 : 1;
   return 0;
}
|
||||
|
||||
/* Reorders the leaf node ids in-place along a Morton (Z-order) curve so that
 * spatially close leaves end up adjacent, improving the quality of the
 * naive bottom-up internal-node build that follows.
 *
 * base_ptr:   start of the acceleration structure data (for bounds lookups).
 * node_ids:   array of node ids to reorder in place.
 * node_count: number of entries in node_ids.
 *
 * Best-effort: if the temporary entry array cannot be allocated the ids are
 * simply left in their original order.
 */
static void
optimize_bvh(const char *base_ptr, uint32_t *node_ids, uint32_t node_count)
{
   if (node_count == 0)
      return;

   /* First pass: compute the bounds of all nodes combined, used to
    * normalize each node's centroid into [0, 1). */
   float bounds[6];
   for (unsigned i = 0; i < 3; ++i)
      bounds[i] = INFINITY;
   for (unsigned i = 0; i < 3; ++i)
      bounds[3 + i] = -INFINITY;

   for (uint32_t i = 0; i < node_count; ++i) {
      float node_bounds[6];
      compute_bounds(base_ptr, node_ids[i], node_bounds);
      for (unsigned j = 0; j < 3; ++j)
         bounds[j] = MIN2(bounds[j], node_bounds[j]);
      for (unsigned j = 0; j < 3; ++j)
         bounds[3 + j] = MAX2(bounds[3 + j], node_bounds[3 + j]);
   }

   struct bvh_opt_entry *entries = calloc(node_count, sizeof(struct bvh_opt_entry));
   if (!entries)
      return;

   /* Second pass: quantize each node's centroid to 21 bits per axis and
    * interleave the bits into a 63-bit Morton key. */
   for (uint32_t i = 0; i < node_count; ++i) {
      float node_bounds[6];
      compute_bounds(base_ptr, node_ids[i], node_bounds);
      float node_coords[3];
      for (unsigned j = 0; j < 3; ++j)
         node_coords[j] = (node_bounds[j] + node_bounds[3 + j]) * 0.5;
      int32_t coords[3];
      /* Clamp to [0, 2^21 - 1]; NOTE(review): a degenerate axis
       * (bounds max == min) divides by zero here — TODO confirm intended. */
      for (unsigned j = 0; j < 3; ++j)
         coords[j] = MAX2(
            MIN2((int32_t)((node_coords[j] - bounds[j]) / (bounds[3 + j] - bounds[j]) * (1 << 21)),
                 (1 << 21) - 1),
            0);
      uint64_t key = 0;
      for (unsigned j = 0; j < 21; ++j)
         for (unsigned k = 0; k < 3; ++k)
            key |= (uint64_t)((coords[k] >> j) & 1) << (j * 3 + k);
      entries[i].key = key;
      entries[i].node_id = node_ids[i];
   }

   /* Sort by Morton key and write the reordered ids back. */
   qsort(entries, node_count, sizeof(entries[0]), bvh_opt_compare);
   for (unsigned i = 0; i < node_count; ++i)
      node_ids[i] = entries[i].node_id;

   free(entries);
}
|
||||
|
||||
/* Derives the remaining header fields from an already-set compacted_size
 * and instance_count: the GPU copy dispatch dimensions, the serialized
 * size (with room for the serialization header and per-instance pointers),
 * and the plain size. */
static void
fill_accel_struct_header(struct radv_accel_struct_header *header)
{
   /* 16 bytes per invocation, 64 invocations per workgroup */
   header->copy_dispatch_size[0] = DIV_ROUND_UP(header->compacted_size, 16 * 64);
   header->copy_dispatch_size[1] = 1;
   header->copy_dispatch_size[2] = 1;

   /* Serialized form = compacted data + 128-byte-aligned serialization
    * header with one uint64_t BLAS pointer per instance. */
   header->serialization_size =
      header->compacted_size + align(sizeof(struct radv_accel_struct_serialization_header) +
                                        sizeof(uint64_t) * header->instance_count,
                                     128);

   header->size = header->serialization_size -
                  sizeof(struct radv_accel_struct_serialization_header) -
                  sizeof(uint64_t) * header->instance_count;
}
|
||||
|
||||
static VkResult
|
||||
build_bvh(struct radv_device *device, const VkAccelerationStructureBuildGeometryInfoKHR *info,
|
||||
const VkAccelerationStructureBuildRangeInfoKHR *ranges)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_acceleration_structure, accel, info->dstAccelerationStructure);
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
uint32_t *scratch[2];
|
||||
scratch[0] = info->scratchData.hostAddress;
|
||||
scratch[1] = scratch[0] + leaf_node_count(info, ranges);
|
||||
|
||||
char *base_ptr = (char *)device->ws->buffer_map(accel->bo);
|
||||
if (!base_ptr)
|
||||
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
base_ptr = base_ptr + accel->mem_offset;
|
||||
struct radv_accel_struct_header *header = (void *)base_ptr;
|
||||
void *first_node_ptr = (char *)base_ptr + ALIGN(sizeof(*header), 64);
|
||||
|
||||
struct radv_bvh_build_ctx ctx = {.write_scratch = scratch[0],
|
||||
.base = base_ptr,
|
||||
.curr_ptr = (char *)first_node_ptr + 128};
|
||||
|
||||
uint64_t instance_offset = (const char *)ctx.curr_ptr - (const char *)base_ptr;
|
||||
uint64_t instance_count = 0;
|
||||
|
||||
/* This initializes the leaf nodes of the BVH all at the same level. */
|
||||
for (int inst = 1; inst >= 0; --inst) {
|
||||
for (uint32_t i = 0; i < info->geometryCount; ++i) {
|
||||
const VkAccelerationStructureGeometryKHR *geom =
|
||||
info->pGeometries ? &info->pGeometries[i] : info->ppGeometries[i];
|
||||
|
||||
if ((inst && geom->geometryType != VK_GEOMETRY_TYPE_INSTANCES_KHR) ||
|
||||
(!inst && geom->geometryType == VK_GEOMETRY_TYPE_INSTANCES_KHR))
|
||||
continue;
|
||||
|
||||
switch (geom->geometryType) {
|
||||
case VK_GEOMETRY_TYPE_TRIANGLES_KHR:
|
||||
build_triangles(&ctx, geom, ranges + i, i);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_AABBS_KHR:
|
||||
build_aabbs(&ctx, geom, ranges + i, i);
|
||||
break;
|
||||
case VK_GEOMETRY_TYPE_INSTANCES_KHR: {
|
||||
result = build_instances(device, &ctx, geom, ranges + i);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
instance_count += ranges[i].primitiveCount;
|
||||
break;
|
||||
}
|
||||
case VK_GEOMETRY_TYPE_MAX_ENUM_KHR:
|
||||
unreachable("VK_GEOMETRY_TYPE_MAX_ENUM_KHR unhandled");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t node_counts[2] = {ctx.write_scratch - scratch[0], 0};
|
||||
optimize_bvh(base_ptr, scratch[0], node_counts[0]);
|
||||
unsigned d;
|
||||
|
||||
/*
|
||||
* This is the most naive BVH building algorithm I could think of:
|
||||
* just iteratively builds each level from bottom to top with
|
||||
* the children of each node being in-order and tightly packed.
|
||||
*
|
||||
* Is probably terrible for traversal but should be easy to build an
|
||||
* equivalent GPU version.
|
||||
*/
|
||||
for (d = 0; node_counts[d & 1] > 1 || d == 0; ++d) {
|
||||
uint32_t child_count = node_counts[d & 1];
|
||||
const uint32_t *children = scratch[d & 1];
|
||||
uint32_t *dst_ids = scratch[(d & 1) ^ 1];
|
||||
unsigned dst_count;
|
||||
unsigned child_idx = 0;
|
||||
for (dst_count = 0; child_idx < MAX2(1, child_count); ++dst_count, child_idx += 4) {
|
||||
unsigned local_child_count = MIN2(4, child_count - child_idx);
|
||||
uint32_t child_ids[4];
|
||||
float bounds[4][6];
|
||||
|
||||
for (unsigned c = 0; c < local_child_count; ++c) {
|
||||
uint32_t id = children[child_idx + c];
|
||||
child_ids[c] = id;
|
||||
|
||||
compute_bounds(base_ptr, id, bounds[c]);
|
||||
}
|
||||
|
||||
struct radv_bvh_box32_node *node;
|
||||
|
||||
/* Put the root node at base_ptr so the id = 0, which allows some
|
||||
* traversal optimizations. */
|
||||
if (child_idx == 0 && local_child_count == child_count) {
|
||||
node = first_node_ptr;
|
||||
header->root_node_offset = ((char *)first_node_ptr - (char *)base_ptr) / 64 * 8 + 5;
|
||||
} else {
|
||||
uint32_t dst_id = (ctx.curr_ptr - base_ptr) / 64;
|
||||
dst_ids[dst_count] = dst_id * 8 + 5;
|
||||
|
||||
node = (void *)ctx.curr_ptr;
|
||||
ctx.curr_ptr += 128;
|
||||
}
|
||||
|
||||
for (unsigned c = 0; c < local_child_count; ++c) {
|
||||
node->children[c] = child_ids[c];
|
||||
for (unsigned i = 0; i < 2; ++i)
|
||||
for (unsigned j = 0; j < 3; ++j)
|
||||
node->coords[c][i][j] = bounds[c][i * 3 + j];
|
||||
}
|
||||
for (unsigned c = local_child_count; c < 4; ++c) {
|
||||
for (unsigned i = 0; i < 2; ++i)
|
||||
for (unsigned j = 0; j < 3; ++j)
|
||||
node->coords[c][i][j] = NAN;
|
||||
}
|
||||
}
|
||||
|
||||
node_counts[(d & 1) ^ 1] = dst_count;
|
||||
}
|
||||
|
||||
compute_bounds(base_ptr, header->root_node_offset, &header->aabb[0][0]);
|
||||
|
||||
header->instance_offset = instance_offset;
|
||||
header->instance_count = instance_count;
|
||||
header->compacted_size = (char *)ctx.curr_ptr - base_ptr;
|
||||
|
||||
fill_accel_struct_header(header);
|
||||
|
||||
fail:
|
||||
device->ws->buffer_unmap(accel->bo);
|
||||
return result;
|
||||
unreachable("Unimplemented");
|
||||
return VK_ERROR_FEATURE_NOT_PRESENT;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
|
|
@ -782,44 +200,16 @@ radv_BuildAccelerationStructuresKHR(
|
|||
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
|
||||
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
for (uint32_t i = 0; i < infoCount; ++i) {
|
||||
result = build_bvh(device, pInfos + i, ppBuildRangeInfos[i]);
|
||||
if (result != VK_SUCCESS)
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
unreachable("Unimplemented");
|
||||
return VK_ERROR_FEATURE_NOT_PRESENT;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
radv_CopyAccelerationStructureKHR(VkDevice _device, VkDeferredOperationKHR deferredOperation,
|
||||
const VkCopyAccelerationStructureInfoKHR *pInfo)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_acceleration_structure, src_struct, pInfo->src);
|
||||
RADV_FROM_HANDLE(radv_acceleration_structure, dst_struct, pInfo->dst);
|
||||
|
||||
char *src_ptr = (char *)device->ws->buffer_map(src_struct->bo);
|
||||
if (!src_ptr)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
char *dst_ptr = (char *)device->ws->buffer_map(dst_struct->bo);
|
||||
if (!dst_ptr) {
|
||||
device->ws->buffer_unmap(src_struct->bo);
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
src_ptr += src_struct->mem_offset;
|
||||
dst_ptr += dst_struct->mem_offset;
|
||||
|
||||
const struct radv_accel_struct_header *header = (const void *)src_ptr;
|
||||
memcpy(dst_ptr, src_ptr, header->compacted_size);
|
||||
|
||||
device->ws->buffer_unmap(src_struct->bo);
|
||||
device->ws->buffer_unmap(dst_struct->bo);
|
||||
return VK_SUCCESS;
|
||||
unreachable("Unimplemented");
|
||||
return VK_ERROR_FEATURE_NOT_PRESENT;
|
||||
}
|
||||
|
||||
static nir_builder
|
||||
|
|
@ -2285,7 +1675,18 @@ radv_CmdBuildAccelerationStructuresKHR(
|
|||
header.instance_count = bvh_states[i].instance_count;
|
||||
header.compacted_size = bvh_states[i].node_offset;
|
||||
|
||||
fill_accel_struct_header(&header);
|
||||
header.copy_dispatch_size[0] = DIV_ROUND_UP(header.compacted_size, 16 * 64);
|
||||
header.copy_dispatch_size[1] = 1;
|
||||
header.copy_dispatch_size[2] = 1;
|
||||
|
||||
header.serialization_size =
|
||||
header.compacted_size + align(sizeof(struct radv_accel_struct_serialization_header) +
|
||||
sizeof(uint64_t) * header.instance_count,
|
||||
128);
|
||||
|
||||
header.size = header.serialization_size -
|
||||
sizeof(struct radv_accel_struct_serialization_header) -
|
||||
sizeof(uint64_t) * header.instance_count;
|
||||
|
||||
radv_update_buffer_cp(cmd_buffer,
|
||||
radv_buffer_get_va(accel_struct->bo) + accel_struct->mem_offset + base,
|
||||
|
|
@ -2353,29 +1754,8 @@ radv_CopyMemoryToAccelerationStructureKHR(VkDevice _device,
|
|||
VkDeferredOperationKHR deferredOperation,
|
||||
const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct, pInfo->dst);
|
||||
|
||||
char *base = device->ws->buffer_map(accel_struct->bo);
|
||||
if (!base)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
base += accel_struct->mem_offset;
|
||||
const struct radv_accel_struct_header *header = (const struct radv_accel_struct_header *)base;
|
||||
|
||||
const char *src = pInfo->src.hostAddress;
|
||||
struct radv_accel_struct_serialization_header *src_header = (void *)src;
|
||||
src += sizeof(*src_header) + sizeof(uint64_t) * src_header->instance_count;
|
||||
|
||||
memcpy(base, src, src_header->compacted_size);
|
||||
|
||||
for (unsigned i = 0; i < src_header->instance_count; ++i) {
|
||||
uint64_t *p = (uint64_t *)(base + i * 128 + header->instance_offset);
|
||||
*p = (*p & 63) | src_header->instances[i];
|
||||
}
|
||||
|
||||
device->ws->buffer_unmap(accel_struct->bo);
|
||||
return VK_SUCCESS;
|
||||
unreachable("Unimplemented");
|
||||
return VK_ERROR_FEATURE_NOT_PRESENT;
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
|
|
@ -2383,36 +1763,8 @@ radv_CopyAccelerationStructureToMemoryKHR(VkDevice _device,
|
|||
VkDeferredOperationKHR deferredOperation,
|
||||
const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_acceleration_structure, accel_struct, pInfo->src);
|
||||
|
||||
const char *base = device->ws->buffer_map(accel_struct->bo);
|
||||
if (!base)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
base += accel_struct->mem_offset;
|
||||
const struct radv_accel_struct_header *header = (const struct radv_accel_struct_header *)base;
|
||||
|
||||
char *dst = pInfo->dst.hostAddress;
|
||||
struct radv_accel_struct_serialization_header *dst_header = (void *)dst;
|
||||
dst += sizeof(*dst_header) + sizeof(uint64_t) * header->instance_count;
|
||||
|
||||
memcpy(dst_header->driver_uuid, device->physical_device->driver_uuid, VK_UUID_SIZE);
|
||||
memset(dst_header->accel_struct_compat, 0, VK_UUID_SIZE);
|
||||
|
||||
dst_header->serialization_size = header->serialization_size;
|
||||
dst_header->compacted_size = header->compacted_size;
|
||||
dst_header->instance_count = header->instance_count;
|
||||
|
||||
memcpy(dst, base, header->compacted_size);
|
||||
|
||||
for (unsigned i = 0; i < header->instance_count; ++i) {
|
||||
dst_header->instances[i] =
|
||||
*(const uint64_t *)(base + i * 128 + header->instance_offset) & ~63ull;
|
||||
}
|
||||
|
||||
device->ws->buffer_unmap(accel_struct->bo);
|
||||
return VK_SUCCESS;
|
||||
unreachable("Unimplemented");
|
||||
return VK_ERROR_FEATURE_NOT_PRESENT;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
|
|
|
|||
|
|
@ -1677,7 +1677,7 @@ radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
features->accelerationStructure = true;
|
||||
features->accelerationStructureCaptureReplay = false;
|
||||
features->accelerationStructureIndirectBuild = false;
|
||||
features->accelerationStructureHostCommands = true;
|
||||
features->accelerationStructureHostCommands = false;
|
||||
features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue