From e62c464e4e9575e4ec199b0276de5951675afd1e Mon Sep 17 00:00:00 2001 From: Konstantin Seurer Date: Mon, 26 May 2025 16:26:46 +0200 Subject: [PATCH] radv/rra: Only write used BLAS Halves the size of cp2077 captures. Reviewed-by: Natalie Vock Part-of: --- src/amd/vulkan/layers/radv_rra_layer.c | 11 +++++--- src/amd/vulkan/radv_rra.c | 37 ++++++++++++++++++++++++-- src/amd/vulkan/radv_rra.h | 3 +++ src/amd/vulkan/radv_rra_gfx10_3.c | 6 +++++ src/amd/vulkan/radv_rra_gfx12.c | 8 ++++++ 5 files changed, 59 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/layers/radv_rra_layer.c b/src/amd/vulkan/layers/radv_rra_layer.c index a5c1e71c812..b95a1331d45 100644 --- a/src/amd/vulkan/layers/radv_rra_layer.c +++ b/src/amd/vulkan/layers/radv_rra_layer.c @@ -183,7 +183,7 @@ exit: static void handle_accel_struct_write(VkCommandBuffer commandBuffer, VkAccelerationStructureKHR accelerationStructure, - uint64_t size) + uint64_t size, bool can_be_tlas) { VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accelerationStructure); @@ -244,6 +244,8 @@ handle_accel_struct_write(VkCommandBuffer commandBuffer, VkAccelerationStructure } } + data->can_be_tlas |= can_be_tlas; + if (!data->buffer) return; @@ -294,7 +296,8 @@ rra_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t in VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, pInfos + i, primitive_counts, &size_info); - handle_accel_struct_write(commandBuffer, pInfos[i].dstAccelerationStructure, size_info.accelerationStructureSize); + handle_accel_struct_write(commandBuffer, pInfos[i].dstAccelerationStructure, size_info.accelerationStructureSize, + pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR); } simple_mtx_unlock(&device->rra_trace.data_mtx); @@ -315,7 +318,7 @@ rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyA struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, 
src); struct radv_rra_accel_struct_data *data = entry->data; - handle_accel_struct_write(commandBuffer, pInfo->dst, data->size); + handle_accel_struct_write(commandBuffer, pInfo->dst, data->size, data->can_be_tlas); simple_mtx_unlock(&device->rra_trace.data_mtx); } @@ -332,7 +335,7 @@ rra_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer, simple_mtx_lock(&device->rra_trace.data_mtx); VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst); - handle_accel_struct_write(commandBuffer, pInfo->dst, dst->size); + handle_accel_struct_write(commandBuffer, pInfo->dst, dst->size, true); simple_mtx_unlock(&device->rra_trace.data_mtx); } diff --git a/src/amd/vulkan/radv_rra.c b/src/amd/vulkan/radv_rra.c index 336a1517240..e9249fb2d8f 100644 --- a/src/amd/vulkan/radv_rra.c +++ b/src/amd/vulkan/radv_rra.c @@ -290,7 +290,8 @@ rra_validate_header(struct radv_rra_accel_struct_data *accel_struct, const struc static VkResult rra_dump_acceleration_structure(const struct radv_physical_device *pdev, struct radv_rra_accel_struct_data *accel_struct, uint8_t *data, - struct hash_table_u64 *accel_struct_vas, bool should_validate, FILE *output) + struct hash_table_u64 *accel_struct_vas, struct set *used_blas, bool should_validate, + FILE *output) { struct radv_accel_struct_header *header = (struct radv_accel_struct_header *)data; @@ -385,6 +386,7 @@ rra_dump_acceleration_structure(const struct radv_physical_device *pdev, } struct rra_transcoding_context ctx = { + .used_blas = used_blas, .src = data + header->bvh_offset, .dst = dst_structure_data, .dst_leaf_offset = RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size, @@ -992,6 +994,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename) uint64_t *ray_history_sizes = NULL; struct hash_entry **hash_entries = NULL; FILE *file = NULL; + struct set *used_blas = NULL; uint32_t struct_count = _mesa_hash_table_num_entries(device->rra_trace.accel_structs); accel_struct_offsets = calloc(struct_count, sizeof(uint64_t)); @@ 
-1059,14 +1062,43 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename) if (result != VK_SUCCESS) goto cleanup; + used_blas = _mesa_set_create(NULL, _mesa_hash_u64, _mesa_key_u64_equal); + if (!used_blas) + goto cleanup; + for (unsigned i = 0; i < struct_count; i++) { struct radv_rra_accel_struct_data *data = hash_entries[i]->data; + if (!data->can_be_tlas) + continue; + void *mapped_data = rra_map_accel_struct_data(&copy_ctx, i); if (!mapped_data) continue; accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file); - result = rra_dump_acceleration_structure(pdev, data, mapped_data, device->rra_trace.accel_struct_vas, + result = rra_dump_acceleration_structure(pdev, data, mapped_data, device->rra_trace.accel_struct_vas, used_blas, + device->rra_trace.validate_as, file); + + rra_unmap_accel_struct_data(&copy_ctx, i); + + if (result == VK_SUCCESS) + written_accel_struct_count++; + } + + for (unsigned i = 0; i < struct_count; i++) { + struct radv_rra_accel_struct_data *data = hash_entries[i]->data; + if (data->can_be_tlas) + continue; + + if (!_mesa_set_search(used_blas, &data->va)) + continue; + + void *mapped_data = rra_map_accel_struct_data(&copy_ctx, i); + if (!mapped_data) + continue; + + accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file); + result = rra_dump_acceleration_structure(pdev, data, mapped_data, device->rra_trace.accel_struct_vas, used_blas, device->rra_trace.validate_as, file); rra_unmap_accel_struct_data(&copy_ctx, i); @@ -1259,6 +1291,7 @@ cleanup: if (file) fclose(file); + _mesa_set_destroy(used_blas, NULL); free(hash_entries); free(ray_history_sizes); free(ray_history_offsets); diff --git a/src/amd/vulkan/radv_rra.h b/src/amd/vulkan/radv_rra.h index c5af1d8f735..ddaaa3a89e9 100644 --- a/src/amd/vulkan/radv_rra.h +++ b/src/amd/vulkan/radv_rra.h @@ -12,6 +12,7 @@ #define RADV_RRA_H #include "util/hash_table.h" +#include "util/set.h" #include "util/simple_mtx.h" #include "util/u_dynarray.h" #include "util/u_math.h" @@ -31,6
+32,7 @@ struct radv_rra_accel_struct_data { uint64_t size; struct radv_rra_accel_struct_buffer *buffer; VkAccelerationStructureTypeKHR type; + bool can_be_tlas; bool is_dead; }; @@ -290,6 +292,7 @@ struct rra_bvh_info { }; struct rra_transcoding_context { + struct set *used_blas; const uint8_t *src; uint8_t *dst; uint32_t dst_leaf_offset; diff --git a/src/amd/vulkan/radv_rra_gfx10_3.c b/src/amd/vulkan/radv_rra_gfx10_3.c index 14cee1bbe75..8ff1f01aa9d 100644 --- a/src/amd/vulkan/radv_rra_gfx10_3.c +++ b/src/amd/vulkan/radv_rra_gfx10_3.c @@ -257,6 +257,12 @@ rra_transcode_instance_node(struct rra_transcoding_context *ctx, const struct ra memcpy(dst->wto_matrix, src->wto_matrix.values, sizeof(dst->wto_matrix)); memcpy(dst->otw_matrix, src->otw_matrix.values, sizeof(dst->otw_matrix)); + + uint64_t *addr = ralloc(ctx->used_blas, uint64_t); + if (addr) { + *addr = blas_va; + _mesa_set_add(ctx->used_blas, addr); + } } static void diff --git a/src/amd/vulkan/radv_rra_gfx12.c b/src/amd/vulkan/radv_rra_gfx12.c index 4cad241a38b..de4a524feee 100644 --- a/src/amd/vulkan/radv_rra_gfx12.c +++ b/src/amd/vulkan/radv_rra_gfx12.c @@ -171,6 +171,8 @@ rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id const struct radv_gfx12_instance_node_user_data *user_data = (const void *)((const uint8_t *)src_child_node + sizeof(struct radv_gfx12_instance_node)); + uint64_t blas_addr = radv_node_to_addr(dst->pointer_flags_bvh_addr) - user_data->bvh_offset; + dst->pointer_flags_bvh_addr = dst->pointer_flags_bvh_addr - (user_data->bvh_offset >> 3) + (sizeof(struct rra_accel_struct_metadata) >> 3); dst->unused = parent_id; @@ -179,6 +181,12 @@ rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id sideband_data->custom_instance_and_flags = user_data->custom_instance; sideband_data->blas_metadata_size = offsetof(struct rra_accel_struct_metadata, unused); sideband_data->otw_matrix = user_data->otw_matrix; + + uint64_t *addr = 
ralloc(ctx->used_blas, uint64_t); + if (addr) { + *addr = blas_addr; + _mesa_set_add(ctx->used_blas, addr); + } } } }