radv/rra: Only write used BLAS

Halves the size of Cyberpunk 2077 (cp2077) RRA captures.

Reviewed-by: Natalie Vock <natalie.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35166>
This commit is contained in:
Konstantin Seurer 2025-05-26 16:26:46 +02:00 committed by Marge Bot
parent 5dcd9b93b5
commit e62c464e4e
5 changed files with 59 additions and 6 deletions

View file

@ -183,7 +183,7 @@ exit:
static void
handle_accel_struct_write(VkCommandBuffer commandBuffer, VkAccelerationStructureKHR accelerationStructure,
uint64_t size)
uint64_t size, bool can_be_tlas)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accelerationStructure);
@ -244,6 +244,8 @@ handle_accel_struct_write(VkCommandBuffer commandBuffer, VkAccelerationStructure
}
}
data->can_be_tlas |= can_be_tlas;
if (!data->buffer)
return;
@ -294,7 +296,8 @@ rra_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t in
VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
pInfos + i, primitive_counts, &size_info);
handle_accel_struct_write(commandBuffer, pInfos[i].dstAccelerationStructure, size_info.accelerationStructureSize);
handle_accel_struct_write(commandBuffer, pInfos[i].dstAccelerationStructure, size_info.accelerationStructureSize,
pInfos[i].type == VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);
}
simple_mtx_unlock(&device->rra_trace.data_mtx);
@ -315,7 +318,7 @@ rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyA
struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, src);
struct radv_rra_accel_struct_data *data = entry->data;
handle_accel_struct_write(commandBuffer, pInfo->dst, data->size);
handle_accel_struct_write(commandBuffer, pInfo->dst, data->size, data->can_be_tlas);
simple_mtx_unlock(&device->rra_trace.data_mtx);
}
@ -332,7 +335,7 @@ rra_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
simple_mtx_lock(&device->rra_trace.data_mtx);
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
handle_accel_struct_write(commandBuffer, pInfo->dst, dst->size);
handle_accel_struct_write(commandBuffer, pInfo->dst, dst->size, true);
simple_mtx_unlock(&device->rra_trace.data_mtx);
}

View file

@ -290,7 +290,8 @@ rra_validate_header(struct radv_rra_accel_struct_data *accel_struct, const struc
static VkResult
rra_dump_acceleration_structure(const struct radv_physical_device *pdev,
struct radv_rra_accel_struct_data *accel_struct, uint8_t *data,
struct hash_table_u64 *accel_struct_vas, bool should_validate, FILE *output)
struct hash_table_u64 *accel_struct_vas, struct set *used_blas, bool should_validate,
FILE *output)
{
struct radv_accel_struct_header *header = (struct radv_accel_struct_header *)data;
@ -385,6 +386,7 @@ rra_dump_acceleration_structure(const struct radv_physical_device *pdev,
}
struct rra_transcoding_context ctx = {
.used_blas = used_blas,
.src = data + header->bvh_offset,
.dst = dst_structure_data,
.dst_leaf_offset = RRA_ROOT_NODE_OFFSET + bvh_info.internal_nodes_size,
@ -992,6 +994,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
uint64_t *ray_history_sizes = NULL;
struct hash_entry **hash_entries = NULL;
FILE *file = NULL;
struct set *used_blas = NULL;
uint32_t struct_count = _mesa_hash_table_num_entries(device->rra_trace.accel_structs);
accel_struct_offsets = calloc(struct_count, sizeof(uint64_t));
@ -1059,14 +1062,43 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
if (result != VK_SUCCESS)
goto cleanup;
used_blas = _mesa_set_create(NULL, _mesa_hash_u64, _mesa_key_u64_equal);
if (!used_blas)
goto cleanup;
for (unsigned i = 0; i < struct_count; i++) {
struct radv_rra_accel_struct_data *data = hash_entries[i]->data;
if (!data->can_be_tlas)
continue;
void *mapped_data = rra_map_accel_struct_data(&copy_ctx, i);
if (!mapped_data)
continue;
accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file);
result = rra_dump_acceleration_structure(pdev, data, mapped_data, device->rra_trace.accel_struct_vas,
result = rra_dump_acceleration_structure(pdev, data, mapped_data, device->rra_trace.accel_struct_vas, used_blas,
device->rra_trace.validate_as, file);
rra_unmap_accel_struct_data(&copy_ctx, i);
if (result == VK_SUCCESS)
written_accel_struct_count++;
}
for (unsigned i = 0; i < struct_count; i++) {
struct radv_rra_accel_struct_data *data = hash_entries[i]->data;
if (data->can_be_tlas)
continue;
if (!_mesa_set_search(used_blas, &data->va))
continue;
void *mapped_data = rra_map_accel_struct_data(&copy_ctx, i);
if (!mapped_data)
continue;
accel_struct_offsets[written_accel_struct_count] = (uint64_t)ftell(file);
result = rra_dump_acceleration_structure(pdev, data, mapped_data, device->rra_trace.accel_struct_vas, used_blas,
device->rra_trace.validate_as, file);
rra_unmap_accel_struct_data(&copy_ctx, i);
@ -1259,6 +1291,7 @@ cleanup:
if (file)
fclose(file);
_mesa_set_destroy(used_blas, NULL);
free(hash_entries);
free(ray_history_sizes);
free(ray_history_offsets);

View file

@ -12,6 +12,7 @@
#define RADV_RRA_H
#include "util/hash_table.h"
#include "util/set.h"
#include "util/simple_mtx.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
@ -31,6 +32,7 @@ struct radv_rra_accel_struct_data {
uint64_t size;
struct radv_rra_accel_struct_buffer *buffer;
VkAccelerationStructureTypeKHR type;
bool can_be_tlas;
bool is_dead;
};
@ -290,6 +292,7 @@ struct rra_bvh_info {
};
struct rra_transcoding_context {
struct set *used_blas;
const uint8_t *src;
uint8_t *dst;
uint32_t dst_leaf_offset;

View file

@ -257,6 +257,12 @@ rra_transcode_instance_node(struct rra_transcoding_context *ctx, const struct ra
memcpy(dst->wto_matrix, src->wto_matrix.values, sizeof(dst->wto_matrix));
memcpy(dst->otw_matrix, src->otw_matrix.values, sizeof(dst->otw_matrix));
uint64_t *addr = ralloc(ctx->used_blas, uint64_t);
if (addr) {
*addr = blas_va;
_mesa_set_add(ctx->used_blas, addr);
}
}
static void

View file

@ -171,6 +171,8 @@ rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id
const struct radv_gfx12_instance_node_user_data *user_data =
(const void *)((const uint8_t *)src_child_node + sizeof(struct radv_gfx12_instance_node));
uint64_t blas_addr = radv_node_to_addr(dst->pointer_flags_bvh_addr) - user_data->bvh_offset;
dst->pointer_flags_bvh_addr = dst->pointer_flags_bvh_addr - (user_data->bvh_offset >> 3) +
(sizeof(struct rra_accel_struct_metadata) >> 3);
dst->unused = parent_id;
@ -179,6 +181,12 @@ rra_transcode_node_gfx12(struct rra_transcoding_context *ctx, uint32_t parent_id
sideband_data->custom_instance_and_flags = user_data->custom_instance;
sideband_data->blas_metadata_size = offsetof(struct rra_accel_struct_metadata, unused);
sideband_data->otw_matrix = user_data->otw_matrix;
uint64_t *addr = ralloc(ctx->used_blas, uint64_t);
if (addr) {
*addr = blas_addr;
_mesa_set_add(ctx->used_blas, addr);
}
}
}
}