radv/rra: Reduce the memory requirement of copy_after_build

vkd3d-proton always sets the acceleration structure size to be the
whole buffer size. Because of that, allocating read back buffers
for all acceleration structures causes a system with a finite amount
of RAM to OOM.

This is solved by allocating the read back buffers at build time, when
the required size is known.

Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29537>
This commit is contained in:
Konstantin Seurer 2024-06-04 12:04:22 +02:00 committed by Marge Bot
parent c2c555402b
commit 090ca37352
5 changed files with 118 additions and 34 deletions

View file

@ -71,20 +71,23 @@ rra_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
}
static VkResult
rra_init_accel_struct_data_buffer(VkDevice vk_device, struct radv_rra_accel_struct_data *data)
rra_init_accel_struct_data_buffer(VkDevice vk_device, struct radv_rra_accel_struct_buffer *buffer, uint32_t size)
{
VK_FROM_HANDLE(radv_device, device, vk_device);
buffer->ref_cnt = 1;
VkBufferCreateInfo buffer_create_info = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.size = data->size,
.size = size,
};
VkResult result = radv_create_buffer(device, &buffer_create_info, NULL, &data->buffer, true);
VkResult result = radv_create_buffer(device, &buffer_create_info, NULL, &buffer->buffer, true);
if (result != VK_SUCCESS)
return result;
VkMemoryRequirements requirements;
vk_common_GetBufferMemoryRequirements(vk_device, data->buffer, &requirements);
vk_common_GetBufferMemoryRequirements(vk_device, buffer->buffer, &requirements);
VkMemoryAllocateFlagsInfo flags_info = {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
@ -97,19 +100,21 @@ rra_init_accel_struct_data_buffer(VkDevice vk_device, struct radv_rra_accel_stru
.allocationSize = requirements.size,
.memoryTypeIndex = device->rra_trace.copy_memory_index,
};
result = radv_alloc_memory(device, &alloc_info, NULL, &data->memory, true);
result = radv_alloc_memory(device, &alloc_info, NULL, &buffer->memory, true);
if (result != VK_SUCCESS)
goto fail_buffer;
result = vk_common_BindBufferMemory(vk_device, data->buffer, data->memory, 0);
result = vk_common_BindBufferMemory(vk_device, buffer->buffer, buffer->memory, 0);
if (result != VK_SUCCESS)
goto fail_memory;
return result;
fail_memory:
radv_FreeMemory(vk_device, data->memory, NULL);
radv_FreeMemory(vk_device, buffer->memory, NULL);
buffer->memory = VK_NULL_HANDLE;
fail_buffer:
radv_DestroyBuffer(vk_device, data->buffer, NULL);
radv_DestroyBuffer(vk_device, buffer->buffer, NULL);
buffer->buffer = VK_NULL_HANDLE;
return result;
}
@ -137,7 +142,6 @@ rra_CreateAccelerationStructureKHR(VkDevice _device, const VkAccelerationStructu
}
data->va = buffer->bo ? vk_acceleration_structure_get_va(structure) : 0;
data->size = structure->size;
data->type = pCreateInfo->type;
data->is_dead = false;
@ -149,20 +153,12 @@ rra_CreateAccelerationStructureKHR(VkDevice _device, const VkAccelerationStructu
if (result != VK_SUCCESS)
goto fail_data;
if (device->rra_trace.copy_after_build) {
result = rra_init_accel_struct_data_buffer(_device, data);
if (result != VK_SUCCESS)
goto fail_event;
}
_mesa_hash_table_insert(device->rra_trace.accel_structs, structure, data);
if (data->va)
_mesa_hash_table_u64_insert(device->rra_trace.accel_struct_vas, data->va, structure);
goto exit;
fail_event:
radv_DestroyEvent(_device, data->build_event, NULL);
fail_data:
free(data);
fail_as:
@ -174,12 +170,16 @@ exit:
}
static void
handle_accel_struct_write(VkCommandBuffer commandBuffer, VkAccelerationStructureKHR accelerationStructure)
handle_accel_struct_write(VkCommandBuffer commandBuffer, VkAccelerationStructureKHR accelerationStructure,
uint64_t size)
{
VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VK_FROM_HANDLE(vk_acceleration_structure, accel_struct, accelerationStructure);
size = MIN2(size, accel_struct->size);
struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
VkDevice _device = radv_device_to_handle(device);
struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, accel_struct);
struct radv_rra_accel_struct_data *data = entry->data;
@ -207,19 +207,37 @@ handle_accel_struct_write(VkCommandBuffer commandBuffer, VkAccelerationStructure
_mesa_hash_table_u64_insert(device->rra_trace.accel_struct_vas, data->va, accel_struct);
}
if (data->size < size) {
data->size = size;
if (device->rra_trace.copy_after_build) {
if (data->buffer)
radv_rra_accel_struct_buffer_unref(device, data->buffer);
data->buffer = calloc(1, sizeof(struct radv_rra_accel_struct_buffer));
if (rra_init_accel_struct_data_buffer(_device, data->buffer, size) != VK_SUCCESS)
return;
}
}
if (!data->buffer)
return;
if (!_mesa_set_search(cmd_buffer->accel_struct_buffers, data->buffer)) {
radv_radv_rra_accel_struct_buffer_ref(data->buffer);
_mesa_set_add(cmd_buffer->accel_struct_buffers, data->buffer);
}
VkBufferCopy2 region = {
.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2,
.srcOffset = accel_struct->offset,
.size = accel_struct->size,
.size = size,
};
VkCopyBufferInfo2 copyInfo = {
.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2,
.srcBuffer = accel_struct->buffer,
.dstBuffer = data->buffer,
.dstBuffer = data->buffer->buffer,
.regionCount = 1,
.pRegions = &region,
};
@ -239,8 +257,21 @@ rra_CmdBuildAccelerationStructuresKHR(VkCommandBuffer commandBuffer, uint32_t in
simple_mtx_lock(&device->rra_trace.data_mtx);
for (uint32_t i = 0; i < infoCount; ++i)
handle_accel_struct_write(commandBuffer, pInfos[i].dstAccelerationStructure);
for (uint32_t i = 0; i < infoCount; ++i) {
uint32_t *primitive_counts = alloca(pInfos[i].geometryCount * sizeof(uint32_t));
for (uint32_t geometry_index = 0; geometry_index < pInfos[i].geometryCount; geometry_index++)
primitive_counts[geometry_index] = ppBuildRangeInfos[i][geometry_index].primitiveCount;
/* vkd3d-proton specifies the size of the backing buffer. This can cause false positives when removing aliasing
* acceleration structures, because a buffer can be used by multiple acceleration structures. Therefore we need to
* compute the actual size. */
VkAccelerationStructureBuildSizesInfoKHR size_info;
device->layer_dispatch.rra.GetAccelerationStructureBuildSizesKHR(radv_device_to_handle(device),
VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR,
pInfos + i, primitive_counts, &size_info);
handle_accel_struct_write(commandBuffer, pInfos[i].dstAccelerationStructure, size_info.accelerationStructureSize);
}
simple_mtx_unlock(&device->rra_trace.data_mtx);
}
@ -255,7 +286,12 @@ rra_CmdCopyAccelerationStructureKHR(VkCommandBuffer commandBuffer, const VkCopyA
simple_mtx_lock(&device->rra_trace.data_mtx);
handle_accel_struct_write(commandBuffer, pInfo->dst);
VK_FROM_HANDLE(vk_acceleration_structure, src, pInfo->src);
struct hash_entry *entry = _mesa_hash_table_search(device->rra_trace.accel_structs, src);
struct radv_rra_accel_struct_data *data = entry->data;
handle_accel_struct_write(commandBuffer, pInfo->dst, data->size);
simple_mtx_unlock(&device->rra_trace.data_mtx);
}
@ -271,7 +307,8 @@ rra_CmdCopyMemoryToAccelerationStructureKHR(VkCommandBuffer commandBuffer,
simple_mtx_lock(&device->rra_trace.data_mtx);
handle_accel_struct_write(commandBuffer, pInfo->dst);
VK_FROM_HANDLE(vk_acceleration_structure, dst, pInfo->dst);
handle_accel_struct_write(commandBuffer, pInfo->dst, dst->size);
simple_mtx_unlock(&device->rra_trace.data_mtx);
}

View file

@ -18,6 +18,7 @@
#include "radv_pipeline_rt.h"
#include "radv_radeon_winsys.h"
#include "radv_rmv.h"
#include "radv_rra.h"
#include "radv_shader.h"
#include "radv_shader_object.h"
#include "radv_sqtt.h"
@ -309,6 +310,9 @@ radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
if (cmd_buffer->qf != RADV_QUEUE_SPARSE) {
util_dynarray_fini(&cmd_buffer->ray_history);
radv_rra_accel_struct_buffers_unref(device, cmd_buffer->accel_struct_buffers);
_mesa_set_destroy(cmd_buffer->accel_struct_buffers, NULL);
list_for_each_entry_safe (struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) {
radv_rmv_log_command_buffer_bo_destroy(device, up->upload_bo);
radv_bo_destroy(device, &cmd_buffer->vk.base, up->upload_bo);
@ -387,6 +391,7 @@ radv_create_cmd_buffer(struct vk_command_pool *pool, VkCommandBufferLevel level,
for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
cmd_buffer->accel_struct_buffers = _mesa_pointer_set_create(NULL);
util_dynarray_init(&cmd_buffer->ray_history, NULL);
}
@ -438,6 +443,8 @@ radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandB
util_dynarray_clear(&cmd_buffer->ray_history);
radv_rra_accel_struct_buffers_unref(device, cmd_buffer->accel_struct_buffers);
cmd_buffer->push_constant_stages = 0;
cmd_buffer->scratch_size_per_wave_needed = 0;
cmd_buffer->scratch_waves_wanted = 0;

View file

@ -578,6 +578,7 @@ struct radv_cmd_buffer {
uint32_t sqtt_cb_id;
struct set *accel_struct_buffers;
struct util_dynarray ray_history;
};

View file

@ -974,6 +974,30 @@ radv_rra_trace_clear_ray_history(VkDevice _device, struct radv_rra_trace_data *d
util_dynarray_clear(&data->ray_history);
}
/* Take an additional reference on a read back buffer.
 *
 * The caller must already hold a reference: the count is asserted to be non-zero before it is
 * bumped with p_atomic_inc().
 */
void
radv_radv_rra_accel_struct_buffer_ref(struct radv_rra_accel_struct_buffer *buffer)
{
   /* ref_cnt is unsigned, so "> 0" is the same check as ">= 1". */
   assert(buffer->ref_cnt > 0);
   p_atomic_inc(&buffer->ref_cnt);
}
/* Drop one reference on a read back buffer.
 *
 * When the last reference goes away, the VkBuffer and its VkDeviceMemory are destroyed and the
 * tracking struct itself is freed, so the caller must not touch `buffer` after this call unless it
 * holds another reference.
 *
 * device: device that owns the Vulkan objects stored in `buffer`.
 * buffer: heap-allocated, reference-counted read back buffer; must not be NULL.
 */
void
radv_rra_accel_struct_buffer_unref(struct radv_device *device, struct radv_rra_accel_struct_buffer *buffer)
{
   if (!p_atomic_dec_zero(&buffer->ref_cnt))
      return;

   VkDevice _device = radv_device_to_handle(device);
   radv_DestroyBuffer(_device, buffer->buffer, NULL);
   radv_FreeMemory(_device, buffer->memory, NULL);

   /* The struct is allocated with calloc() when the read back buffer is created; releasing only
    * the Vulkan handles would leak it. */
   free(buffer);
}
void
radv_rra_accel_struct_buffers_unref(struct radv_device *device, struct set *buffers)
{
set_foreach_remove (buffers, entry)
radv_rra_accel_struct_buffer_unref(device, (void *)entry->key);
}
void
radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data)
{
@ -997,11 +1021,14 @@ radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data)
}
void
radv_destroy_rra_accel_struct_data(VkDevice device, struct radv_rra_accel_struct_data *data)
radv_destroy_rra_accel_struct_data(VkDevice _device, struct radv_rra_accel_struct_data *data)
{
radv_DestroyEvent(device, data->build_event, NULL);
radv_DestroyBuffer(device, data->buffer, NULL);
radv_FreeMemory(device, data->memory, NULL);
VK_FROM_HANDLE(radv_device, device, _device);
if (data->buffer)
radv_rra_accel_struct_buffer_unref(device, data->buffer);
radv_DestroyEvent(_device, data->build_event, NULL);
free(data);
}
@ -1131,9 +1158,9 @@ rra_map_accel_struct_data(struct rra_copy_context *ctx, uint32_t i)
if (radv_GetEventStatus(ctx->device, data->build_event) != VK_EVENT_SET)
return NULL;
if (data->memory) {
if (data->buffer->memory) {
void *mapped_data;
vk_common_MapMemory(ctx->device, data->memory, 0, VK_WHOLE_SIZE, 0, &mapped_data);
vk_common_MapMemory(ctx->device, data->buffer->memory, 0, VK_WHOLE_SIZE, 0, &mapped_data);
return mapped_data;
}
@ -1189,8 +1216,8 @@ rra_unmap_accel_struct_data(struct rra_copy_context *ctx, uint32_t i)
{
struct radv_rra_accel_struct_data *data = ctx->entries[i]->data;
if (data->memory)
vk_common_UnmapMemory(ctx->device, data->memory);
if (data->buffer && data->buffer->memory)
vk_common_UnmapMemory(ctx->device, data->buffer->memory);
}
enum rra_ray_history_token_type {

View file

@ -25,12 +25,17 @@ struct radv_rra_accel_struct_data {
VkEvent build_event;
uint64_t va;
uint64_t size;
VkBuffer buffer;
VkDeviceMemory memory;
struct radv_rra_accel_struct_buffer *buffer;
VkAccelerationStructureTypeKHR type;
bool is_dead;
};
/* Reference-counted read back buffer that receives a copy of an acceleration structure. */
struct radv_rra_accel_struct_buffer {
/* Destination buffer of the acceleration structure copy. */
VkBuffer buffer;
/* Device memory bound to `buffer`. */
VkDeviceMemory memory;
/* Reference count; set to 1 when the buffer is initialised and modified with
 * p_atomic_inc()/p_atomic_dec_zero(). */
uint32_t ref_cnt;
};
enum radv_rra_ray_history_metadata_type {
RADV_RRA_COUNTER_INFO = 1,
RADV_RRA_DISPATCH_SIZE = 2,
@ -165,6 +170,13 @@ VkResult radv_rra_trace_init(struct radv_device *device);
void radv_rra_trace_clear_ray_history(VkDevice _device, struct radv_rra_trace_data *data);
void radv_radv_rra_accel_struct_buffer_ref(struct radv_rra_accel_struct_buffer *buffer);
void radv_rra_accel_struct_buffer_unref(struct radv_device *device, struct radv_rra_accel_struct_buffer *buffer);
struct set;
void radv_rra_accel_struct_buffers_unref(struct radv_device *device, struct set *buffers);
void radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data);
void radv_destroy_rra_accel_struct_data(VkDevice device, struct radv_rra_accel_struct_data *data);