mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 02:10:11 +01:00
radv/rra: Dump basic ray history tokens
This only dumps the begin tokens. Tokens are written to a buffer containing a 12 byte header at the beginning. We use an intermediate format for the ray history tokens because the RRA format is very inefficient. Reviewed-by: Friedrich Vock <friedrich.vock@gmx.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25548>
This commit is contained in:
parent
26939f016d
commit
767f628079
9 changed files with 778 additions and 30 deletions
|
|
@ -32,10 +32,43 @@ VKAPI_ATTR VkResult VKAPI_CALL
|
|||
rra_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_queue, queue, _queue);
|
||||
|
||||
if (queue->device->rra_trace.triggered) {
|
||||
queue->device->rra_trace.triggered = false;
|
||||
|
||||
if (_mesa_hash_table_num_entries(queue->device->rra_trace.accel_structs) == 0) {
|
||||
fprintf(stderr, "radv: No acceleration structures captured, not saving RRA trace.\n");
|
||||
} else {
|
||||
char filename[2048];
|
||||
time_t t = time(NULL);
|
||||
struct tm now = *localtime(&t);
|
||||
snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rra", util_get_process_name(),
|
||||
1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);
|
||||
|
||||
VkResult result = radv_rra_dump_trace(_queue, filename);
|
||||
if (result == VK_SUCCESS)
|
||||
fprintf(stderr, "radv: RRA capture saved to '%s'\n", filename);
|
||||
else
|
||||
fprintf(stderr, "radv: Failed to save RRA capture!\n");
|
||||
}
|
||||
}
|
||||
|
||||
VkResult result = queue->device->layer_dispatch.rra.QueuePresentKHR(_queue, pPresentInfo);
|
||||
if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
|
||||
return result;
|
||||
|
||||
VkDevice _device = radv_device_to_handle(queue->device);
|
||||
radv_rra_trace_clear_ray_history(_device, &queue->device->rra_trace);
|
||||
|
||||
if (queue->device->rra_trace.triggered) {
|
||||
result = queue->device->layer_dispatch.rra.DeviceWaitIdle(_device);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
struct radv_ray_history_header *header = queue->device->rra_trace.ray_history_data;
|
||||
header->offset = sizeof(struct radv_ray_history_header);
|
||||
}
|
||||
|
||||
if (!queue->device->rra_trace.copy_after_build)
|
||||
return VK_SUCCESS;
|
||||
|
||||
|
|
@ -46,7 +79,7 @@ rra_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
|
|||
if (!data->is_dead)
|
||||
continue;
|
||||
|
||||
radv_destroy_rra_accel_struct_data(radv_device_to_handle(queue->device), data);
|
||||
radv_destroy_rra_accel_struct_data(_device, data);
|
||||
_mesa_hash_table_remove(accel_structs, entry);
|
||||
}
|
||||
|
||||
|
|
@ -292,3 +325,45 @@ rra_DestroyAccelerationStructureKHR(VkDevice _device, VkAccelerationStructureKHR
|
|||
|
||||
device->layer_dispatch.rra.DestroyAccelerationStructureKHR(_device, _structure, pAllocator);
|
||||
}
|
||||
|
||||
VKAPI_ATTR VkResult VKAPI_CALL
|
||||
rra_QueueSubmit2KHR(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, VkFence _fence)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_queue, queue, _queue);
|
||||
struct radv_device *device = queue->device;
|
||||
|
||||
VkResult result = device->layer_dispatch.rra.QueueSubmit2KHR(_queue, submitCount, pSubmits, _fence);
|
||||
if (result != VK_SUCCESS || !device->rra_trace.triggered)
|
||||
return result;
|
||||
|
||||
uint32_t total_trace_count = 0;
|
||||
|
||||
simple_mtx_lock(&device->rra_trace.data_mtx);
|
||||
|
||||
for (uint32_t submit_index = 0; submit_index < submitCount; submit_index++) {
|
||||
for (uint32_t i = 0; i < pSubmits[submit_index].commandBufferInfoCount; i++) {
|
||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pSubmits[submit_index].pCommandBufferInfos[i].commandBuffer);
|
||||
uint32_t trace_count =
|
||||
util_dynarray_num_elements(&cmd_buffer->ray_history, struct radv_rra_ray_history_data *);
|
||||
if (!trace_count)
|
||||
continue;
|
||||
|
||||
total_trace_count += trace_count;
|
||||
util_dynarray_append_dynarray(&device->rra_trace.ray_history, &cmd_buffer->ray_history);
|
||||
}
|
||||
}
|
||||
|
||||
if (!total_trace_count) {
|
||||
simple_mtx_unlock(&device->rra_trace.data_mtx);
|
||||
return result;
|
||||
}
|
||||
|
||||
result = device->layer_dispatch.rra.DeviceWaitIdle(radv_device_to_handle(device));
|
||||
|
||||
struct radv_ray_history_header *header = device->rra_trace.ray_history_data;
|
||||
header->submit_base_index += total_trace_count;
|
||||
|
||||
simple_mtx_unlock(&device->rra_trace.data_mtx);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -570,6 +570,12 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
if (args->vars.iteration_instance_count) {
|
||||
nir_def *iteration_instance_count = nir_load_deref(b, args->vars.iteration_instance_count);
|
||||
iteration_instance_count = nir_iadd_imm(b, iteration_instance_count, 1 << 16);
|
||||
nir_store_deref(b, args->vars.iteration_instance_count, iteration_instance_count, 0x1);
|
||||
}
|
||||
|
||||
/* instance */
|
||||
nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
|
||||
nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
|
||||
|
|
@ -670,6 +676,12 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
if (args->vars.iteration_instance_count) {
|
||||
nir_def *iteration_instance_count = nir_load_deref(b, args->vars.iteration_instance_count);
|
||||
iteration_instance_count = nir_iadd_imm(b, iteration_instance_count, 1);
|
||||
nir_store_deref(b, args->vars.iteration_instance_count, iteration_instance_count, 0x1);
|
||||
}
|
||||
}
|
||||
nir_pop_loop(b, NULL);
|
||||
|
||||
|
|
|
|||
|
|
@ -118,6 +118,9 @@ struct radv_ray_traversal_vars {
|
|||
/* Information about the current instance used for culling. */
|
||||
nir_deref_instr *instance_addr;
|
||||
nir_deref_instr *sbt_offset_and_flags;
|
||||
|
||||
/* Statistics. Iteration count in the low 16 bits, candidate instance counts in the high 16 bits. */
|
||||
nir_deref_instr *iteration_instance_count;
|
||||
};
|
||||
|
||||
struct radv_ray_traversal_args {
|
||||
|
|
|
|||
|
|
@ -855,6 +855,137 @@ radv_nir_lower_rt_io(nir_shader *nir, bool monolithic, uint32_t payload_offset)
|
|||
}
|
||||
}
|
||||
|
||||
/* Emits NIR that reserves space for one ray history token in the device-global
 * ray history buffer and writes the packed token header word.
 *
 * NOTE: this opens TWO nir_push_if scopes (resolution-scale filter and
 * bounds/validity check) that remain open on return; the caller must emit the
 * token payload and then close them with radv_build_token_end().
 *
 * Returns the destination address just past the 4-byte token header, where the
 * caller stores the payload. max_token_size is the worst-case token size used
 * for the bounds check; token_size (dword-aligned) is what is actually
 * reserved via the atomic add.
 */
static nir_def *
radv_build_token_begin(nir_builder *b, struct rt_variables *vars, nir_def *hit, enum radv_packed_token_type token_type,
                       nir_def *token_size, uint32_t max_token_size)
{
   struct radv_rra_trace_data *rra_trace = &vars->device->rra_trace;
   assert(rra_trace->ray_history_addr);
   assert(rra_trace->ray_history_buffer_size >= max_token_size);

   nir_def *ray_history_addr = nir_imm_int64(b, rra_trace->ray_history_addr);

   nir_def *launch_id = nir_load_ray_launch_id(b);

   /* Only trace launches whose id is a multiple of the resolution scale in all
    * three dimensions.
    */
   nir_def *trace = nir_imm_true(b);
   for (uint32_t i = 0; i < 3; i++) {
      nir_def *remainder = nir_umod_imm(b, nir_channel(b, launch_id, i), rra_trace->ray_history_resolution_scale);
      trace = nir_iand(b, trace, nir_ieq_imm(b, remainder, 0));
   }
   nir_push_if(b, trace);

   /* The header's offset field doubles as the allocation cursor; it must sit at
    * the start of the buffer for the atomic below to target it.
    */
   static_assert(offsetof(struct radv_ray_history_header, offset) == 0, "Unexpected offset");
   nir_def *base_offset = nir_global_atomic(b, 32, ray_history_addr, token_size, .atomic_op = nir_atomic_op_iadd);

   /* Abuse the dword alignment of token_size to add an invalid bit to offset. */
   trace = nir_ieq_imm(b, nir_iand_imm(b, base_offset, 1), 0);

   nir_def *in_bounds = nir_ule_imm(b, base_offset, rra_trace->ray_history_buffer_size - max_token_size);
   /* Make sure we don't overwrite the header in case of an overflow. */
   in_bounds = nir_iand(b, in_bounds, nir_uge_imm(b, base_offset, sizeof(struct radv_ray_history_header)));

   nir_push_if(b, nir_iand(b, trace, in_bounds));

   nir_def *dst_addr = nir_iadd(b, ray_history_addr, nir_u2u64(b, base_offset));

   nir_def *launch_size = nir_load_ray_launch_size(b);

   /* Scale launch id/size down so the flattened index matches the reduced
    * resolution that is actually traced.
    */
   nir_def *launch_id_comps[3];
   nir_def *launch_size_comps[3];
   for (uint32_t i = 0; i < 3; i++) {
      launch_id_comps[i] = nir_udiv_imm(b, nir_channel(b, launch_id, i), rra_trace->ray_history_resolution_scale);
      launch_size_comps[i] = nir_udiv_imm(b, nir_channel(b, launch_size, i), rra_trace->ray_history_resolution_scale);
   }

   /* Flattened launch index: x + y * sx + z * sx * sy. */
   nir_def *global_index =
      nir_iadd(b, launch_id_comps[0],
               nir_iadd(b, nir_imul(b, launch_id_comps[1], launch_size_comps[0]),
                        nir_imul(b, launch_id_comps[2], nir_imul(b, launch_size_comps[0], launch_size_comps[1]))));
   /* Pack the token header word: launch index in bits 0..28, hit flag in bit
    * 29, token type in bits 30..31 (see struct radv_packed_token_header).
    */
   nir_def *launch_index_and_hit = nir_bcsel(b, hit, nir_ior_imm(b, global_index, 1u << 29u), global_index);
   nir_build_store_global(b, nir_ior_imm(b, launch_index_and_hit, token_type << 30), dst_addr, .align_mul = 4);

   return nir_iadd_imm(b, dst_addr, 4);
}
|
||||
|
||||
static void
|
||||
radv_build_token_end(nir_builder *b)
|
||||
{
|
||||
nir_pop_if(b, NULL);
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
||||
/* Emits NIR that writes one radv_packed_end_trace_token into the ray history
 * buffer. The stores below must match the field order/offsets of
 * struct radv_packed_end_trace_token exactly. When there is no hit, the token
 * is truncated before the primitive_id field (see token_size below), so the
 * hit-only fields are written inside a conditional.
 */
static void
radv_build_end_trace_token(nir_builder *b, struct rt_variables *vars, nir_def *tmax, nir_def *hit,
                           nir_def *iteration_instance_count)
{
   /* Miss tokens omit everything from primitive_id onwards. */
   nir_def *token_size = nir_bcsel(b, hit, nir_imm_int(b, sizeof(struct radv_packed_end_trace_token)),
                                   nir_imm_int(b, offsetof(struct radv_packed_end_trace_token, primitive_id)));

   nir_def *dst_addr = radv_build_token_begin(b, vars, hit, radv_packed_token_end_trace, token_size,
                                              sizeof(struct radv_packed_end_trace_token));
   {
      /* accel_struct_lo/hi (8 bytes). */
      nir_build_store_global(b, nir_load_var(b, vars->accel_struct), dst_addr, .align_mul = 4);
      dst_addr = nir_iadd_imm(b, dst_addr, 8);

      /* dispatch_index lives in the header's high 16 bits and
       * submit_base_index follows it; summing both channels yields the global
       * dispatch index for this trace call.
       */
      nir_def *dispatch_indices =
         nir_load_smem_amd(b, 2, nir_imm_int64(b, vars->device->rra_trace.ray_history_addr),
                           nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), .align_mul = 4);
      nir_def *dispatch_index = nir_iadd(b, nir_channel(b, dispatch_indices, 0), nir_channel(b, dispatch_indices, 1));
      /* flags : 16 | dispatch_index : 16. */
      nir_def *dispatch_and_flags = nir_iand_imm(b, nir_load_var(b, vars->cull_mask_and_flags), 0xFFFF);
      dispatch_and_flags = nir_ior(b, dispatch_and_flags, dispatch_index);
      nir_build_store_global(b, dispatch_and_flags, dst_addr, .align_mul = 4);
      dst_addr = nir_iadd_imm(b, dst_addr, 4);

      nir_def *shifted_cull_mask = nir_iand_imm(b, nir_load_var(b, vars->cull_mask_and_flags), 0xFF000000);

      /* sbt_offset : 4 | sbt_stride : 4 | miss_index : 16 | cull_mask : 8. */
      nir_def *packed_args = nir_load_var(b, vars->sbt_offset);
      packed_args = nir_ior(b, packed_args, nir_ishl_imm(b, nir_load_var(b, vars->sbt_stride), 4));
      packed_args = nir_ior(b, packed_args, nir_ishl_imm(b, nir_load_var(b, vars->miss_index), 8));
      packed_args = nir_ior(b, packed_args, shifted_cull_mask);
      nir_build_store_global(b, packed_args, dst_addr, .align_mul = 4);
      dst_addr = nir_iadd_imm(b, dst_addr, 4);

      /* origin[3]. */
      nir_build_store_global(b, nir_load_var(b, vars->origin), dst_addr, .align_mul = 4);
      dst_addr = nir_iadd_imm(b, dst_addr, 12);

      /* tmin. */
      nir_build_store_global(b, nir_load_var(b, vars->tmin), dst_addr, .align_mul = 4);
      dst_addr = nir_iadd_imm(b, dst_addr, 4);

      /* direction[3]. */
      nir_build_store_global(b, nir_load_var(b, vars->direction), dst_addr, .align_mul = 4);
      dst_addr = nir_iadd_imm(b, dst_addr, 12);

      /* tmax: the original ray tmax captured before traversal, not the
       * post-traversal hit distance (that is stored as 't' below).
       */
      nir_build_store_global(b, tmax, dst_addr, .align_mul = 4);
      dst_addr = nir_iadd_imm(b, dst_addr, 4);

      /* iteration_count : 16 | instance_count : 16. */
      nir_build_store_global(b, iteration_instance_count, dst_addr, .align_mul = 4);
      dst_addr = nir_iadd_imm(b, dst_addr, 4);

      /* Hit-only tail of the token. */
      nir_push_if(b, hit);
      {
         nir_build_store_global(b, nir_load_var(b, vars->primitive_id), dst_addr, .align_mul = 4);
         dst_addr = nir_iadd_imm(b, dst_addr, 4);

         /* geometry_id occupies the low 28 bits of geometry_id_and_flags. */
         nir_def *geometry_id = nir_iand_imm(b, nir_load_var(b, vars->geometry_id_and_flags), 0xFFFFFFF);
         nir_build_store_global(b, geometry_id, dst_addr, .align_mul = 4);
         dst_addr = nir_iadd_imm(b, dst_addr, 4);

         /* instance_id : 24 (fetched from the BVH instance node) | hit_kind : 8. */
         nir_def *instance_id_and_hit_kind =
            nir_build_load_global(b, 1, 32,
                                  nir_iadd_imm(b, nir_load_var(b, vars->instance_addr),
                                               offsetof(struct radv_bvh_instance_node, instance_id)));
         instance_id_and_hit_kind =
            nir_ior(b, instance_id_and_hit_kind, nir_ishl_imm(b, nir_load_var(b, vars->hit_kind), 24));
         nir_build_store_global(b, instance_id_and_hit_kind, dst_addr, .align_mul = 4);
         dst_addr = nir_iadd_imm(b, dst_addr, 4);

         /* t: the hit distance (vars->tmax after traversal). */
         nir_build_store_global(b, nir_load_var(b, vars->tmax), dst_addr, .align_mul = 4);
         dst_addr = nir_iadd_imm(b, dst_addr, 4);
      }
      nir_pop_if(b, NULL);
   }
   radv_build_token_end(b);
}
|
||||
|
||||
static nir_function_impl *
|
||||
lower_any_hit_for_intersection(nir_shader *any_hit)
|
||||
{
|
||||
|
|
@ -1432,6 +1563,14 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
|||
.sbt_offset_and_flags = nir_build_deref_var(b, trav_vars.sbt_offset_and_flags),
|
||||
};
|
||||
|
||||
nir_variable *iteration_instance_count = NULL;
|
||||
if (vars->device->rra_trace.ray_history_addr) {
|
||||
iteration_instance_count =
|
||||
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "iteration_instance_count");
|
||||
nir_store_var(b, iteration_instance_count, nir_imm_int(b, 0), 0x1);
|
||||
trav_vars_args.iteration_instance_count = nir_build_deref_var(b, iteration_instance_count);
|
||||
}
|
||||
|
||||
struct traversal_data data = {
|
||||
.device = device,
|
||||
.vars = vars,
|
||||
|
|
@ -1464,8 +1603,14 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
|||
.data = &data,
|
||||
};
|
||||
|
||||
nir_def *original_tmax = nir_load_var(b, vars->tmax);
|
||||
|
||||
radv_build_ray_traversal(device, b, &args);
|
||||
|
||||
if (vars->device->rra_trace.ray_history_addr)
|
||||
radv_build_end_trace_token(b, vars, original_tmax, nir_load_var(b, trav_vars.hit),
|
||||
nir_load_var(b, iteration_instance_count));
|
||||
|
||||
nir_metadata_preserve(nir_shader_get_entrypoint(b->shader), nir_metadata_none);
|
||||
radv_nir_lower_hit_attrib_derefs(b->shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -326,6 +326,8 @@ radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer)
|
|||
struct radv_cmd_buffer *cmd_buffer = container_of(vk_cmd_buffer, struct radv_cmd_buffer, vk);
|
||||
|
||||
if (cmd_buffer->qf != RADV_QUEUE_SPARSE) {
|
||||
util_dynarray_fini(&cmd_buffer->ray_history);
|
||||
|
||||
list_for_each_entry_safe (struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) {
|
||||
radv_rmv_log_command_buffer_bo_destroy(cmd_buffer->device, up->upload_bo);
|
||||
cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
|
||||
|
|
@ -404,6 +406,8 @@ radv_create_cmd_buffer(struct vk_command_pool *pool, struct vk_command_buffer **
|
|||
|
||||
for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
|
||||
vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
|
||||
|
||||
util_dynarray_init(&cmd_buffer->ray_history, NULL);
|
||||
}
|
||||
|
||||
*cmd_buffer_out = &cmd_buffer->vk;
|
||||
|
|
@ -438,6 +442,8 @@ radv_reset_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer, UNUSED VkCommandB
|
|||
free(up);
|
||||
}
|
||||
|
||||
util_dynarray_clear(&cmd_buffer->ray_history);
|
||||
|
||||
cmd_buffer->push_constant_stages = 0;
|
||||
cmd_buffer->scratch_size_per_wave_needed = 0;
|
||||
cmd_buffer->scratch_waves_wanted = 0;
|
||||
|
|
@ -10334,6 +10340,71 @@ radv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_
|
|||
radv_compute_dispatch(cmd_buffer, &info);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_trace_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCommand2KHR *cmd,
|
||||
uint64_t indirect_va)
|
||||
{
|
||||
if (!cmd || indirect_va)
|
||||
return;
|
||||
|
||||
struct radv_rra_ray_history_data *data = malloc(sizeof(struct radv_rra_ray_history_data));
|
||||
if (!data)
|
||||
return;
|
||||
|
||||
uint32_t width = DIV_ROUND_UP(cmd->width, cmd_buffer->device->rra_trace.ray_history_resolution_scale);
|
||||
uint32_t height = DIV_ROUND_UP(cmd->height, cmd_buffer->device->rra_trace.ray_history_resolution_scale);
|
||||
uint32_t depth = DIV_ROUND_UP(cmd->depth, cmd_buffer->device->rra_trace.ray_history_resolution_scale);
|
||||
|
||||
struct radv_rra_ray_history_counter counter = {
|
||||
.dispatch_size = {width, height, depth},
|
||||
.hit_shader_count = cmd->hitShaderBindingTableSize / cmd->hitShaderBindingTableStride,
|
||||
.miss_shader_count = cmd->missShaderBindingTableSize / cmd->missShaderBindingTableStride,
|
||||
.shader_count = cmd_buffer->state.rt_pipeline->stage_count,
|
||||
.pipeline_api_hash = cmd_buffer->state.rt_pipeline->base.base.pipeline_hash,
|
||||
.mode = 1,
|
||||
.stride = sizeof(uint32_t),
|
||||
.data_size = 0,
|
||||
.ray_id_begin = 0,
|
||||
.ray_id_end = 0xFFFFFFFF,
|
||||
.pipeline_type = RADV_RRA_PIPELINE_RAY_TRACING,
|
||||
};
|
||||
|
||||
struct radv_rra_ray_history_dispatch_size dispatch_size = {
|
||||
.size = {width, height, depth},
|
||||
};
|
||||
|
||||
struct radv_rra_ray_history_traversal_flags traversal_flags = {0};
|
||||
|
||||
data->metadata = (struct radv_rra_ray_history_metadata){
|
||||
.counter_info.type = RADV_RRA_COUNTER_INFO,
|
||||
.counter_info.size = sizeof(struct radv_rra_ray_history_counter),
|
||||
.counter = counter,
|
||||
|
||||
.dispatch_size_info.type = RADV_RRA_DISPATCH_SIZE,
|
||||
.dispatch_size_info.size = sizeof(struct radv_rra_ray_history_dispatch_size),
|
||||
.dispatch_size = dispatch_size,
|
||||
|
||||
.traversal_flags_info.type = RADV_RRA_TRAVERSAL_FLAGS,
|
||||
.traversal_flags_info.size = sizeof(struct radv_rra_ray_history_traversal_flags),
|
||||
.traversal_flags = traversal_flags,
|
||||
};
|
||||
|
||||
uint32_t dispatch_index = util_dynarray_num_elements(&cmd_buffer->ray_history, struct radv_rra_ray_history_data *)
|
||||
<< 16;
|
||||
|
||||
util_dynarray_append(&cmd_buffer->ray_history, struct radv_rra_ray_history_data *, data);
|
||||
|
||||
cmd_buffer->state.flush_bits |=
|
||||
RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
|
||||
radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL) |
|
||||
radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
|
||||
|
||||
radv_update_buffer_cp(
|
||||
cmd_buffer,
|
||||
cmd_buffer->device->rra_trace.ray_history_addr + offsetof(struct radv_ray_history_header, dispatch_index),
|
||||
&dispatch_index, sizeof(dispatch_index));
|
||||
}
|
||||
|
||||
enum radv_rt_mode {
|
||||
radv_rt_mode_direct,
|
||||
radv_rt_mode_indirect,
|
||||
|
|
@ -10366,6 +10437,9 @@ radv_trace_rays(struct radv_cmd_buffer *cmd_buffer, VkTraceRaysIndirectCommand2K
|
|||
if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_RT)
|
||||
return;
|
||||
|
||||
if (unlikely(cmd_buffer->device->rra_trace.ray_history_buffer))
|
||||
radv_trace_trace_rays(cmd_buffer, tables, indirect_va);
|
||||
|
||||
struct radv_compute_pipeline *pipeline = &cmd_buffer->state.rt_pipeline->base;
|
||||
struct radv_shader *rt_prolog = cmd_buffer->state.rt_prolog;
|
||||
uint32_t base_reg = rt_prolog->info.user_data_0;
|
||||
|
|
|
|||
|
|
@ -630,28 +630,8 @@ capture_trace(VkQueue _queue)
|
|||
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
char filename[2048];
|
||||
struct tm now;
|
||||
time_t t;
|
||||
|
||||
t = time(NULL);
|
||||
now = *localtime(&t);
|
||||
|
||||
if (queue->device->instance->vk.trace_mode & RADV_TRACE_MODE_RRA) {
|
||||
if (_mesa_hash_table_num_entries(queue->device->rra_trace.accel_structs) == 0) {
|
||||
fprintf(stderr, "radv: No acceleration structures captured, not saving RRA trace.\n");
|
||||
} else {
|
||||
snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.rra", util_get_process_name(),
|
||||
1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);
|
||||
|
||||
result = radv_rra_dump_trace(_queue, filename);
|
||||
|
||||
if (result == VK_SUCCESS)
|
||||
fprintf(stderr, "radv: RRA capture saved to '%s'\n", filename);
|
||||
else
|
||||
fprintf(stderr, "radv: Failed to save RRA capture!\n");
|
||||
}
|
||||
}
|
||||
if (queue->device->instance->vk.trace_mode & RADV_TRACE_MODE_RRA)
|
||||
queue->device->rra_trace.triggered = true;
|
||||
|
||||
if (queue->device->vk.memory_trace_data.is_enabled) {
|
||||
simple_mtx_lock(&queue->device->vk.memory_trace_data.token_mtx);
|
||||
|
|
@ -1064,7 +1044,9 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
|
|||
}
|
||||
|
||||
if ((device->instance->vk.trace_mode & RADV_TRACE_MODE_RRA) && radv_enable_rt(physical_device, false)) {
|
||||
radv_rra_trace_init(device);
|
||||
result = radv_rra_trace_init(device);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (device->vk.enabled_features.rayTracingPipelineShaderGroupHandleCaptureReplay) {
|
||||
|
|
@ -1087,6 +1069,8 @@ fail:
|
|||
|
||||
radv_sqtt_finish(device);
|
||||
|
||||
radv_rra_trace_finish(radv_device_to_handle(device), &device->rra_trace);
|
||||
|
||||
radv_spm_finish(device);
|
||||
|
||||
radv_trap_handler_finish(device);
|
||||
|
|
|
|||
|
|
@ -803,12 +803,13 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTra
|
|||
goto fail;
|
||||
|
||||
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
|
||||
bool emit_ray_history = !!device->rra_trace.ray_history_buffer;
|
||||
|
||||
radv_hash_rt_shaders(device, pipeline->sha1, stages, pCreateInfo, pipeline->groups);
|
||||
pipeline->base.base.pipeline_hash = *(uint64_t *)pipeline->sha1;
|
||||
|
||||
bool cache_hit = false;
|
||||
if (!keep_executable_info)
|
||||
if (!keep_executable_info && !emit_ray_history)
|
||||
cache_hit = radv_ray_tracing_pipeline_cache_search(device, cache, pipeline, pCreateInfo);
|
||||
|
||||
if (!cache_hit) {
|
||||
|
|
@ -828,7 +829,7 @@ radv_rt_pipeline_create(VkDevice _device, VkPipelineCache _cache, const VkRayTra
|
|||
|
||||
radv_rmv_log_rt_pipeline_create(device, pipeline);
|
||||
|
||||
if (!cache_hit)
|
||||
if (!cache_hit && !emit_ray_history)
|
||||
radv_ray_tracing_pipeline_cache_insert(device, cache, pipeline, pCreateInfo->stageCount, pipeline->sha1);
|
||||
|
||||
/* write shader VAs into group handles */
|
||||
|
|
|
|||
|
|
@ -847,13 +847,131 @@ struct radv_rra_accel_struct_data {
|
|||
|
||||
void radv_destroy_rra_accel_struct_data(VkDevice device, struct radv_rra_accel_struct_data *data);
|
||||
|
||||
/* Header at the start of the host-visible ray history buffer. */
struct radv_ray_history_header {
   /* Allocation cursor: atomically advanced by each token written on the GPU.
    * Initialized to 1 (an "invalid" bit abusing the dword alignment of token
    * sizes) until capture is triggered, then reset to sizeof(header).
    * Must stay at offset 0 (see static_assert in radv_build_token_begin).
    */
   uint32_t offset;
   /* Per-command-buffer dispatch index, pre-shifted into the high 16 bits;
    * written by the CP before each trace-rays dispatch.
    */
   uint32_t dispatch_index;
   /* Running count of dispatches from previously submitted command buffers;
    * added to dispatch_index by the traversal shader.
    */
   uint32_t submit_base_index;
};

/* Token kinds stored in radv_packed_token_header::token_type (2 bits). */
enum radv_packed_token_type {
   radv_packed_token_end_trace,
};

/* First dword of every packed token; layout matches the packing done in
 * radv_build_token_begin (index | hit << 29 | type << 30).
 */
struct radv_packed_token_header {
   uint32_t launch_index : 29;
   uint32_t hit : 1;
   uint32_t token_type : 2;
};
|
||||
|
||||
/* Intermediate (non-RRA) on-GPU format for an end-trace token; more compact
 * than the RRA format, converted when the trace is dumped. Field order and
 * offsets must match the store sequence in radv_build_end_trace_token. Miss
 * tokens are truncated before primitive_id.
 */
struct radv_packed_end_trace_token {
   struct radv_packed_token_header header;

   /* 64-bit acceleration structure address, split into dwords. */
   uint32_t accel_struct_lo;
   uint32_t accel_struct_hi;

   /* Low 16 bits of cull_mask_and_flags, plus the global dispatch index. */
   uint32_t flags : 16;
   uint32_t dispatch_index : 16;

   /* TraceRay SBT parameters and cull mask, packed as in
    * radv_build_end_trace_token.
    */
   uint32_t sbt_offset : 4;
   uint32_t sbt_stride : 4;
   uint32_t miss_index : 16;
   uint32_t cull_mask : 8;

   /* Ray description as passed to traversal. tmax is the original ray extent;
    * the actual hit distance is 't' below.
    */
   float origin[3];
   float tmin;
   float direction[3];
   float tmax;

   /* Traversal statistics: iteration count (low), candidate instance count
    * (high) — see radv_ray_traversal_vars::iteration_instance_count.
    */
   uint32_t iteration_count : 16;
   uint32_t instance_count : 16;

   /* Hit-only fields from here on. */
   uint32_t primitive_id;
   uint32_t geometry_id;

   uint32_t instance_id : 24;
   uint32_t hit_kind : 8;

   /* Hit distance. */
   float t;
};
static_assert(sizeof(struct radv_packed_end_trace_token) == 72, "Unexpected radv_packed_end_trace_token size");
|
||||
|
||||
/* Chunk types for ray history metadata entries in the RRA file. */
enum radv_rra_ray_history_metadata_type {
   RADV_RRA_COUNTER_INFO = 1,
   RADV_RRA_DISPATCH_SIZE = 2,
   RADV_RRA_TRAVERSAL_FLAGS = 3,
};

/* Type/size prefix preceding each metadata payload. */
struct radv_rra_ray_history_metadata_info {
   enum radv_rra_ray_history_metadata_type type : 32;
   uint32_t padding;
   uint64_t size;
};

enum radv_rra_pipeline_type {
   RADV_RRA_PIPELINE_RAY_TRACING,
};

/* Per-dispatch counter payload. Field semantics follow the RRA tooling's
 * expectations (NOTE(review): 'mode', 'mask' and 'lost_token_size' semantics
 * are defined by the RRA format, not by this driver — see the fixed values
 * filled in radv_trace_trace_rays).
 */
struct radv_rra_ray_history_counter {
   uint32_t dispatch_size[3];
   uint32_t hit_shader_count;
   uint32_t miss_shader_count;
   uint32_t shader_count;
   uint64_t pipeline_api_hash;
   uint32_t mode;
   uint32_t mask;
   uint32_t stride;
   uint32_t data_size;
   uint32_t lost_token_size;
   uint32_t ray_id_begin;
   uint32_t ray_id_end;
   enum radv_rra_pipeline_type pipeline_type : 32;
};

struct radv_rra_ray_history_dispatch_size {
   uint32_t size[3];
   uint32_t padding;
};

struct radv_rra_ray_history_traversal_flags {
   uint32_t box_sort_mode : 1;
   uint32_t node_ptr_flags : 1;
   uint32_t reserved : 30;
   uint32_t padding;
};

/* All three metadata payloads with their info prefixes, in the order the RRA
 * format expects; the static_assert pins the exact binary layout.
 */
struct radv_rra_ray_history_metadata {
   struct radv_rra_ray_history_metadata_info counter_info;
   struct radv_rra_ray_history_counter counter;

   struct radv_rra_ray_history_metadata_info dispatch_size_info;
   struct radv_rra_ray_history_dispatch_size dispatch_size;

   struct radv_rra_ray_history_metadata_info traversal_flags_info;
   struct radv_rra_ray_history_traversal_flags traversal_flags;
};
static_assert(sizeof(struct radv_rra_ray_history_metadata) == 136,
              "radv_rra_ray_history_metadata does not match RRA expectations");

/* One heap-allocated record per trace-rays dispatch; owned by the dynarray it
 * is appended to and freed in radv_rra_trace_clear_ray_history().
 */
struct radv_rra_ray_history_data {
   struct radv_rra_ray_history_metadata metadata;
};
|
||||
|
||||
/* Per-device state for RRA (Radeon Raytracing Analyzer) capture. */
struct radv_rra_trace_data {
   /* Captured acceleration structures (values: radv_rra_accel_struct_data). */
   struct hash_table *accel_structs;
   struct hash_table_u64 *accel_struct_vas;
   /* Guards ray_history and related capture state across queue submits. */
   simple_mtx_t data_mtx;
   bool validate_as;
   bool copy_after_build;
   /* Set by capture_trace(); a pending capture is written out on the next
    * present.
    */
   bool triggered;
   uint32_t copy_memory_index;

   /* Dispatch metadata gathered from submitted command buffers
    * (radv_rra_ray_history_data *), plus the GPU buffer the traversal shaders
    * append packed tokens to. The buffer is host-visible: ray_history_data is
    * its persistent mapping, ray_history_addr its device address. A 12-byte
    * radv_ray_history_header sits at the start of the buffer.
    */
   struct util_dynarray ray_history;
   VkBuffer ray_history_buffer;
   VkDeviceMemory ray_history_memory;
   void *ray_history_data;
   uint64_t ray_history_addr;
   uint32_t ray_history_buffer_size;
   /* Only launch ids divisible by this scale are traced (>= 1). */
   uint32_t ray_history_resolution_scale;
};
|
||||
|
||||
enum radv_dispatch_table {
|
||||
|
|
@ -1776,6 +1894,8 @@ struct radv_cmd_buffer {
|
|||
uint64_t shader_upload_seq;
|
||||
|
||||
uint32_t sqtt_cb_id;
|
||||
|
||||
struct util_dynarray ray_history;
|
||||
};
|
||||
|
||||
static inline bool
|
||||
|
|
@ -3002,9 +3122,10 @@ VkResult radv_sqtt_get_timed_cmdbuf(struct radv_queue *queue, struct radeon_wins
|
|||
VkResult radv_sqtt_acquire_gpu_timestamp(struct radv_device *device, struct radeon_winsys_bo **gpu_timestamp_bo,
|
||||
uint32_t *gpu_timestamp_offset, void **gpu_timestamp_ptr);
|
||||
|
||||
void radv_rra_trace_init(struct radv_device *device);
|
||||
VkResult radv_rra_trace_init(struct radv_device *device);
|
||||
|
||||
VkResult radv_rra_dump_trace(VkQueue vk_queue, char *filename);
|
||||
void radv_rra_trace_clear_ray_history(VkDevice _device, struct radv_rra_trace_data *data);
|
||||
void radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data);
|
||||
|
||||
void radv_memory_trace_init(struct radv_device *device);
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ static_assert(sizeof(struct rra_file_header) == 32, "rra_file_header does not ma
|
|||
|
||||
enum rra_chunk_version {
|
||||
RADV_RRA_ASIC_API_INFO_CHUNK_VERSION = 0x1,
|
||||
RADV_RRA_RAY_HISTORY_CHUNK_VERSION = 0x2,
|
||||
RADV_RRA_ACCEL_STRUCT_CHUNK_VERSION = 0xF0005,
|
||||
};
|
||||
|
||||
|
|
@ -894,7 +895,7 @@ exit:
|
|||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
VkResult
|
||||
radv_rra_trace_init(struct radv_device *device)
|
||||
{
|
||||
device->rra_trace.validate_as = debug_get_bool_option("RADV_RRA_TRACE_VALIDATE", false);
|
||||
|
|
@ -906,11 +907,91 @@ radv_rra_trace_init(struct radv_device *device)
|
|||
device->rra_trace.copy_memory_index = radv_find_memory_index(
|
||||
device->physical_device,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
|
||||
|
||||
util_dynarray_init(&device->rra_trace.ray_history, NULL);
|
||||
|
||||
device->rra_trace.ray_history_buffer_size = debug_get_num_option("RADV_RRA_TRACE_HISTORY_SIZE", 100 * 1024 * 1024);
|
||||
if (device->rra_trace.ray_history_buffer_size <
|
||||
sizeof(struct radv_ray_history_header) + sizeof(struct radv_packed_end_trace_token))
|
||||
return VK_SUCCESS;
|
||||
|
||||
device->rra_trace.ray_history_resolution_scale = debug_get_num_option("RADV_RRA_TRACE_RESOLUTION_SCALE", 1);
|
||||
device->rra_trace.ray_history_resolution_scale = MAX2(device->rra_trace.ray_history_resolution_scale, 1);
|
||||
|
||||
VkBufferCreateInfo buffer_create_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext =
|
||||
&(VkBufferUsageFlags2CreateInfoKHR){
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
|
||||
.usage = VK_BUFFER_USAGE_2_TRANSFER_SRC_BIT_KHR | VK_BUFFER_USAGE_2_SHADER_DEVICE_ADDRESS_BIT_KHR,
|
||||
},
|
||||
.size = device->rra_trace.ray_history_buffer_size,
|
||||
};
|
||||
|
||||
VkDevice _device = radv_device_to_handle(device);
|
||||
VkResult result = radv_CreateBuffer(_device, &buffer_create_info, NULL, &device->rra_trace.ray_history_buffer);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
VkMemoryRequirements requirements;
|
||||
vk_common_GetBufferMemoryRequirements(_device, device->rra_trace.ray_history_buffer, &requirements);
|
||||
|
||||
VkMemoryAllocateInfo alloc_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.allocationSize = requirements.size,
|
||||
.memoryTypeIndex = radv_find_memory_index(device->physical_device, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT),
|
||||
};
|
||||
|
||||
result = radv_AllocateMemory(_device, &alloc_info, NULL, &device->rra_trace.ray_history_memory);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = vk_common_MapMemory(_device, device->rra_trace.ray_history_memory, 0, VK_WHOLE_SIZE, 0,
|
||||
(void **)&device->rra_trace.ray_history_data);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
result = vk_common_BindBufferMemory(_device, device->rra_trace.ray_history_buffer,
|
||||
device->rra_trace.ray_history_memory, 0);
|
||||
|
||||
VkBufferDeviceAddressInfo addr_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
|
||||
.buffer = device->rra_trace.ray_history_buffer,
|
||||
};
|
||||
device->rra_trace.ray_history_addr = radv_GetBufferDeviceAddress(_device, &addr_info);
|
||||
|
||||
struct radv_ray_history_header *ray_history_header = device->rra_trace.ray_history_data;
|
||||
memset(ray_history_header, 0, sizeof(struct radv_ray_history_header));
|
||||
ray_history_header->offset = 1;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
radv_rra_trace_clear_ray_history(VkDevice _device, struct radv_rra_trace_data *data)
|
||||
{
|
||||
util_dynarray_foreach (&data->ray_history, struct radv_rra_ray_history_data *, _entry) {
|
||||
struct radv_rra_ray_history_data *entry = *_entry;
|
||||
free(entry);
|
||||
}
|
||||
util_dynarray_clear(&data->ray_history);
|
||||
}
|
||||
|
||||
void
|
||||
radv_rra_trace_finish(VkDevice vk_device, struct radv_rra_trace_data *data)
|
||||
{
|
||||
radv_DestroyBuffer(vk_device, data->ray_history_buffer, NULL);
|
||||
|
||||
if (data->ray_history_memory)
|
||||
vk_common_UnmapMemory(vk_device, data->ray_history_memory);
|
||||
|
||||
radv_FreeMemory(vk_device, data->ray_history_memory, NULL);
|
||||
|
||||
radv_rra_trace_clear_ray_history(vk_device, data);
|
||||
util_dynarray_fini(&data->ray_history);
|
||||
|
||||
if (data->accel_structs)
|
||||
hash_table_foreach (data->accel_structs, entry)
|
||||
radv_destroy_rra_accel_struct_data(vk_device, entry->data);
|
||||
|
|
@ -953,6 +1034,8 @@ struct rra_copy_context {
|
|||
void *mapped_data;
|
||||
|
||||
struct hash_entry **entries;
|
||||
|
||||
uint32_t min_size;
|
||||
};
|
||||
|
||||
static VkResult
|
||||
|
|
@ -962,7 +1045,7 @@ rra_copy_context_init(struct rra_copy_context *ctx)
|
|||
if (device->rra_trace.copy_after_build)
|
||||
return VK_SUCCESS;
|
||||
|
||||
uint32_t max_size = 0;
|
||||
uint32_t max_size = ctx->min_size;
|
||||
uint32_t accel_struct_count = _mesa_hash_table_num_entries(device->rra_trace.accel_structs);
|
||||
for (unsigned i = 0; i < accel_struct_count; i++) {
|
||||
struct radv_rra_accel_struct_data *data = ctx->entries[i]->data;
|
||||
|
|
@ -1115,6 +1198,119 @@ rra_unmap_accel_struct_data(struct rra_copy_context *ctx, uint32_t i)
|
|||
vk_common_UnmapMemory(ctx->device, data->memory);
|
||||
}
|
||||
|
||||
/* Token type identifiers for the RRA ray history token stream.
 * The numeric values (implicitly 0..10, plus 0xFFFF) must match what RRA
 * (Radeon Raytracing Analyzer) expects when parsing "HistoryTokensRaw"
 * chunks — do not reorder.
 */
enum rra_ray_history_token_type {
   rra_ray_history_token_begin,
   rra_ray_history_token_tlas,
   rra_ray_history_token_blas,
   rra_ray_history_token_end,
   rra_ray_history_token_call,
   rra_ray_history_token_timestamp,
   rra_ray_history_token_ahit_status,
   rra_ray_history_token_call2,
   rra_ray_history_token_isec_status,
   rra_ray_history_token_end2,
   rra_ray_history_token_begin2,
   /* NOTE(review): presumably a "plain data, no control token" marker —
    * confirm against the RRA token format. */
   rra_ray_history_token_normal = 0xFFFF,
};
|
||||
|
||||
/* Leading DWORD of every emitted token: identifies which ray the token
 * belongs to. When has_control is set, a rra_ray_history_control_token
 * DWORD immediately follows (this is how the dump code below writes
 * begin/end tokens).
 */
struct rra_ray_history_id_token {
   uint32_t id : 30;         /* ray identifier (the flattened launch index) */
   uint32_t reserved : 1;
   uint32_t has_control : 1; /* a control token follows this DWORD */
};
static_assert(sizeof(struct rra_ray_history_id_token) == 4, "rra_ray_history_id_token does not match RRA expectations");
|
||||
|
||||
/* Control DWORD following an id token with has_control set: describes the
 * type and size of the payload token that comes next.
 */
struct rra_ray_history_control_token {
   uint32_t type : 16;   /* enum rra_ray_history_token_type */
   uint32_t length : 8;  /* payload size in DWORDs (sizeof(payload) / 4) */
   uint32_t data : 8;    /* NOTE(review): unused by this writer — meaning defined by RRA */
};
static_assert(sizeof(struct rra_ray_history_control_token) == 4,
              "rra_ray_history_control_token does not match RRA expectations");
|
||||
|
||||
/* Payload of a "begin" token: records the traceRay() launch parameters for
 * one ray. Field layout is fixed by the RRA file format (64 bytes).
 */
struct rra_ray_history_begin_token {
   uint32_t wave_id;        /* launch_index / 32 — assumes wave32 grouping */
   uint32_t launch_ids[3];  /* x/y/z launch coordinates within the dispatch */
   uint32_t accel_struct_lo; /* TLAS device address, low 32 bits */
   uint32_t accel_struct_hi; /* TLAS device address, high bits (masked to 25 bits by the writer) */
   uint32_t ray_flags;
   uint32_t cull_mask : 8;
   uint32_t stb_offset : 4; /* SBT record offset */
   uint32_t stb_stride : 4; /* SBT record stride */
   uint32_t miss_index : 16;
   float origin[3];
   float tmin;
   float direction[3];
   float tmax;
};
static_assert(sizeof(struct rra_ray_history_begin_token) == 64,
              "rra_ray_history_begin_token does not match RRA expectations");
|
||||
|
||||
/* Extended "begin" token: the base begin token plus wave-identification
 * fields. Not emitted by this writer yet; layout kept for RRA compatibility.
 */
struct rra_ray_history_begin2_token {
   struct rra_ray_history_begin_token base;
   uint32_t call_instruction_id;
   uint32_t unique_wave_id;
   uint32_t parent_unique_wave_id;
};
static_assert(sizeof(struct rra_ray_history_begin2_token) == 76,
              "rra_ray_history_begin2_token does not match RRA expectations");
|
||||
|
||||
/* Payload of an "end" token: identifies the primitive that was hit.
 * The writer below stores 0xFFFFFFFF in both fields when the ray missed.
 */
struct rra_ray_history_end_token {
   uint32_t primitive_index;
   uint32_t geometry_index;
};
static_assert(sizeof(struct rra_ray_history_end_token) == 8,
              "rra_ray_history_end_token does not match RRA expectations");
|
||||
|
||||
/* Extended "end" token: base hit identification plus traversal statistics.
 * instance_index/hit_kind/t are only meaningful when the ray hit something;
 * the writer leaves them zero-initialized on a miss.
 */
struct rra_ray_history_end2_token {
   struct rra_ray_history_end_token base;
   uint32_t instance_index : 24;
   uint32_t hit_kind : 8;
   uint32_t iteration_count;          /* traversal iterations for this ray */
   uint32_t candidate_instance_count; /* instances considered during traversal */
   float t;                           /* hit distance */
};
static_assert(sizeof(struct rra_ray_history_end2_token) == 24,
              "rra_ray_history_end2_token does not match RRA expectations");
|
||||
|
||||
/* Payload of a "tlas" token: device address of the top-level acceleration
 * structure being traversed.
 */
struct rra_ray_history_tlas_token {
   uint64_t addr;
};
static_assert(sizeof(struct rra_ray_history_tlas_token) == 8,
              "rra_ray_history_tlas_token does not match RRA expectations");
|
||||
|
||||
/* Payload of a "blas" token: device address of a bottom-level acceleration
 * structure entered during traversal.
 */
struct rra_ray_history_blas_token {
   uint64_t addr;
};
static_assert(sizeof(struct rra_ray_history_blas_token) == 8,
              "rra_ray_history_blas_token does not match RRA expectations");
|
||||
|
||||
/* Payload of a shader "call" token: 64-bit shader address split into two
 * DWORDs (lo, hi).
 */
struct rra_ray_history_call_token {
   uint32_t addr[2];
};
static_assert(sizeof(struct rra_ray_history_call_token) == 8,
              "rra_ray_history_call_token does not match RRA expectations");
|
||||
|
||||
/* Extended shader "call" token: adds the shader binding table index of the
 * invoked shader.
 */
struct rra_ray_history_call2_token {
   struct rra_ray_history_call_token base;
   uint32_t sbt_index;
};
static_assert(sizeof(struct rra_ray_history_call2_token) == 12,
              "rra_ray_history_call2_token does not match RRA expectations");
|
||||
|
||||
/* Payload of an "intersection status" token: result reported by an
 * intersection shader.
 */
struct rra_ray_history_isec_token {
   float t;           /* reported hit distance */
   uint32_t hit_kind;
};
static_assert(sizeof(struct rra_ray_history_isec_token) == 8,
              "rra_ray_history_isec_token does not match RRA expectations");
|
||||
|
||||
/* Payload of a "timestamp" token: raw GPU timestamp value. */
struct rra_ray_history_timestamp_token {
   uint64_t gpu_timestamp;
};
static_assert(sizeof(struct rra_ray_history_timestamp_token) == 8,
              "rra_ray_history_timestamp_token does not match RRA expectations");
|
||||
|
||||
VkResult
|
||||
radv_rra_dump_trace(VkQueue vk_queue, char *filename)
|
||||
{
|
||||
|
|
@ -1127,13 +1323,22 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
|
|||
return result;
|
||||
|
||||
uint64_t *accel_struct_offsets = NULL;
|
||||
uint64_t *ray_history_offsets = NULL;
|
||||
struct hash_entry **hash_entries = NULL;
|
||||
FILE *file = NULL;
|
||||
|
||||
uint32_t struct_count = _mesa_hash_table_num_entries(device->rra_trace.accel_structs);
|
||||
accel_struct_offsets = calloc(struct_count, sizeof(uint64_t));
|
||||
if (!accel_struct_offsets)
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
uint32_t dispatch_count =
|
||||
util_dynarray_num_elements(&device->rra_trace.ray_history, struct radv_rra_ray_history_data *);
|
||||
ray_history_offsets = calloc(dispatch_count, sizeof(uint64_t));
|
||||
if (!ray_history_offsets) {
|
||||
result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
hash_entries = malloc(sizeof(*hash_entries) * struct_count);
|
||||
if (!hash_entries) {
|
||||
|
|
@ -1175,6 +1380,7 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
|
|||
.queue = vk_queue,
|
||||
.entries = hash_entries,
|
||||
.family_index = queue->vk.queue_family_index,
|
||||
.min_size = device->rra_trace.ray_history_buffer_size,
|
||||
};
|
||||
|
||||
result = rra_copy_context_init(©_ctx);
|
||||
|
|
@ -1197,6 +1403,118 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
|
|||
written_accel_struct_count++;
|
||||
}
|
||||
|
||||
uint64_t ray_history_offset = (uint64_t)ftell(file);
|
||||
|
||||
uint32_t ray_history_index = 0xFFFFFFFF;
|
||||
struct radv_rra_ray_history_data *ray_history = NULL;
|
||||
|
||||
uint8_t *history = device->rra_trace.ray_history_data;
|
||||
struct radv_ray_history_header *history_header = (void *)history;
|
||||
|
||||
uint32_t history_buffer_size_mb = device->rra_trace.ray_history_buffer_size / 1024 / 1024;
|
||||
uint32_t history_size_mb = history_header->offset / 1024 / 1024;
|
||||
if (history_header->offset > device->rra_trace.ray_history_buffer_size) {
|
||||
fprintf(stderr, "radv: rra: The ray history buffer size (%u MB) is to small. %u MB is required.\n",
|
||||
history_buffer_size_mb, history_size_mb);
|
||||
} else {
|
||||
fprintf(stderr, "radv: rra: Ray history buffer size = %u MB, ray history size = %u MB.\n", history_buffer_size_mb,
|
||||
history_size_mb);
|
||||
}
|
||||
|
||||
uint32_t token_size;
|
||||
for (uint32_t offset = sizeof(struct radv_ray_history_header); offset < history_header->offset;
|
||||
offset += token_size) {
|
||||
struct radv_packed_end_trace_token *src = (void *)(history + offset);
|
||||
token_size = src->header.hit ? sizeof(struct radv_packed_end_trace_token)
|
||||
: offsetof(struct radv_packed_end_trace_token, primitive_id);
|
||||
|
||||
if (src->dispatch_index != ray_history_index) {
|
||||
ray_history_index = src->dispatch_index;
|
||||
assert(ray_history_index < dispatch_count);
|
||||
ray_history = *util_dynarray_element(&device->rra_trace.ray_history, struct radv_rra_ray_history_data *,
|
||||
ray_history_index);
|
||||
|
||||
assert(!ray_history_offsets[ray_history_index]);
|
||||
ray_history_offsets[ray_history_index] = (uint64_t)ftell(file);
|
||||
fwrite(&ray_history->metadata, sizeof(struct radv_rra_ray_history_metadata), 1, file);
|
||||
}
|
||||
|
||||
uint32_t *dispatch_size = ray_history->metadata.dispatch_size.size;
|
||||
|
||||
uint32_t x = src->header.launch_index % dispatch_size[0];
|
||||
uint32_t y = (src->header.launch_index / dispatch_size[0]) % dispatch_size[1];
|
||||
uint32_t z = src->header.launch_index / (dispatch_size[0] * dispatch_size[1]);
|
||||
|
||||
struct rra_ray_history_id_token begin_id = {
|
||||
.id = src->header.launch_index,
|
||||
.has_control = true,
|
||||
};
|
||||
struct rra_ray_history_control_token begin_control = {
|
||||
.type = rra_ray_history_token_begin,
|
||||
.length = sizeof(struct rra_ray_history_begin_token) / 4,
|
||||
};
|
||||
struct rra_ray_history_begin_token begin = {
|
||||
.wave_id = src->header.launch_index / 32,
|
||||
.launch_ids = {x, y, z},
|
||||
.accel_struct_lo = src->accel_struct_lo,
|
||||
.accel_struct_hi = src->accel_struct_hi & 0x1FFFFFF,
|
||||
.ray_flags = src->flags,
|
||||
.cull_mask = src->cull_mask,
|
||||
.stb_offset = src->sbt_offset,
|
||||
.stb_stride = src->sbt_stride,
|
||||
.miss_index = src->miss_index,
|
||||
.origin[0] = src->origin[0],
|
||||
.origin[1] = src->origin[1],
|
||||
.origin[2] = src->origin[2],
|
||||
.tmin = src->tmin,
|
||||
.direction[0] = src->direction[0],
|
||||
.direction[1] = src->direction[1],
|
||||
.direction[2] = src->direction[2],
|
||||
.tmax = src->tmax,
|
||||
};
|
||||
fwrite(&begin_id, sizeof(begin_id), 1, file);
|
||||
fwrite(&begin_control, sizeof(begin_control), 1, file);
|
||||
fwrite(&begin, sizeof(begin), 1, file);
|
||||
|
||||
struct rra_ray_history_id_token end_id = {
|
||||
.id = src->header.launch_index,
|
||||
.has_control = true,
|
||||
};
|
||||
struct rra_ray_history_control_token end_control = {
|
||||
.type = rra_ray_history_token_end2,
|
||||
.length = sizeof(struct rra_ray_history_end2_token) / 4,
|
||||
};
|
||||
struct rra_ray_history_end2_token end = {
|
||||
.base.primitive_index = 0xFFFFFFFF,
|
||||
.base.geometry_index = 0xFFFFFFFF,
|
||||
.iteration_count = src->iteration_count,
|
||||
.candidate_instance_count = src->instance_count,
|
||||
};
|
||||
|
||||
if (src->header.hit) {
|
||||
end.base.primitive_index = src->primitive_id;
|
||||
end.base.geometry_index = src->geometry_id;
|
||||
end.instance_index = src->instance_id;
|
||||
end.hit_kind = src->hit_kind;
|
||||
end.t = src->t;
|
||||
}
|
||||
|
||||
fwrite(&end_id, sizeof(end_id), 1, file);
|
||||
fwrite(&end_control, sizeof(end_control), 1, file);
|
||||
fwrite(&end, sizeof(end), 1, file);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < dispatch_count; i++) {
|
||||
if (ray_history_offsets[i])
|
||||
continue;
|
||||
|
||||
ray_history = *util_dynarray_element(&device->rra_trace.ray_history, struct radv_rra_ray_history_data *, i);
|
||||
ray_history_offsets[i] = (uint64_t)ftell(file);
|
||||
fwrite(&ray_history->metadata, sizeof(struct radv_rra_ray_history_metadata), 1, file);
|
||||
}
|
||||
|
||||
history_header->offset = 1;
|
||||
|
||||
rra_copy_context_finish(©_ctx);
|
||||
|
||||
uint64_t chunk_info_offset = (uint64_t)ftell(file);
|
||||
|
|
@ -1204,10 +1522,24 @@ radv_rra_dump_trace(VkQueue vk_queue, char *filename)
|
|||
rra_dump_chunk_description(asic_info_offset, 0, sizeof(struct rra_asic_info), "AsicInfo",
|
||||
RADV_RRA_ASIC_API_INFO_CHUNK_VERSION, file);
|
||||
|
||||
for (uint32_t i = 0; i < dispatch_count; i++) {
|
||||
uint64_t tokens_size;
|
||||
if (i == dispatch_count - 1)
|
||||
tokens_size = (uint64_t)(chunk_info_offset - ray_history_offsets[i]);
|
||||
else
|
||||
tokens_size = (uint64_t)(ray_history_offsets[i + 1] - ray_history_offsets[i]);
|
||||
tokens_size -= sizeof(struct radv_rra_ray_history_metadata);
|
||||
|
||||
rra_dump_chunk_description(ray_history_offsets[i], 0, sizeof(struct radv_rra_ray_history_metadata),
|
||||
"HistoryMetadata", RADV_RRA_RAY_HISTORY_CHUNK_VERSION, file);
|
||||
rra_dump_chunk_description(ray_history_offsets[i] + sizeof(struct radv_rra_ray_history_metadata), 0, tokens_size,
|
||||
"HistoryTokensRaw", RADV_RRA_RAY_HISTORY_CHUNK_VERSION, file);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < written_accel_struct_count; ++i) {
|
||||
uint64_t accel_struct_size;
|
||||
if (i == written_accel_struct_count - 1)
|
||||
accel_struct_size = (uint64_t)(chunk_info_offset - accel_struct_offsets[i]);
|
||||
accel_struct_size = (uint64_t)(ray_history_offset - accel_struct_offsets[i]);
|
||||
else
|
||||
accel_struct_size = (uint64_t)(accel_struct_offsets[i + 1] - accel_struct_offsets[i]);
|
||||
|
||||
|
|
@ -1227,6 +1559,7 @@ cleanup:
|
|||
fclose(file);
|
||||
|
||||
free(hash_entries);
|
||||
free(ray_history_offsets);
|
||||
free(accel_struct_offsets);
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue