diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index e6a9b015865..f6ee69fb9a0 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -2173,43 +2173,56 @@ tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
    return u_trace_end_iterator(&cmdbuf->trace);
 }
 }
 
+
 VkResult
-tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
-                            struct u_trace **trace_copy)
+tu_create_copy_timestamp_cs(struct tu_u_trace_submission_data *submission_data,
+                            struct tu_cmd_buffer **cmd_buffers,
+                            uint32_t cmd_buffer_count,
+                            uint32_t trace_chunks_to_copy)
 {
-   *cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
-                                    sizeof(struct tu_cs), 8,
-                                    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   submission_data->last_buffer_with_tracepoints = -1;
 
-   if (*cs == NULL) {
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   tu_device *device = cmd_buffers[0]->device;
+   uint32_t cs_size = trace_chunks_to_copy * 6 * 2 + 3;
+
+   if (!list_is_empty(&device->copy_timestamp_cs_pool)) {
+      submission_data->timestamp_copy_data =
+         list_first_entry(&device->copy_timestamp_cs_pool,
+                          struct tu_copy_timestamp_data, node);
+      list_del(&submission_data->timestamp_copy_data->node);
+   } else {
+      submission_data->timestamp_copy_data =
+         (struct tu_copy_timestamp_data *) vk_zalloc(
+            &device->vk.alloc, sizeof(struct tu_copy_timestamp_data), 8,
+            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+
+      tu_cs_init(&submission_data->timestamp_copy_data->cs, device,
+                 TU_CS_MODE_GROW, cs_size, "trace copy timestamp cs");
+      u_trace_init(&submission_data->timestamp_copy_data->trace,
+                   &device->trace_context);
    }
 
-   tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
-              list_length(&cmdbuf->trace.trace_chunks) * 6 * 2 + 3, "trace copy timestamp cs");
+   tu_cs *cs = &submission_data->timestamp_copy_data->cs;
 
-   tu_cs_begin(*cs);
+   tu_cs_begin(cs);
 
-   tu_cs_emit_wfi(*cs);
-   tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
+   tu_cs_emit_wfi(cs);
+   tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
 
-   *trace_copy = (struct u_trace *) vk_zalloc(
-      &cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
-      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   for (uint32_t i = 0; i < cmd_buffer_count; i++) {
+      struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
 
-   if (*trace_copy == NULL) {
-      return VK_ERROR_OUT_OF_HOST_MEMORY;
+      if (!u_trace_has_points(&cmdbuf->trace) ||
+          (cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
+         continue;
+
+      u_trace_clone_append(
+         tu_cmd_begin_iterator(cmdbuf), tu_cmd_end_iterator(cmdbuf),
+         &submission_data->timestamp_copy_data->trace, cs, tu_copy_buffer);
    }
 
-   u_trace_init(*trace_copy, cmdbuf->trace.utctx);
-   u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
-                        tu_cmd_end_iterator(cmdbuf),
-                        *trace_copy, *cs,
-                        tu_copy_buffer);
-
-   tu_cs_emit_wfi(*cs);
-
-   tu_cs_end(*cs);
+   tu_cs_emit_wfi(cs);
+   tu_cs_end(cs);
 
    return VK_SUCCESS;
 }
@@ -2231,13 +2244,14 @@ tu_u_trace_submission_data_create(
    }
 
    struct tu_u_trace_submission_data *data = *submission_data;
+   uint32_t trace_chunks_to_copy = 0;
 
-   data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
+   data->trace_per_cmd_buffer = (struct u_trace **) vk_zalloc(
       &device->vk.alloc,
-      cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
+      cmd_buffer_count * sizeof(struct u_trace *), 8,
       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
 
-   if (!data->cmd_trace_data) {
+   if (!data->trace_per_cmd_buffer) {
       goto fail;
    }
@@ -2258,19 +2272,16 @@ tu_u_trace_submission_data_create(
          * single-use. Therefor we have to copy trace points and create
          * a new timestamp buffer on every submit of reusable command buffer.
          */
-         if (tu_create_copy_timestamp_cs(cmdbuf,
-                                         &data->cmd_trace_data[i].timestamp_copy_cs,
-                                         &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
-            goto fail;
-         }
-
-         assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
+         trace_chunks_to_copy += list_length(&cmdbuf->trace.trace_chunks);
       } else {
-         data->cmd_trace_data[i].trace = &cmdbuf->trace;
+         data->trace_per_cmd_buffer[i] = &cmdbuf->trace;
       }
    }
 
-   assert(data->last_buffer_with_tracepoints != -1);
+   if (trace_chunks_to_copy > 0) {
+      tu_create_copy_timestamp_cs(data, cmd_buffers, cmd_buffer_count,
+                                  trace_chunks_to_copy);
+   }
 
    return VK_SUCCESS;
 
@@ -2281,20 +2292,31 @@ fail:
    return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 }
 
+void
+tu_free_copy_timestamp_data(struct tu_device *device,
+                            struct tu_copy_timestamp_data *data)
+{
+   if (list_is_linked(&data->node))
+      list_del(&data->node);
+   tu_cs_finish(&data->cs);
+   u_trace_fini(&data->trace);
+   vk_free(&device->vk.alloc, data);
+}
+
 void
 tu_u_trace_submission_data_finish(
    struct tu_device *device,
    struct tu_u_trace_submission_data *submission_data)
 {
-   for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
-      /* Only if we had to create a copy of trace we should free it */
-      struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
-      if (cmd_data->timestamp_copy_cs) {
-         tu_cs_finish(cmd_data->timestamp_copy_cs);
-         vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
-
-         u_trace_fini(cmd_data->trace);
-         vk_free(&device->vk.alloc, cmd_data->trace);
+   if (submission_data->timestamp_copy_data) {
+      if (u_trace_enabled(&device->trace_context)) {
+         tu_cs_reset(&submission_data->timestamp_copy_data->cs);
+         u_trace_fini(&submission_data->timestamp_copy_data->trace);
+         list_addtail(&submission_data->timestamp_copy_data->node,
+                      &device->copy_timestamp_cs_pool);
+      } else {
+         tu_free_copy_timestamp_data(device,
+                                     submission_data->timestamp_copy_data);
       }
    }
 
@@ -2305,7 +2327,7 @@ tu_u_trace_submission_data_finish(
       mtx_unlock(&device->kgsl_profiling_mutex);
    }
 
-   vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
+   vk_free(&device->vk.alloc, submission_data->trace_per_cmd_buffer);
    vk_free(&device->vk.alloc, submission_data);
 }
 
@@ -2866,6 +2888,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
    tu_gpu_tracepoint_config_variable();
 
    device->submit_count = 0;
+   list_inithead(&device->copy_timestamp_cs_pool);
 
    u_trace_context_init(&device->trace_context, device, sizeof(uint64_t), 12,
@@ -3025,6 +3048,12 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    if (device->null_accel_struct_bo)
       tu_bo_finish(device, device->null_accel_struct_bo);
 
+   list_for_each_entry_safe(struct tu_copy_timestamp_data, data,
+                            &device->copy_timestamp_cs_pool, node)
+   {
+      tu_free_copy_timestamp_data(device, data);
+   }
+
    if (device->trace_suballoc) {
       tu_bo_suballocator_finish(device->trace_suballoc);
       vk_free(&device->vk.alloc, device->trace_suballoc);
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index 3e71c82da65..d4701e97391 100644
--- a/src/freedreno/vulkan/tu_device.h
+++ b/src/freedreno/vulkan/tu_device.h
@@ -442,6 +442,7 @@ struct tu_device
    uint64_t fault_count;
 
    struct u_trace_context trace_context;
+   struct list_head copy_timestamp_cs_pool;
 
 #ifdef HAVE_PERFETTO
    struct tu_perfetto_state perfetto;
@@ -557,16 +558,16 @@ tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream, void
                *ts_to, uint64_t to_offset_B, uint64_t size_B);
 
-
 VkResult
-tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
-                            struct u_trace **trace_copy);
+tu_create_copy_timestamp_cs(struct tu_u_trace_submission_data *submission_data,
+                            struct tu_cmd_buffer **cmd_buffers,
+                            uint32_t cmd_buffer_count,
+                            uint32_t trace_chunks_to_copy);
 
-/* If we copy trace and timestamps we will have to free them. */
-struct tu_u_trace_cmd_data
-{
-   struct tu_cs *timestamp_copy_cs;
-   struct u_trace *trace;
+struct tu_copy_timestamp_data {
+   struct list_head node;
+   struct tu_cs cs;
+   struct u_trace trace;
 };
 
 /* Data necessary to retrieve timestamps and clean all
@@ -584,7 +585,9 @@ struct tu_u_trace_submission_data
    uint32_t cmd_buffer_count;
    uint32_t last_buffer_with_tracepoints;
 
-   struct tu_u_trace_cmd_data *cmd_trace_data;
+   void *mem_ctx;
+   struct u_trace **trace_per_cmd_buffer;
+   struct tu_copy_timestamp_data *timestamp_copy_data;
 
    /* GPU time is reset on GPU power cycle and the GPU time
     * offset may change between submissions due to power cycle.
diff --git a/src/freedreno/vulkan/tu_queue.cc b/src/freedreno/vulkan/tu_queue.cc
index f99072715a9..8fee3663e1e 100644
--- a/src/freedreno/vulkan/tu_queue.cc
+++ b/src/freedreno/vulkan/tu_queue.cc
@@ -124,11 +124,10 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
                          cs->entry_count);
 
       if (u_trace_submission_data &&
-          u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
-         struct tu_cs_entry *trace_cs_entry =
-            &u_trace_submission_data->cmd_trace_data[i]
-                .timestamp_copy_cs->entries[0];
-         submit_add_entries(device, submit, &dump_cmds, trace_cs_entry, 1);
+          u_trace_submission_data->timestamp_copy_data) {
+         struct tu_cs *cs = &u_trace_submission_data->timestamp_copy_data->cs;
+         submit_add_entries(device, submit, &dump_cmds, cs->entries,
+                            cs->entry_count);
       }
    }
@@ -209,15 +208,15 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
      for (uint32_t i = 0; i < u_trace_submission_data->cmd_buffer_count; i++) {
         bool free_data = i == u_trace_submission_data->last_buffer_with_tracepoints;
 
-        if (u_trace_submission_data->cmd_trace_data[i].trace)
-           u_trace_flush(u_trace_submission_data->cmd_trace_data[i].trace,
+        if (u_trace_submission_data->trace_per_cmd_buffer[i])
+           u_trace_flush(u_trace_submission_data->trace_per_cmd_buffer[i],
                          u_trace_submission_data,
                          queue->device->vk.current_frame, free_data);
-
-        if (!u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
-           /* u_trace is owned by cmd_buffer */
-           u_trace_submission_data->cmd_trace_data[i].trace = NULL;
-        }
+     }
+     if (u_trace_submission_data->timestamp_copy_data) {
+        u_trace_flush(&u_trace_submission_data->timestamp_copy_data->trace,
+                      u_trace_submission_data,
+                      queue->device->vk.current_frame, true);
      }
   }
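
The core idea of the patch is that device->copy_timestamp_cs_pool acts as a free list: tu_create_copy_timestamp_cs pops a pooled tu_copy_timestamp_data when one exists and only allocates and initializes the cs and u_trace otherwise, while tu_u_trace_submission_data_finish resets the entry and pushes it back instead of destroying it as long as tracing is enabled. This avoids a vk_zalloc/tu_cs_init per submit of reusable command buffers. Below is a minimal standalone C sketch of that acquire/release pattern; the names (copy_timestamp_data, acquire, release) and the single "next" link are hypothetical simplifications, not the driver's list_head/tu_cs/u_trace code.

/* Minimal sketch of the free-list reuse pattern behind copy_timestamp_cs_pool. */
#include <stdio.h>
#include <stdlib.h>

struct copy_timestamp_data {
   struct copy_timestamp_data *next; /* stands in for the list_head node */
   int state;                        /* stands in for the cs + u_trace state */
};

static struct copy_timestamp_data *pool; /* analogue of device->copy_timestamp_cs_pool */

static struct copy_timestamp_data *
acquire(void)
{
   if (pool) {
      /* pool is non-empty: reuse an entry, like the !list_is_empty() path */
      struct copy_timestamp_data *data = pool;
      pool = data->next;
      return data;
   }
   /* pool is empty: allocate, and (in the driver) init cs + u_trace once */
   return (struct copy_timestamp_data *) calloc(1, sizeof(struct copy_timestamp_data));
}

static void
release(struct copy_timestamp_data *data)
{
   /* reset instead of destroy, then push back for the next submission */
   data->state = 0;
   data->next = pool;
   pool = data;
}

int
main(void)
{
   struct copy_timestamp_data *a = acquire();
   release(a);
   struct copy_timestamp_data *b = acquire(); /* returns the pooled entry */
   printf("reused entry: %s\n", a == b ? "yes" : "no");
   free(b);
   return 0;
}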