mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 00:00:11 +01:00
tu: Cache copy timestamp cs to avoid allocations on submit
For non-ONE_TIME_SUBMIT command buffers we have to create a CS that copies u_trace timestamps. Having a CS pool allows us to avoid allocations in most cases. Together with the previous commit, BO allocations on submit would be rare during normal u_trace tracing. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35637>
This commit is contained in:
parent
a1b9aa3385
commit
6e5944ec4b
3 changed files with 100 additions and 69 deletions
|
|
@ -2173,43 +2173,56 @@ tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
|
|||
return u_trace_end_iterator(&cmdbuf->trace);
|
||||
}
|
||||
}
|
||||
|
||||
VkResult
|
||||
tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
|
||||
struct u_trace **trace_copy)
|
||||
tu_create_copy_timestamp_cs(struct tu_u_trace_submission_data *submission_data,
|
||||
struct tu_cmd_buffer **cmd_buffers,
|
||||
uint32_t cmd_buffer_count,
|
||||
uint32_t trace_chunks_to_copy)
|
||||
{
|
||||
*cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
|
||||
sizeof(struct tu_cs), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
submission_data->last_buffer_with_tracepoints = -1;
|
||||
|
||||
if (*cs == NULL) {
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
tu_device *device = cmd_buffers[0]->device;
|
||||
uint32_t cs_size = trace_chunks_to_copy * 6 * 2 + 3;
|
||||
|
||||
if (!list_is_empty(&device->copy_timestamp_cs_pool)) {
|
||||
submission_data->timestamp_copy_data =
|
||||
list_first_entry(&device->copy_timestamp_cs_pool,
|
||||
struct tu_copy_timestamp_data, node);
|
||||
list_del(&submission_data->timestamp_copy_data->node);
|
||||
} else {
|
||||
submission_data->timestamp_copy_data =
|
||||
(struct tu_copy_timestamp_data *) vk_zalloc(
|
||||
&device->vk.alloc, sizeof(struct tu_copy_timestamp_data), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
|
||||
tu_cs_init(&submission_data->timestamp_copy_data->cs, device,
|
||||
TU_CS_MODE_GROW, cs_size, "trace copy timestamp cs");
|
||||
u_trace_init(&submission_data->timestamp_copy_data->trace,
|
||||
&device->trace_context);
|
||||
}
|
||||
|
||||
tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
|
||||
list_length(&cmdbuf->trace.trace_chunks) * 6 * 2 + 3, "trace copy timestamp cs");
|
||||
tu_cs *cs = &submission_data->timestamp_copy_data->cs;
|
||||
|
||||
tu_cs_begin(*cs);
|
||||
tu_cs_begin(cs);
|
||||
|
||||
tu_cs_emit_wfi(*cs);
|
||||
tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
|
||||
tu_cs_emit_wfi(cs);
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
|
||||
|
||||
*trace_copy = (struct u_trace *) vk_zalloc(
|
||||
&cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
for (uint32_t i = 0; i < cmd_buffer_count; i++) {
|
||||
struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
|
||||
|
||||
if (*trace_copy == NULL) {
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
if (!u_trace_has_points(&cmdbuf->trace) ||
|
||||
(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
|
||||
continue;
|
||||
|
||||
u_trace_clone_append(
|
||||
tu_cmd_begin_iterator(cmdbuf), tu_cmd_end_iterator(cmdbuf),
|
||||
&submission_data->timestamp_copy_data->trace, cs, tu_copy_buffer);
|
||||
}
|
||||
|
||||
u_trace_init(*trace_copy, cmdbuf->trace.utctx);
|
||||
u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
|
||||
tu_cmd_end_iterator(cmdbuf),
|
||||
*trace_copy, *cs,
|
||||
tu_copy_buffer);
|
||||
|
||||
tu_cs_emit_wfi(*cs);
|
||||
|
||||
tu_cs_end(*cs);
|
||||
tu_cs_emit_wfi(cs);
|
||||
tu_cs_end(cs);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
@ -2231,13 +2244,14 @@ tu_u_trace_submission_data_create(
|
|||
}
|
||||
|
||||
struct tu_u_trace_submission_data *data = *submission_data;
|
||||
uint32_t trace_chunks_to_copy = 0;
|
||||
|
||||
data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
|
||||
data->trace_per_cmd_buffer = (struct u_trace **) vk_zalloc(
|
||||
&device->vk.alloc,
|
||||
cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
|
||||
cmd_buffer_count * sizeof(struct u_trace *), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
|
||||
|
||||
if (!data->cmd_trace_data) {
|
||||
if (!data->trace_per_cmd_buffer) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
|
|
@ -2258,19 +2272,16 @@ tu_u_trace_submission_data_create(
|
|||
* single-use. Therefore we have to copy trace points and create
|
||||
* a new timestamp buffer on every submit of reusable command buffer.
|
||||
*/
|
||||
if (tu_create_copy_timestamp_cs(cmdbuf,
|
||||
&data->cmd_trace_data[i].timestamp_copy_cs,
|
||||
&data->cmd_trace_data[i].trace) != VK_SUCCESS) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
|
||||
trace_chunks_to_copy += list_length(&cmdbuf->trace.trace_chunks);
|
||||
} else {
|
||||
data->cmd_trace_data[i].trace = &cmdbuf->trace;
|
||||
data->trace_per_cmd_buffer[i] = &cmdbuf->trace;
|
||||
}
|
||||
}
|
||||
|
||||
assert(data->last_buffer_with_tracepoints != -1);
|
||||
if (trace_chunks_to_copy > 0) {
|
||||
tu_create_copy_timestamp_cs(data, cmd_buffers, cmd_buffer_count,
|
||||
trace_chunks_to_copy);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
|
|
@ -2281,20 +2292,31 @@ fail:
|
|||
return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
/* Destroys one tu_copy_timestamp_data entry and releases its memory.
 *
 * device: device whose allocator (device->vk.alloc) is used to free the entry.
 * data:   entry to destroy; it must not be used after this call.
 */
void
|
||||
tu_free_copy_timestamp_data(struct tu_device *device,
|
||||
struct tu_copy_timestamp_data *data)
|
||||
{
|
||||
/* The entry may or may not currently sit on a list; unlink it only if it does. */
if (list_is_linked(&data->node))
|
||||
list_del(&data->node);
|
||||
/* Tear down the embedded command stream and trace before freeing the
 * structure that contains them.
 */
tu_cs_finish(&data->cs);
|
||||
u_trace_fini(&data->trace);
|
||||
vk_free(&device->vk.alloc, data);
|
||||
}
|
||||
|
||||
void
|
||||
tu_u_trace_submission_data_finish(
|
||||
struct tu_device *device,
|
||||
struct tu_u_trace_submission_data *submission_data)
|
||||
{
|
||||
for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
|
||||
/* Only if we had to create a copy of trace we should free it */
|
||||
struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
|
||||
if (cmd_data->timestamp_copy_cs) {
|
||||
tu_cs_finish(cmd_data->timestamp_copy_cs);
|
||||
vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
|
||||
|
||||
u_trace_fini(cmd_data->trace);
|
||||
vk_free(&device->vk.alloc, cmd_data->trace);
|
||||
if (submission_data->timestamp_copy_data) {
|
||||
if (u_trace_enabled(&device->trace_context)) {
|
||||
tu_cs_reset(&submission_data->timestamp_copy_data->cs);
|
||||
u_trace_fini(&submission_data->timestamp_copy_data->trace);
|
||||
list_addtail(&submission_data->timestamp_copy_data->node,
|
||||
&device->copy_timestamp_cs_pool);
|
||||
} else {
|
||||
tu_free_copy_timestamp_data(device,
|
||||
submission_data->timestamp_copy_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2305,7 +2327,7 @@ tu_u_trace_submission_data_finish(
|
|||
mtx_unlock(&device->kgsl_profiling_mutex);
|
||||
}
|
||||
|
||||
vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
|
||||
vk_free(&device->vk.alloc, submission_data->trace_per_cmd_buffer);
|
||||
vk_free(&device->vk.alloc, submission_data);
|
||||
}
|
||||
|
||||
|
|
@ -2866,6 +2888,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
tu_gpu_tracepoint_config_variable();
|
||||
|
||||
device->submit_count = 0;
|
||||
list_inithead(&device->copy_timestamp_cs_pool);
|
||||
u_trace_context_init(&device->trace_context, device,
|
||||
sizeof(uint64_t),
|
||||
12,
|
||||
|
|
@ -3025,6 +3048,12 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
|
|||
if (device->null_accel_struct_bo)
|
||||
tu_bo_finish(device, device->null_accel_struct_bo);
|
||||
|
||||
list_for_each_entry_safe(struct tu_copy_timestamp_data, data,
|
||||
&device->copy_timestamp_cs_pool, node)
|
||||
{
|
||||
tu_free_copy_timestamp_data(device, data);
|
||||
}
|
||||
|
||||
if (device->trace_suballoc) {
|
||||
tu_bo_suballocator_finish(device->trace_suballoc);
|
||||
vk_free(&device->vk.alloc, device->trace_suballoc);
|
||||
|
|
|
|||
|
|
@ -442,6 +442,7 @@ struct tu_device
|
|||
uint64_t fault_count;
|
||||
|
||||
struct u_trace_context trace_context;
|
||||
struct list_head copy_timestamp_cs_pool;
|
||||
|
||||
#ifdef HAVE_PERFETTO
|
||||
struct tu_perfetto_state perfetto;
|
||||
|
|
@ -557,16 +558,16 @@ tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream,
|
|||
void *ts_to, uint64_t to_offset_B,
|
||||
uint64_t size_B);
|
||||
|
||||
|
||||
VkResult
|
||||
tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
|
||||
struct u_trace **trace_copy);
|
||||
tu_create_copy_timestamp_cs(struct tu_u_trace_submission_data *submission_data,
|
||||
struct tu_cmd_buffer **cmd_buffers,
|
||||
uint32_t cmd_buffer_count,
|
||||
uint32_t trace_chunks_to_copy);
|
||||
|
||||
/* If we copy trace and timestamps we will have to free them. */
|
||||
struct tu_u_trace_cmd_data
|
||||
{
|
||||
struct tu_cs *timestamp_copy_cs;
|
||||
struct u_trace *trace;
|
||||
struct tu_copy_timestamp_data {
|
||||
struct list_head node;
|
||||
struct tu_cs cs;
|
||||
struct u_trace trace;
|
||||
};
|
||||
|
||||
/* Data necessary to retrieve timestamps and clean all
|
||||
|
|
@ -584,7 +585,9 @@ struct tu_u_trace_submission_data
|
|||
|
||||
uint32_t cmd_buffer_count;
|
||||
uint32_t last_buffer_with_tracepoints;
|
||||
struct tu_u_trace_cmd_data *cmd_trace_data;
|
||||
void *mem_ctx;
|
||||
struct u_trace **trace_per_cmd_buffer;
|
||||
struct tu_copy_timestamp_data *timestamp_copy_data;
|
||||
|
||||
/* GPU time is reset on GPU power cycle and the GPU time
|
||||
* offset may change between submissions due to power cycle.
|
||||
|
|
|
|||
|
|
@ -124,11 +124,10 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
|
|||
cs->entry_count);
|
||||
|
||||
if (u_trace_submission_data &&
|
||||
u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
|
||||
struct tu_cs_entry *trace_cs_entry =
|
||||
&u_trace_submission_data->cmd_trace_data[i]
|
||||
.timestamp_copy_cs->entries[0];
|
||||
submit_add_entries(device, submit, &dump_cmds, trace_cs_entry, 1);
|
||||
u_trace_submission_data->timestamp_copy_data) {
|
||||
struct tu_cs *cs = &u_trace_submission_data->timestamp_copy_data->cs;
|
||||
submit_add_entries(device, submit, &dump_cmds, cs->entries,
|
||||
cs->entry_count);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -209,15 +208,15 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit)
|
|||
|
||||
for (uint32_t i = 0; i < u_trace_submission_data->cmd_buffer_count; i++) {
|
||||
bool free_data = i == u_trace_submission_data->last_buffer_with_tracepoints;
|
||||
if (u_trace_submission_data->cmd_trace_data[i].trace)
|
||||
u_trace_flush(u_trace_submission_data->cmd_trace_data[i].trace,
|
||||
if (u_trace_submission_data->trace_per_cmd_buffer[i])
|
||||
u_trace_flush(u_trace_submission_data->trace_per_cmd_buffer[i],
|
||||
u_trace_submission_data, queue->device->vk.current_frame,
|
||||
free_data);
|
||||
|
||||
if (!u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
|
||||
/* u_trace is owned by cmd_buffer */
|
||||
u_trace_submission_data->cmd_trace_data[i].trace = NULL;
|
||||
}
|
||||
}
|
||||
if (u_trace_submission_data->timestamp_copy_data) {
|
||||
u_trace_flush(&u_trace_submission_data->timestamp_copy_data->trace,
|
||||
u_trace_submission_data, queue->device->vk.current_frame,
|
||||
true);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue