diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index 7ef7ee88637..514487e4c21 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -2719,6 +2719,7 @@ fail_global_bo_map:
    TU_RMV(resource_destroy, device, device->global_bo);
    tu_bo_finish(device, device->global_bo);
    vk_free(&device->vk.alloc, device->submit_bo_list);
+   util_dynarray_fini(&device->dump_bo_list);
 fail_global_bo:
    ir3_compiler_destroy(device->compiler);
    util_sparse_array_finish(&device->bo_map);
@@ -2823,6 +2824,7 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
    pthread_cond_destroy(&device->timeline_cond);
    _mesa_hash_table_destroy(device->bo_sizes, NULL);
    vk_free(&device->vk.alloc, device->submit_bo_list);
+   util_dynarray_fini(&device->dump_bo_list);
    vk_device_finish(&device->vk);
    vk_free(&device->vk.alloc, device);
 }
@@ -3426,6 +3428,53 @@ tu_debug_bos_print_stats(struct tu_device *dev)
    mtx_unlock(&dev->bo_mutex);
 }
 
+/* Add bo to dev->dump_bo_list (the device's bo list for dumping).
+ * bo->dump_bo_list_idx is always reset to ~0 first; the BO is only
+ * appended, under dev->bo_mutex, when FD_RD_DUMP(ENABLE) is set.
+ */
+void
+tu_dump_bo_init(struct tu_device *dev, struct tu_bo *bo)
+{
+   bo->dump_bo_list_idx = ~0;
+
+   if (!FD_RD_DUMP(ENABLE))
+      return;
+
+   mtx_lock(&dev->bo_mutex);
+   uint32_t idx =
+      util_dynarray_num_elements(&dev->dump_bo_list, struct tu_bo *);
+   bo->dump_bo_list_idx = idx;
+   util_dynarray_append(&dev->dump_bo_list, struct tu_bo *, bo);
+   mtx_unlock(&dev->bo_mutex);
+}
+
+/* Remove bo from dev->dump_bo_list, if it was added to it.
+ *
+ * Removal is swap-with-last: the tail entry is popped and moved into bo's
+ * slot, and its dump_bo_list_idx is updated to match.
+ */
+void
+tu_dump_bo_del(struct tu_device *dev, struct tu_bo *bo)
+{
+   if (bo->dump_bo_list_idx == ~0)
+      return;
+
+   mtx_lock(&dev->bo_mutex);
+   struct tu_bo *exchanging_bo =
+      util_dynarray_pop(&dev->dump_bo_list, struct tu_bo *);
+   /* When bo is itself the tail, the pop has already removed it; storing
+    * into its old slot would write past the new end of the array.
+    */
+   if (exchanging_bo != bo) {
+      *util_dynarray_element(&dev->dump_bo_list, struct tu_bo *,
+                             bo->dump_bo_list_idx) = exchanging_bo;
+      exchanging_bo->dump_bo_list_idx = bo->dump_bo_list_idx;
+   }
+   /* bo is no longer in the list; make a repeated call a no-op. */
+   bo->dump_bo_list_idx = ~0;
+   mtx_unlock(&dev->bo_mutex);
+}
+
 void
 tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
                               const VkDebugUtilsLabelEXT *pLabelInfo)
diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h
index 
ac9dc35b51c..27652f82652 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -347,6 +347,8 @@ struct tu_device struct drm_msm_gem_submit_bo *submit_bo_list; /* map bo handles to bo list index: */ uint32_t submit_bo_count, submit_bo_list_size; + /* bo list for dumping: */ + struct util_dynarray dump_bo_list; mtx_t bo_mutex; /* protects imported BOs creation/freeing */ struct u_rwlock dma_bo_lock; @@ -581,4 +583,10 @@ tu_debug_bos_del(struct tu_device *dev, struct tu_bo *bo); void tu_debug_bos_print_stats(struct tu_device *dev); +void +tu_dump_bo_init(struct tu_device *dev, struct tu_bo *bo); +void +tu_dump_bo_del(struct tu_device *dev, struct tu_bo *bo); + + #endif /* TU_DEVICE_H */ diff --git a/src/freedreno/vulkan/tu_knl.cc b/src/freedreno/vulkan/tu_knl.cc index bc797b32cbf..80e3900c3a9 100644 --- a/src/freedreno/vulkan/tu_knl.cc +++ b/src/freedreno/vulkan/tu_knl.cc @@ -53,6 +53,8 @@ tu_bo_init_new_explicit_iova(struct tu_device *dev, (*out_bo)->iova, (*out_bo)->size, VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT); + (*out_bo)->dump = flags & TU_BO_ALLOC_ALLOW_DUMP; + return VK_SUCCESS; } @@ -73,7 +75,7 @@ tu_bo_init_dmabuf(struct tu_device *dev, */ if (dev->physical_device->has_cached_non_coherent_memory) (*bo)->cached_non_coherent = true; - + return VK_SUCCESS; } @@ -208,6 +210,8 @@ if (!(DETECT_ARCH_AARCH64 || DETECT_ARCH_X86 || DETECT_ARCH_X86_64)) void tu_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo) { dev->instance->knl->bo_allow_dump(dev, bo); + + p_atomic_set(&bo->dump, true); } void diff --git a/src/freedreno/vulkan/tu_knl.h b/src/freedreno/vulkan/tu_knl.h index 5914de2e7c4..b41f203b04b 100644 --- a/src/freedreno/vulkan/tu_knl.h +++ b/src/freedreno/vulkan/tu_knl.h @@ -58,6 +58,7 @@ struct tu_bo { int32_t refcnt; uint32_t submit_bo_list_idx; + uint32_t dump_bo_list_idx; #ifdef TU_HAS_KGSL /* We have to store fd returned by ion_fd_data @@ -71,6 +72,8 @@ struct tu_bo { bool never_unmap : 1; bool cached_non_coherent : 1; 
+ bool dump; + /* Pointer to the vk_object_base associated with the BO * for the purposes of VK_EXT_device_address_binding_report */ diff --git a/src/freedreno/vulkan/tu_knl_drm.cc b/src/freedreno/vulkan/tu_knl_drm.cc index ec64249d2b9..27916d0fee8 100644 --- a/src/freedreno/vulkan/tu_knl_drm.cc +++ b/src/freedreno/vulkan/tu_knl_drm.cc @@ -77,6 +77,7 @@ tu_drm_bo_finish(struct tu_device *dev, struct tu_bo *bo) TU_RMV(bo_destroy, dev, bo); tu_debug_bos_del(dev, bo); + tu_dump_bo_del(dev, bo); mtx_lock(&dev->bo_mutex); dev->submit_bo_count--; diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc index e98b960e289..5388b321e2b 100644 --- a/src/freedreno/vulkan/tu_knl_drm_msm.cc +++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc @@ -537,6 +537,8 @@ tu_bo_init(struct tu_device *dev, mtx_unlock(&dev->bo_mutex); + tu_dump_bo_init(dev, bo); + TU_RMV(bo_allocate, dev, bo); return VK_SUCCESS; @@ -798,7 +800,6 @@ msm_queue_submit(struct tu_queue *queue, void *_submit, (struct tu_msm_queue_submit *)_submit; struct drm_msm_gem_submit_syncobj *in_syncobjs, *out_syncobjs; struct drm_msm_gem_submit req; - uint32_t submit_idx = queue->device->submit_count; uint64_t gpu_offset = 0; uint32_t entry_count = util_dynarray_num_elements(&submit->commands, struct drm_msm_gem_submit_cmd); @@ -889,46 +890,6 @@ msm_queue_submit(struct tu_queue *queue, void *_submit, .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj), }; - if (req.nr_cmds && FD_RD_DUMP(ENABLE) && - fd_rd_output_begin(&queue->device->rd_output, submit_idx)) { - struct tu_device *device = queue->device; - struct fd_rd_output *rd_output = &device->rd_output; - - if (FD_RD_DUMP(FULL)) { - VkResult result = tu_wait_fence(device, queue->msm_queue_id, queue->fence, ~0); - if (result != VK_SUCCESS) { - mesa_loge("FD_RD_DUMP_FULL: wait on previous submission for device %u and queue %d failed: %u", - device->device_idx, queue->msm_queue_id, 0); - } - } - - fd_rd_output_write_section(rd_output, 
RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 8); - fd_rd_output_write_section(rd_output, RD_CMD, "tu-dump", 8); - - for (unsigned i = 0; i < device->submit_bo_count; i++) { - struct drm_msm_gem_submit_bo bo = device->submit_bo_list[i]; - struct tu_bo *tu_bo = tu_device_lookup_bo(device, bo.handle); - uint64_t iova = bo.presumed; - - uint32_t buf[3] = { iova, tu_bo->size, iova >> 32 }; - fd_rd_output_write_section(rd_output, RD_GPUADDR, buf, 12); - if (bo.flags & MSM_SUBMIT_BO_DUMP || FD_RD_DUMP(FULL)) { - tu_bo_map(device, tu_bo, NULL); /* note: this would need locking to be safe */ - fd_rd_output_write_section(rd_output, RD_BUFFER_CONTENTS, tu_bo->map, tu_bo->size); - } - } - - util_dynarray_foreach (&submit->commands, struct drm_msm_gem_submit_cmd, - cmd) { - uint64_t iova = device->submit_bo_list[cmd->submit_idx].presumed + cmd->submit_offset; - uint32_t size = cmd->size >> 2; - uint32_t buf[3] = { iova, size, iova >> 32 }; - fd_rd_output_write_section(rd_output, RD_CMDSTREAM_ADDR, buf, 12); - } - - fd_rd_output_end(rd_output); - } - ret = drmCommandWriteRead(queue->device->fd, DRM_MSM_GEM_SUBMIT, &req, sizeof(req)); diff --git a/src/freedreno/vulkan/tu_knl_drm_virtio.cc b/src/freedreno/vulkan/tu_knl_drm_virtio.cc index ab466dae950..c4630f3ea48 100644 --- a/src/freedreno/vulkan/tu_knl_drm_virtio.cc +++ b/src/freedreno/vulkan/tu_knl_drm_virtio.cc @@ -582,6 +582,8 @@ tu_bo_init(struct tu_device *dev, mtx_unlock(&dev->bo_mutex); + tu_dump_bo_init(dev, bo); + return VK_SUCCESS; } diff --git a/src/freedreno/vulkan/tu_knl_kgsl.cc b/src/freedreno/vulkan/tu_knl_kgsl.cc index 26284929e15..b6e17fd4ebd 100644 --- a/src/freedreno/vulkan/tu_knl_kgsl.cc +++ b/src/freedreno/vulkan/tu_knl_kgsl.cc @@ -261,9 +261,9 @@ kgsl_bo_init(struct tu_device *dev, * and the CPU mapping must stay fixed for the lifetime of the BO. 
*/ bo->never_unmap = true; - } + tu_dump_bo_init(dev, bo); *out_bo = bo; @@ -321,6 +321,8 @@ kgsl_bo_init_dmabuf(struct tu_device *dev, .shared_fd = os_dupfd_cloexec(fd), }; + tu_dump_bo_init(dev, bo); + *out_bo = bo; return VK_SUCCESS; @@ -380,6 +382,7 @@ kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo) TU_RMV(bo_destroy, dev, bo); tu_debug_bos_del(dev, bo); + tu_dump_bo_del(dev, bo); struct kgsl_gpumem_free_id req = { .id = bo->gem_handle diff --git a/src/freedreno/vulkan/tu_queue.cc b/src/freedreno/vulkan/tu_queue.cc index dcefcb94ce7..6c5632ba311 100644 --- a/src/freedreno/vulkan/tu_queue.cc +++ b/src/freedreno/vulkan/tu_queue.cc @@ -51,12 +51,27 @@ tu_get_submitqueue_priority(const struct tu_physical_device *pdevice, return priority; } +static void +submit_add_entries(struct tu_device *dev, void *submit, + struct util_dynarray *dump_cmds, + struct tu_cs_entry *entries, unsigned num_entries) +{ + tu_submit_add_entries(dev, submit, entries, num_entries); + if (FD_RD_DUMP(ENABLE)) { + util_dynarray_append_array(dump_cmds, struct tu_cs_entry, entries, + num_entries); + } +} + static VkResult queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit) { struct tu_queue *queue = list_entry(_queue, struct tu_queue, vk); struct tu_device *device = queue->device; bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context); + struct util_dynarray dump_cmds; + + util_dynarray_init(&dump_cmds, NULL); uint32_t perf_pass_index = device->perfcntrs_pass_cs_entries ? 
vk_submit->perf_pass_index : ~0; @@ -102,28 +117,71 @@ queue_submit(struct vk_queue *_queue, struct vk_queue_submit *vk_submit) struct tu_cs_entry *perf_cs_entry = &cmd_buffer->device->perfcntrs_pass_cs_entries[perf_pass_index]; - tu_submit_add_entries(device, submit, perf_cs_entry, 1); + submit_add_entries(device, submit, &dump_cmds, perf_cs_entry, 1); } - tu_submit_add_entries(device, submit, cs->entries, - cs->entry_count); + submit_add_entries(device, submit, &dump_cmds, cs->entries, + cs->entry_count); if (u_trace_submission_data && u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) { struct tu_cs_entry *trace_cs_entry = &u_trace_submission_data->cmd_trace_data[i] .timestamp_copy_cs->entries[0]; - tu_submit_add_entries(device, submit, trace_cs_entry, 1); + submit_add_entries(device, submit, &dump_cmds, trace_cs_entry, 1); } } if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count)) { struct tu_cs *autotune_cs = tu_autotune_on_submit( device, &device->autotune, cmd_buffers, cmdbuf_count); - tu_submit_add_entries(device, submit, autotune_cs->entries, - autotune_cs->entry_count); + submit_add_entries(device, submit, &dump_cmds, autotune_cs->entries, + autotune_cs->entry_count); } + if (cmdbuf_count && FD_RD_DUMP(ENABLE) && + fd_rd_output_begin(&queue->device->rd_output, + queue->device->submit_count)) { + struct tu_device *device = queue->device; + struct fd_rd_output *rd_output = &device->rd_output; + + if (FD_RD_DUMP(FULL)) { + VkResult result = tu_queue_wait_fence(queue, queue->fence, ~0); + if (result != VK_SUCCESS) { + mesa_loge("FD_RD_DUMP_FULL: wait on previous submission for device %u and queue %d failed: %u", + device->device_idx, queue->msm_queue_id, 0); + } + } + + fd_rd_output_write_section(rd_output, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 8); + fd_rd_output_write_section(rd_output, RD_CMD, "tu-dump", 8); + + mtx_lock(&device->bo_mutex); + util_dynarray_foreach (&device->dump_bo_list, struct tu_bo *, bo_ptr) { + 
struct tu_bo *bo = *bo_ptr; + uint64_t iova = bo->iova; + + uint32_t buf[3] = { iova, bo->size, iova >> 32 }; + fd_rd_output_write_section(rd_output, RD_GPUADDR, buf, 12); + if (bo->dump || FD_RD_DUMP(FULL)) { + tu_bo_map(device, bo, NULL); /* note: this would need locking to be safe */ + fd_rd_output_write_section(rd_output, RD_BUFFER_CONTENTS, bo->map, bo->size); + } + } + mtx_unlock(&device->bo_mutex); + + util_dynarray_foreach (&dump_cmds, struct tu_cs_entry, cmd) { + uint64_t iova = cmd->bo->iova + cmd->offset; + uint32_t size = cmd->size >> 2; + uint32_t buf[3] = { iova, size, iova >> 32 }; + fd_rd_output_write_section(rd_output, RD_CMDSTREAM_ADDR, buf, 12); + } + + fd_rd_output_end(rd_output); + } + + util_dynarray_fini(&dump_cmds); + result = tu_queue_submit(queue, submit, vk_submit->waits, vk_submit->wait_count, vk_submit->signals, vk_submit->signal_count,