diff --git a/docs/drivers/freedreno.rst b/docs/drivers/freedreno.rst index 422864a3a46..b842327834a 100644 --- a/docs/drivers/freedreno.rst +++ b/docs/drivers/freedreno.rst @@ -369,9 +369,8 @@ Command Stream Capture ^^^^^^^^^^^^^^^^^^^^^^ During Mesa development, it's often useful to look at the command streams we -send to the kernel. Mesa itself doesn't implement a way to stream them out -(though it maybe should!). Instead, we have an interface for the kernel to -capture all submitted command streams: +send to the kernel. We have an interface for the kernel to capture all +submitted command streams: .. code-block:: sh @@ -391,6 +390,28 @@ probably want to cause a crash in the GPU during a frame of interest so that a single GPU core dump is generated. Emitting ``0xdeadbeef`` in the CS should be enough to cause a fault. +``fd_rd_output`` facilities provide support for generating the command stream +capture from inside Mesa. Different ``FD_RD_DUMP`` options are available: + +- ``enable`` simply enables dumping the command stream on each submit for a + given logical device. When a more advanced option is specified, ``enable`` is + implied automatically. +- ``combine`` will combine all dumps into a single file instead of writing the + dump for each submit into a standalone file. +- ``full`` will dump every buffer object, which is necessary for replays of + command streams (see below). +- ``trigger`` will establish a trigger file through which dumps can be better + controlled. Writing a positive integer value into the file will enable dumping + of that many subsequent submits. Writing -1 will enable dumping of submits + until disabled. Writing 0 (or any other value) will disable dumps. + +Output dump files and the trigger file (when enabled) are hard-coded to be placed +under ``/tmp``, or ``/data/local/tmp`` under Android. + +Functionality is generic to any Freedreno-based backend, but is currently only +integrated in the MSM backend of Turnip. 
Using the existing ``TU_DEBUG=rd`` +option will translate to ``FD_RD_DUMP=enable``. + Capturing Hang RD +++++++++++++++++ diff --git a/src/freedreno/common/freedreno_rd_output.c b/src/freedreno/common/freedreno_rd_output.c new file mode 100644 index 00000000000..135e181d694 --- /dev/null +++ b/src/freedreno/common/freedreno_rd_output.c @@ -0,0 +1,250 @@ +/* + * Copyright © 2024 Igalia S.L. + * SPDX-License-Identifier: MIT + */ + +#include "freedreno_rd_output.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "c11/threads.h" +#include "util/log.h" +#include "util/u_atomic.h" +#include "util/u_debug.h" + +#ifdef ANDROID +static const char *fd_rd_output_base_path = "/data/local/tmp"; +#else +static const char *fd_rd_output_base_path = "/tmp"; +#endif + +static const struct debug_control fd_rd_dump_options[] = { + { "enable", FD_RD_DUMP_ENABLE }, + { "combine", FD_RD_DUMP_COMBINE }, + { "full", FD_RD_DUMP_FULL }, + { "trigger", FD_RD_DUMP_TRIGGER }, + { NULL, 0 } +}; + +struct fd_rd_dump_env fd_rd_dump_env; + +static void +fd_rd_dump_env_init_once(void) +{ + fd_rd_dump_env.flags = parse_debug_string(os_get_option("FD_RD_DUMP"), + fd_rd_dump_options); + + /* If any of the more-detailed FD_RD_DUMP flags is enabled, the general + * FD_RD_DUMP_ENABLE flag should also implicitly be set. + */ + if (fd_rd_dump_env.flags & ~FD_RD_DUMP_ENABLE) + fd_rd_dump_env.flags |= FD_RD_DUMP_ENABLE; +} + +void +fd_rd_dump_env_init(void) +{ + static once_flag once = ONCE_FLAG_INIT; + call_once(&once, fd_rd_dump_env_init_once); +} + +static void +fd_rd_output_sanitize_name(char *name) +{ + /* The name string is null-terminated after being constructed via snprintf. + * Sanitize it by reducing to an underscore anything that's not a hyphen, + * underscore, dot or alphanumeric character. 
+    */
+   for (char *s = name; *s; ++s) {
+      /* <ctype.h> functions need an unsigned char value; plain char can be negative. */
+      if (!isalnum((unsigned char) *s) && *s != '-' && *s != '_' && *s != '.')
+         *s = '_';
+   }
+}
+
+void
+fd_rd_output_init(struct fd_rd_output *output, char* output_name)
+{
+   snprintf(output->name, sizeof(output->name), "%s", output_name);
+   fd_rd_output_sanitize_name(output->name);
+
+   output->combine = false;
+   output->file = NULL;
+   output->trigger_fd = -1;
+   output->trigger_count = 0;
+
+   if (FD_RD_DUMP(COMBINE)) {
+      output->combine = true;
+
+      char file_path[256];
+      snprintf(file_path, sizeof(file_path), "%s/%s_combined.rd",
+               fd_rd_output_base_path, output->name);
+      output->file = gzopen(file_path, "w");
+   }
+
+   if (FD_RD_DUMP(TRIGGER)) {
+      char file_path[256];
+      snprintf(file_path, sizeof(file_path), "%s/%s_trigger",
+               fd_rd_output_base_path, output->name);
+      output->trigger_fd = open(file_path, O_RDWR | O_CREAT | O_TRUNC, 0600);
+   }
+}
+
+void
+fd_rd_output_fini(struct fd_rd_output *output)
+{
+   if (output->file != NULL) {
+      assert(output->combine);
+      gzclose(output->file);
+   }
+
+   if (output->trigger_fd >= 0) {
+      close(output->trigger_fd);
+
+      /* Remove the trigger file. The filename is reconstructed here
+       * instead of having to spend memory to store it in the struct.
+       */
+      char file_path[256];
+      snprintf(file_path, sizeof(file_path), "%s/%s_trigger",
+               fd_rd_output_base_path, output->name);
+      unlink(file_path);
+   }
+}
+
+static void
+fd_rd_output_update_trigger_count(struct fd_rd_output *output)
+{
+   assert(FD_RD_DUMP(TRIGGER));
+
+   /* Retrieve the trigger file size, only attempt to update the trigger
+    * value if anything was actually written to that file.
+    */
+   struct stat stat;
+   if (fstat(output->trigger_fd, &stat) != 0) {
+      mesa_loge("[fd_rd_output] failed to access the %s trigger file",
+                output->name);
+      return;
+   }
+
+   if (stat.st_size == 0)
+      return;
+
+   char trigger_data[32];
+   int ret = read(output->trigger_fd, trigger_data, sizeof(trigger_data));
+   if (ret < 0) {
+      mesa_loge("[fd_rd_output] failed to read from the %s trigger file",
+                output->name);
+      return;
+   }
+   int num_read = MIN2(ret, sizeof(trigger_data) - 1); /* keep room for the NUL terminator */
+
+   /* After reading from it, the trigger file should be reset, which means
+    * moving the file offset to the start of the file as well as truncating
+    * it to zero bytes.
+    */
+   if (lseek(output->trigger_fd, 0, SEEK_SET) < 0) {
+      mesa_loge("[fd_rd_output] failed to reset the %s trigger file position",
+                output->name);
+      return;
+   }
+
+   if (ftruncate(output->trigger_fd, 0) < 0) {
+      mesa_loge("[fd_rd_output] failed to truncate the %s trigger file",
+                output->name);
+      return;
+   }
+
+   /* Try to decode the count value through strtol. -1 translates to UINT_MAX
+    * and keeps generating dumps until disabled. Any positive value will
+    * allow generating dumps for that many submits. Any other value will
+    * disable any further generation of RD dumps.
+    */
+   trigger_data[num_read] = '\0';
+   int32_t value = strtol(trigger_data, NULL, 0);
+
+   if (value == -1) {
+      output->trigger_count = UINT_MAX;
+      mesa_logi("[fd_rd_output] %s trigger enabling RD dumps until disabled",
+                output->name);
+   } else if (value > 0) {
+      output->trigger_count = (uint32_t) value;
+      mesa_logi("[fd_rd_output] %s trigger enabling RD dumps for next %u submissions",
+                output->name, output->trigger_count);
+   } else {
+      output->trigger_count = 0;
+      mesa_logi("[fd_rd_output] %s trigger disabling RD dumps", output->name);
+   }
+}
+
+bool
+fd_rd_output_begin(struct fd_rd_output *output, uint32_t submit_idx)
+{
+   assert(output->combine ^ (output->file == NULL));
+
+   if (FD_RD_DUMP(TRIGGER)) {
+      fd_rd_output_update_trigger_count(output);
+
+      if (output->trigger_count == 0)
+         return false;
+      /* UINT_MAX corresponds to generating dumps until disabled. */
+      if (output->trigger_count != UINT_MAX)
+         --output->trigger_count;
+   }
+
+   if (output->combine)
+      return true;
+
+   char file_path[256];
+   snprintf(file_path, sizeof(file_path), "%s/%s_%.5d.rd",
+            fd_rd_output_base_path, output->name, submit_idx);
+   output->file = gzopen(file_path, "w");
+   return true;
+}
+
+static void
+fd_rd_output_write(struct fd_rd_output *output, const void *buffer, int size)
+{
+   const uint8_t *pos = (uint8_t *) buffer;
+   while (size > 0) {
+      int ret = gzwrite(output->file, pos, size);
+      if (ret <= 0) { /* gzwrite() returns 0 on error; stop instead of looping forever */
+         mesa_loge("[fd_rd_output] failed to write to compressed output: %s",
+                   output->file ? gzerror(output->file, NULL) : "no open file");
+         return;
+      }
+      pos += ret;
+      size -= ret;
+   }
+}
+
+void
+fd_rd_output_write_section(struct fd_rd_output *output, enum rd_sect_type type,
+                           const void *buffer, int size)
+{
+   fd_rd_output_write(output, &type, 4);
+   fd_rd_output_write(output, &size, 4);
+   fd_rd_output_write(output, buffer, size);
+}
+
+void
+fd_rd_output_end(struct fd_rd_output *output)
+{
+   assert(output->file != NULL);
+
+   /* When combining output, flush the gzip stream on each submit.
This should + * store all the data before any problem during the submit itself occurs. + */ + if (output->combine) { + gzflush(output->file, Z_FINISH); + return; + } + + gzclose(output->file); + output->file = NULL; +} diff --git a/src/freedreno/common/freedreno_rd_output.h b/src/freedreno/common/freedreno_rd_output.h new file mode 100644 index 00000000000..0c6058ac933 --- /dev/null +++ b/src/freedreno/common/freedreno_rd_output.h @@ -0,0 +1,66 @@ +/* + * Copyright © 2024 Igalia S.L. + * SPDX-License-Identifier: MIT + */ + +#ifndef __FREEDRENO_RD_OUTPUT_H__ +#define __FREEDRENO_RD_OUTPUT_H__ + +#include +#include +#include + +#include "redump.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum fd_rd_dump_flags { + FD_RD_DUMP_ENABLE = 1 << 0, + FD_RD_DUMP_COMBINE = 1 << 1, + FD_RD_DUMP_FULL = 1 << 2, + FD_RD_DUMP_TRIGGER = 1 << 3, +}; + +struct fd_rd_dump_env { + uint32_t flags; +}; + +extern struct fd_rd_dump_env fd_rd_dump_env; + +#define FD_RD_DUMP(name) unlikely(fd_rd_dump_env.flags & FD_RD_DUMP_##name) + +void +fd_rd_dump_env_init(void); + +struct fd_rd_output { + char name[128]; + bool combine; + gzFile file; + + int trigger_fd; + uint32_t trigger_count; +}; + +void +fd_rd_output_init(struct fd_rd_output *output, char* output_name); + +void +fd_rd_output_fini(struct fd_rd_output *output); + +bool +fd_rd_output_begin(struct fd_rd_output *output, uint32_t submit_idx); + +void +fd_rd_output_write_section(struct fd_rd_output *output, enum rd_sect_type type, + const void *buffer, int size); + +void +fd_rd_output_end(struct fd_rd_output *output); + +#ifdef __cplusplus +} +#endif + +#endif /* __FREEDRENO_RD_OUTPUT_H__ */ diff --git a/src/freedreno/common/meson.build b/src/freedreno/common/meson.build index 26ce856bc88..20a54e0adf9 100644 --- a/src/freedreno/common/meson.build +++ b/src/freedreno/common/meson.build @@ -38,6 +38,8 @@ libfreedreno_common = static_library( 'freedreno_dev_info.c', 'freedreno_dev_info.h', 'freedreno_pm4.h', + 'freedreno_rd_output.c', + 
'freedreno_rd_output.h', 'freedreno_uuid.c', 'freedreno_uuid.h', 'freedreno_guardband.h', diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index 1b95e5f2821..8367a8a54a2 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -21,6 +21,7 @@ #include "util/hex.h" #include "util/driconf.h" #include "util/os_misc.h" +#include "util/u_process.h" #include "vk_shader_module.h" #include "vk_sampler.h" #include "vk_util.h" @@ -2219,6 +2220,7 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, device->instance = physical_device->instance; device->physical_device = physical_device; + device->device_idx = device->physical_device->device_count++; result = tu_drm_device_init(device); if (result != VK_SUCCESS) { @@ -2492,6 +2494,26 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice, tu_breadcrumbs_init(device); + if (FD_RD_DUMP(ENABLE)) { + struct vk_app_info *app_info = &device->instance->vk.app_info; + const char *app_name_str = app_info->app_name ? + app_info->app_name : util_get_process_name(); + const char *engine_name_str = app_info->engine_name ? 
+ app_info->engine_name : "unknown-engine"; + + char app_name[64]; + snprintf(app_name, sizeof(app_name), "%s", app_name_str); + + char engine_name[32]; + snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str); + + char output_name[128]; + snprintf(output_name, sizeof(output_name), "tu_%s.%s_device%u", + app_name, engine_name, device->device_idx); + + fd_rd_output_init(&device->rd_output, output_name); + } + *pDevice = tu_device_to_handle(device); return VK_SUCCESS; @@ -2547,6 +2569,9 @@ tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) if (!device) return; + if (FD_RD_DUMP(ENABLE)) + fd_rd_output_fini(&device->rd_output); + tu_breadcrumbs_finish(device); u_trace_context_fini(&device->trace_context); diff --git a/src/freedreno/vulkan/tu_device.h b/src/freedreno/vulkan/tu_device.h index e0434148211..2015339873c 100644 --- a/src/freedreno/vulkan/tu_device.h +++ b/src/freedreno/vulkan/tu_device.h @@ -20,6 +20,7 @@ #include "tu_suballoc.h" #include "tu_util.h" +#include "common/freedreno_rd_output.h" #include "util/vma.h" #include "util/u_vector.h" @@ -123,6 +124,8 @@ struct tu_physical_device struct vk_sync_type syncobj_type; struct vk_sync_timeline_type timeline_type; const struct vk_sync_type *sync_types[3]; + + uint32_t device_count; }; VK_DEFINE_HANDLE_CASTS(tu_physical_device, vk.base, VkPhysicalDevice, VK_OBJECT_TYPE_PHYSICAL_DEVICE) @@ -253,6 +256,7 @@ struct tu_device int queue_count[TU_MAX_QUEUE_FAMILIES]; struct tu_physical_device *physical_device; + uint32_t device_idx; int fd; struct ir3_compiler *compiler; @@ -397,6 +401,8 @@ struct tu_device bool use_z24uint_s8uint; bool use_lrz; + + struct fd_rd_output rd_output; }; VK_DEFINE_HANDLE_CASTS(tu_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE) diff --git a/src/freedreno/vulkan/tu_knl_drm_msm.cc b/src/freedreno/vulkan/tu_knl_drm_msm.cc index 581a65e0e5b..810c17e6d1c 100644 --- a/src/freedreno/vulkan/tu_knl_drm_msm.cc +++ b/src/freedreno/vulkan/tu_knl_drm_msm.cc @@ -867,7 
+867,7 @@ tu_queue_build_msm_gem_submit_cmds(struct tu_queue *queue,
 static VkResult
 tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
 {
-   queue->device->submit_count++;
+   uint32_t submit_idx = queue->device->submit_count++;
 
    struct tu_cs *autotune_cs = NULL;
    if (submit->autotune_fence) {
@@ -910,39 +910,43 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
       .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
    };
 
-   if (TU_DEBUG(RD)) {
+   if (FD_RD_DUMP(ENABLE) && fd_rd_output_begin(&queue->device->rd_output, submit_idx)) {
       struct tu_device *device = queue->device;
-      static uint32_t submit_idx;
-      char path[32];
-      sprintf(path, "%.5d.rd", p_atomic_inc_return(&submit_idx));
-      int rd = open(path, O_CLOEXEC | O_WRONLY | O_CREAT | O_TRUNC, 0777);
-      if (rd >= 0) {
-         rd_write_section(rd, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 4);
+      struct fd_rd_output *rd_output = &device->rd_output;
 
-         rd_write_section(rd, RD_CMD, "tu-dump", 8);
-
-         for (unsigned i = 0; i < device->bo_count; i++) {
-            struct drm_msm_gem_submit_bo bo = device->bo_list[i];
-            struct tu_bo *tu_bo = tu_device_lookup_bo(device, bo.handle);
-            uint64_t iova = bo.presumed;
-
-            uint32_t buf[3] = { iova, tu_bo->size, iova >> 32 };
-            rd_write_section(rd, RD_GPUADDR, buf, 12);
-            if (bo.flags & MSM_SUBMIT_BO_DUMP) {
-               msm_bo_map(device, tu_bo); /* note: this would need locking to be safe */
-               rd_write_section(rd, RD_BUFFER_CONTENTS, tu_bo->map, tu_bo->size);
-            }
+      if (FD_RD_DUMP(FULL)) {
+         VkResult result = tu_wait_fence(device, queue->msm_queue_id, queue->fence, ~0);
+         if (result != VK_SUCCESS) {
+            mesa_loge("FD_RD_DUMP_FULL: wait on previous submission for device %u and queue %d failed: %d",
+                      device->device_idx, queue->msm_queue_id, result);
          }
-
-         for (unsigned i = 0; i < req.nr_cmds; i++) {
-            struct drm_msm_gem_submit_cmd *cmd = &submit->cmds[i];
-            uint64_t iova = device->bo_list[cmd->submit_idx].presumed + cmd->submit_offset;
-            uint32_t size
= cmd->size >> 2; - uint32_t buf[3] = { iova, size, iova >> 32 }; - rd_write_section(rd, RD_CMDSTREAM_ADDR, buf, 12); - } - close(rd); } + + fd_rd_output_write_section(rd_output, RD_CHIP_ID, &device->physical_device->dev_id.chip_id, 4); + fd_rd_output_write_section(rd_output, RD_CMD, "tu-dump", 8); + + for (unsigned i = 0; i < device->bo_count; i++) { + struct drm_msm_gem_submit_bo bo = device->bo_list[i]; + struct tu_bo *tu_bo = tu_device_lookup_bo(device, bo.handle); + uint64_t iova = bo.presumed; + + uint32_t buf[3] = { iova, tu_bo->size, iova >> 32 }; + fd_rd_output_write_section(rd_output, RD_GPUADDR, buf, 12); + if (bo.flags & MSM_SUBMIT_BO_DUMP || FD_RD_DUMP(FULL)) { + msm_bo_map(device, tu_bo); /* note: this would need locking to be safe */ + fd_rd_output_write_section(rd_output, RD_BUFFER_CONTENTS, tu_bo->map, tu_bo->size); + } + } + + for (unsigned i = 0; i < req.nr_cmds; i++) { + struct drm_msm_gem_submit_cmd *cmd = &submit->cmds[i]; + uint64_t iova = device->bo_list[cmd->submit_idx].presumed + cmd->submit_offset; + uint32_t size = cmd->size >> 2; + uint32_t buf[3] = { iova, size, iova >> 32 }; + fd_rd_output_write_section(rd_output, RD_CMDSTREAM_ADDR, buf, 12); + } + + fd_rd_output_end(rd_output); } int ret = drmCommandWriteRead(queue->device->fd, diff --git a/src/freedreno/vulkan/tu_util.cc b/src/freedreno/vulkan/tu_util.cc index 793e81b4a0f..8a69d3aa2a2 100644 --- a/src/freedreno/vulkan/tu_util.cc +++ b/src/freedreno/vulkan/tu_util.cc @@ -8,6 +8,7 @@ #include #include +#include "common/freedreno_rd_output.h" #include "util/u_math.h" #include "util/timespec.h" #include "vk_enum_to_str.h" @@ -54,11 +55,19 @@ tu_env_init_once(void) if (TU_DEBUG(STARTUP)) mesa_logi("TU_DEBUG=0x%x", tu_env.debug); + + /* TU_DEBUG=rd functionality was moved to fd_rd_output. This debug option + * should translate to the basic-level FD_RD_DUMP_ENABLE option. 
+ */ + if (TU_DEBUG(RD)) + fd_rd_dump_env.flags |= FD_RD_DUMP_ENABLE; } void tu_env_init(void) { + fd_rd_dump_env_init(); + static once_flag once = ONCE_FLAG_INIT; call_once(&once, tu_env_init_once); }