mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 03:00:11 +01:00
radv: dump address binding report with RADV_DEBUG=hang
This contains much more info than the BO history from the winsys and it will be helpful for debugging. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32403>
This commit is contained in:
parent
1ae6fcfbaf
commit
1b68a92c59
4 changed files with 135 additions and 3 deletions
|
|
@ -31,6 +31,7 @@ there are a couple of files:
|
|||
|
||||
* ``*.spv``: SPIR-V binaries of the pipeline that was bound when the hang
|
||||
occurred.
|
||||
* ``addr_binding_report.log``: VK_EXT_address_binding_report logs.
|
||||
* ``app_info.log``: ``VkApplicationInfo`` fields.
|
||||
* ``bo_history.log``: A list of every GPU memory allocation and deallocation.
|
||||
If the GPU hang was caused by a page fault, you can use
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@
|
|||
#endif
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "spirv/nir_spirv.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "util/os_time.h"
|
||||
#include "ac_debug.h"
|
||||
|
|
@ -27,7 +28,9 @@
|
|||
#include "radv_pipeline_rt.h"
|
||||
#include "radv_shader.h"
|
||||
#include "sid.h"
|
||||
#include "spirv/nir_spirv.h"
|
||||
|
||||
#include "vk_common_entrypoints.h"
|
||||
#include "vk_enum_to_str.h"
|
||||
|
||||
#define COLOR_RESET "\033[0m"
|
||||
#define COLOR_RED "\033[31m"
|
||||
|
|
@ -37,6 +40,105 @@
|
|||
|
||||
#define RADV_DUMP_DIR "radv_dumps"
|
||||
|
||||
static void
|
||||
radv_dump_address_binding_report(const struct radv_address_binding_report *report, FILE *f)
|
||||
{
|
||||
fprintf(f, "timestamp=%llu, VA=%.16llx-%.16llx, binding_type=%s, object_type=%s, object_handle=0x%llx\n",
|
||||
(long long)report->timestamp, (long long)report->va, (long long)(report->va + report->size),
|
||||
(report->binding_type == VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT) ? "bind" : "unbind",
|
||||
vk_ObjectType_to_str(report->object_type), (long long)report->object_handle);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_dump_address_binding_reports(struct radv_device *device, FILE *f)
|
||||
{
|
||||
struct radv_address_binding_tracker *tracker = device->addr_binding_tracker;
|
||||
|
||||
simple_mtx_lock(&tracker->mtx);
|
||||
util_dynarray_foreach (&tracker->reports, struct radv_address_binding_report, report)
|
||||
radv_dump_address_binding_report(report, f);
|
||||
simple_mtx_unlock(&tracker->mtx);
|
||||
}
|
||||
|
||||
static VkBool32 VKAPI_PTR
|
||||
radv_address_binding_callback(VkDebugUtilsMessageSeverityFlagBitsEXT message_severity,
|
||||
VkDebugUtilsMessageTypeFlagsEXT message_types,
|
||||
const VkDebugUtilsMessengerCallbackDataEXT *callback_data, void *userdata)
|
||||
{
|
||||
struct radv_address_binding_tracker *tracker = userdata;
|
||||
const VkDeviceAddressBindingCallbackDataEXT *data;
|
||||
|
||||
if (!callback_data)
|
||||
return VK_FALSE;
|
||||
|
||||
data = vk_find_struct_const(callback_data->pNext, DEVICE_ADDRESS_BINDING_CALLBACK_DATA_EXT);
|
||||
if (!data)
|
||||
return VK_FALSE;
|
||||
|
||||
simple_mtx_lock(&tracker->mtx);
|
||||
|
||||
for (uint32_t i = 0; i < callback_data->objectCount; i++) {
|
||||
struct radv_address_binding_report report = {
|
||||
.timestamp = os_time_get_nano(),
|
||||
.va = data->baseAddress & ((1ull << 48) - 1),
|
||||
.size = data->size,
|
||||
.flags = data->flags,
|
||||
.binding_type = data->bindingType,
|
||||
.object_handle = callback_data->pObjects[i].objectHandle,
|
||||
.object_type = callback_data->pObjects[i].objectType,
|
||||
};
|
||||
|
||||
util_dynarray_append(&tracker->reports, struct radv_address_binding_report, report);
|
||||
}
|
||||
|
||||
simple_mtx_unlock(&tracker->mtx);
|
||||
|
||||
return VK_FALSE;
|
||||
}
|
||||
|
||||
static bool
|
||||
radv_init_adress_binding_report(struct radv_device *device)
|
||||
{
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
VkResult result;
|
||||
|
||||
device->addr_binding_tracker = calloc(1, sizeof(*device->addr_binding_tracker));
|
||||
if (!device->addr_binding_tracker)
|
||||
return false;
|
||||
|
||||
simple_mtx_init(&device->addr_binding_tracker->mtx, mtx_plain);
|
||||
util_dynarray_init(&device->addr_binding_tracker->reports, NULL);
|
||||
|
||||
VkDebugUtilsMessengerCreateInfoEXT create_info = {
|
||||
.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT,
|
||||
.pUserData = device->addr_binding_tracker,
|
||||
.pfnUserCallback = radv_address_binding_callback,
|
||||
.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_DEVICE_ADDRESS_BINDING_BIT_EXT,
|
||||
};
|
||||
|
||||
result = vk_common_CreateDebugUtilsMessengerEXT(radv_instance_to_handle(instance), &create_info, NULL,
|
||||
&device->addr_binding_tracker->messenger);
|
||||
if (result != VK_SUCCESS)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_finish_address_binding_report(struct radv_device *device)
|
||||
{
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
struct radv_address_binding_tracker *tracker = device->addr_binding_tracker;
|
||||
|
||||
util_dynarray_fini(&tracker->reports);
|
||||
simple_mtx_destroy(&tracker->mtx);
|
||||
|
||||
vk_common_DestroyDebugUtilsMessengerEXT(radv_instance_to_handle(instance), tracker->messenger, NULL);
|
||||
free(device->addr_binding_tracker);
|
||||
}
|
||||
|
||||
bool
|
||||
radv_init_trace(struct radv_device *device)
|
||||
{
|
||||
|
|
@ -58,6 +160,9 @@ radv_init_trace(struct radv_device *device)
|
|||
if (!device->trace_data)
|
||||
return false;
|
||||
|
||||
if (!radv_init_adress_binding_report(device))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -66,6 +171,9 @@ radv_finish_trace(struct radv_device *device)
|
|||
{
|
||||
struct radeon_winsys *ws = device->ws;
|
||||
|
||||
if (device->addr_binding_tracker)
|
||||
radv_finish_address_binding_report(device);
|
||||
|
||||
if (unlikely(device->trace_bo)) {
|
||||
ws->buffer_make_resident(ws, device->trace_bo, false);
|
||||
radv_bo_destroy(device, NULL, device->trace_bo);
|
||||
|
|
@ -728,6 +836,7 @@ enum radv_device_fault_chunk {
|
|||
RADV_DEVICE_FAULT_CHUNK_REGISTERS,
|
||||
RADV_DEVICE_FAULT_CHUNK_BO_RANGES,
|
||||
RADV_DEVICE_FAULT_CHUNK_BO_HISTORY,
|
||||
RADV_DEVICE_FAULT_CHUNK_ADDR_BINDING_REPORT,
|
||||
RADV_DEVICE_FAULT_CHUNK_VM_FAULT,
|
||||
RADV_DEVICE_FAULT_CHUNK_APP_INFO,
|
||||
RADV_DEVICE_FAULT_CHUNK_GPU_INFO,
|
||||
|
|
@ -801,8 +910,9 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
|
|||
char *ptr;
|
||||
size_t size;
|
||||
} chunks[RADV_DEVICE_FAULT_CHUNK_COUNT] = {
|
||||
{"trace"}, {"pipeline"}, {"umr_waves"}, {"umr_ring"}, {"registers"}, {"bo_ranges"},
|
||||
{"bo_history"}, {"vm_fault"}, {"app_info"}, {"gpu_info"}, {"dmesg"},
|
||||
{"trace"}, {"pipeline"}, {"umr_waves"}, {"umr_ring"},
|
||||
{"registers"}, {"bo_ranges"}, {"bo_history"}, {"addr_binding_report"},
|
||||
{"vm_fault"}, {"app_info"}, {"gpu_info"}, {"dmesg"},
|
||||
};
|
||||
|
||||
char *wave_dump = NULL;
|
||||
|
|
@ -846,6 +956,9 @@ radv_check_gpu_hangs(struct radv_queue *queue, const struct radv_winsys_submit_i
|
|||
case RADV_DEVICE_FAULT_CHUNK_BO_HISTORY:
|
||||
device->ws->dump_bo_log(device->ws, f);
|
||||
break;
|
||||
case RADV_DEVICE_FAULT_CHUNK_ADDR_BINDING_REPORT:
|
||||
radv_dump_address_binding_reports(device, f);
|
||||
break;
|
||||
case RADV_DEVICE_FAULT_CHUNK_VM_FAULT:
|
||||
if (vm_fault_occurred)
|
||||
radv_dump_vm_fault(device, &fault_info, f);
|
||||
|
|
|
|||
|
|
@ -137,4 +137,20 @@ struct radv_trace_data {
|
|||
VkDispatchIndirectCommand indirect_dispatch;
|
||||
};
|
||||
|
||||
struct radv_address_binding_report {
|
||||
uint64_t timestamp; /* CPU timestamp */
|
||||
uint64_t va;
|
||||
uint64_t size;
|
||||
VkDeviceAddressBindingFlagsEXT flags;
|
||||
VkDeviceAddressBindingTypeEXT binding_type;
|
||||
uint64_t object_handle;
|
||||
VkObjectType object_type;
|
||||
};
|
||||
|
||||
struct radv_address_binding_tracker {
|
||||
VkDebugUtilsMessengerEXT messenger;
|
||||
struct util_dynarray reports;
|
||||
simple_mtx_t mtx;
|
||||
};
|
||||
|
||||
#endif /* RADV_DEBUG_H */
|
||||
|
|
|
|||
|
|
@ -540,6 +540,8 @@ struct radv_device {
|
|||
/* PSO cache stats */
|
||||
simple_mtx_t pso_cache_stats_mtx;
|
||||
struct radv_pso_cache_stats pso_cache_stats[RADV_PIPELINE_TYPE_COUNT];
|
||||
|
||||
struct radv_address_binding_tracker *addr_binding_tracker;
|
||||
};
|
||||
|
||||
VK_DEFINE_HANDLE_CASTS(radv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue