mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-19 15:48:19 +02:00
The changes as part of the Contexts state now include: **** Contexts **** [HWCTX].replay_offset: 0x0 [HWCTX].replay_length: 0xd000 and the changes as part of the VM state now include: **** VM state **** VM.uapi_flags: 0x1 [40000].length: 0x2000 [40000].properties: read_write|bo|mem_region=0x1|pat_index=2|cpu_caching=1 [40000].data: &-)\3!!E9mzzzzzzzzzz In order to be able to replay a GPU hang from a devcore dump file new properties have been added describing the offset and the length of the affected hw context as well as a global VM flag and several VMA property types: memory region, bo caching, pat index, memory permission and memory type. Signed-off-by: Carlos Santa <carlos.santa@intel.com> Reviewed-by: José Roberto de Souza <jose.souza@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34829>
193 lines
6.3 KiB
C
193 lines
6.3 KiB
C
/*
|
|
* Copyright 2024 Intel Corporation
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "error2hangdump_xe.h"
|
|
|
|
#include <inttypes.h>
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "error_decode_xe_lib.h"
|
|
#include "error2hangdump_lib.h"
|
|
#include "intel/common/intel_gem.h"
|
|
#include "error2hangdump_xe_lib.h"
|
|
#include "intel/dev/intel_device_info.h"
|
|
#include "util/macros.h"
|
|
|
|
void
|
|
read_xe_data_file(FILE *dump_file, FILE *hang_dump_file, bool verbose)
|
|
{
|
|
enum xe_topic xe_topic = XE_TOPIC_UNKNOWN;
|
|
uint32_t *vm_entry_data = NULL;
|
|
uint32_t vm_entry_len = 0;
|
|
struct xe_vm xe_vm;
|
|
char *line = NULL;
|
|
size_t line_size;
|
|
struct {
|
|
uint64_t *addrs;
|
|
uint8_t len;
|
|
} batch_buffers = { .addrs = NULL, .len = 0 };
|
|
uint32_t i;
|
|
|
|
write_header(hang_dump_file);
|
|
error_decode_xe_vm_init(&xe_vm);
|
|
|
|
while (getline(&line, &line_size, dump_file) > 0) {
|
|
if (error_decode_xe_decode_topic(line, &xe_topic))
|
|
continue;
|
|
|
|
switch (xe_topic) {
|
|
case XE_TOPIC_JOB: {
|
|
uint64_t u64_value;
|
|
|
|
if (error_decode_xe_read_u64_hexacimal_parameter(line, "batch_addr[", &u64_value)) {
|
|
batch_buffers.addrs = realloc(batch_buffers.addrs, sizeof(uint64_t) * (batch_buffers.len + 1));
|
|
batch_buffers.addrs[batch_buffers.len] = intel_48b_address(u64_value);
|
|
batch_buffers.len++;
|
|
}
|
|
|
|
break;
|
|
}
|
|
case XE_TOPIC_GUC_CT:
|
|
/*
|
|
* Workaround bug in the kernel that would put the exec queue dump
|
|
* in the wrong place, under "GuC CT" topic.
|
|
*/
|
|
case XE_TOPIC_CONTEXT: {
|
|
enum xe_vm_topic_type type;
|
|
const char *value_ptr;
|
|
char binary_name[64];
|
|
|
|
uint64_t u64_value;
|
|
|
|
if (error_decode_xe_read_u64_hexacimal_parameter(line, "[HWCTX].replay_offset", &u64_value)) {
|
|
error_decode_xe_vm_hw_ctx_set_offset(&xe_vm, u64_value);
|
|
break;
|
|
}
|
|
|
|
if (error_decode_xe_read_u64_hexacimal_parameter(line, "[HWCTX].replay_length", &u64_value)) {
|
|
/* replay_length is implicitly contained in size, so we don't need to save it */
|
|
break;
|
|
}
|
|
|
|
if (error_decode_xe_binary_line(line, binary_name, sizeof(binary_name), &type, &value_ptr)) {
|
|
if (strncmp(binary_name, "HWCTX", strlen("HWCTX")) != 0)
|
|
break;
|
|
|
|
switch (type) {
|
|
case XE_VM_TOPIC_TYPE_DATA:
|
|
if (!error_decode_xe_ascii85_decode_allocated(value_ptr, vm_entry_data, vm_entry_len))
|
|
printf("Failed to parse HWCTX data\n");
|
|
break;
|
|
case XE_VM_TOPIC_TYPE_LENGTH: {
|
|
vm_entry_len = strtoul(value_ptr, NULL, 0);
|
|
vm_entry_data = calloc(1, vm_entry_len);
|
|
if (!vm_entry_data) {
|
|
printf("Out of memory to allocate a buffer to store content of HWCTX\n");
|
|
break;
|
|
}
|
|
|
|
error_decode_xe_vm_hw_ctx_set(&xe_vm, vm_entry_len, vm_entry_data);
|
|
break;
|
|
}
|
|
case XE_VM_TOPIC_TYPE_ERROR:
|
|
printf("HWCTX not present in dump, content will be zeroed: %s\n", line);
|
|
break;
|
|
default:
|
|
printf("Not expected line in HWCTX: %s", line);
|
|
}
|
|
}
|
|
|
|
break;
|
|
}
|
|
case XE_TOPIC_VM: {
|
|
enum xe_vm_topic_type type;
|
|
const char *value_ptr;
|
|
uint64_t address;
|
|
|
|
type = error_decode_xe_read_vm_line(line, &address, &value_ptr);
|
|
switch (type) {
|
|
case XE_VM_TOPIC_TYPE_GLOBAL_VM_FLAGS: {
|
|
uint32_t vm_flags = strtoul(value_ptr, NULL, 0);
|
|
write_xe_vm_flags(hang_dump_file, vm_flags);
|
|
break;
|
|
}
|
|
case XE_VM_TOPIC_TYPE_DATA: {
|
|
if (!error_decode_xe_ascii85_decode_allocated(value_ptr, vm_entry_data, vm_entry_len))
|
|
printf("Failed to parse VMA 0x%" PRIx64 " data\n", address);
|
|
break;
|
|
}
|
|
case XE_VM_TOPIC_TYPE_LENGTH: {
|
|
vm_entry_len = strtoul(value_ptr, NULL, 0);
|
|
vm_entry_data = calloc(1, vm_entry_len);
|
|
if (!vm_entry_data) {
|
|
printf("Out of memory to allocate a buffer to store content of VMA 0x%" PRIx64 "\n", address);
|
|
break;
|
|
}
|
|
|
|
break;
|
|
}
|
|
case XE_VM_TOPIC_TYPE_PROPERTY: {
|
|
struct xe_vma_properties props = {0};
|
|
if (!error_decode_xe_read_vm_property_line(&props, value_ptr)) {
|
|
printf("xe_vm_properties failed for VMA 0x%" PRIx64 "\n", address);
|
|
}
|
|
if (!error_decode_xe_vm_append(&xe_vm, address, vm_entry_len, &props, vm_entry_data)) {
|
|
printf("xe_vm_append() failed for VMA 0x%" PRIx64 "\n", address);
|
|
}
|
|
break;
|
|
}
|
|
case XE_VM_TOPIC_TYPE_ERROR:
|
|
printf("VMA 0x%" PRIx64 " not present in dump, content will be zeroed: %s\n", address, line);
|
|
break;
|
|
default:
|
|
printf("Not expected line in VM state: %s", line);
|
|
}
|
|
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (verbose) {
|
|
fprintf(stdout, "BOs found:\n");
|
|
for (i = 0; i < xe_vm.entries_len; i++) {
|
|
struct xe_vm_entry *entry = &xe_vm.entries[i];
|
|
|
|
fprintf(stdout, "\taddr=0x%016" PRIx64 " size=%" PRIu32 "\n", entry->address, entry->length);
|
|
}
|
|
}
|
|
|
|
fail_if(!batch_buffers.len, "Failed to find batch buffer.\n");
|
|
fail_if(!xe_vm.hw_context.length, "Failed to find HW image buffer.\n");
|
|
|
|
for (i = 0; i < xe_vm.entries_len; i++) {
|
|
struct xe_vm_entry *entry = &xe_vm.entries[i];
|
|
const char *name = "user";
|
|
uint32_t j;
|
|
|
|
for (j = 0; j < batch_buffers.len; j++) {
|
|
if (batch_buffers.addrs[j] == entry->address)
|
|
name = "batch";
|
|
}
|
|
|
|
write_xe_buffer(hang_dump_file, entry->address, entry->data, entry->length, &entry->props, name);
|
|
}
|
|
|
|
fprintf(stderr, "writing image buffer size=0x%016" PRIx32 "\n", xe_vm.hw_context.length);
|
|
write_hw_image_buffer(hang_dump_file, xe_vm.hw_context.data, xe_vm.hw_context.length,
|
|
xe_vm.hw_context.address);
|
|
|
|
for (i = 0; i < batch_buffers.len; i++) {
|
|
write_exec(hang_dump_file, batch_buffers.addrs[i]);
|
|
}
|
|
|
|
free(batch_buffers.addrs);
|
|
free(line);
|
|
error_decode_xe_vm_fini(&xe_vm);
|
|
}
|