diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h index 6308c0d6994..e61fe0da8e7 100644 --- a/include/drm-uapi/xe_drm.h +++ b/include/drm-uapi/xe_drm.h @@ -204,9 +204,12 @@ struct drm_xe_ext_set_property { /** @pad: MBZ */ __u32 pad; - /** @value: property value */ - __u64 value; - + union { + /** @value: property value */ + __u64 value; + /** @ptr: pointer to user value */ + __u64 ptr; + }; /** @reserved: Reserved */ __u64 reserved[2]; }; @@ -1249,6 +1252,7 @@ struct drm_xe_exec_queue_create { #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1 #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE 2 +#define DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE 3 /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; diff --git a/src/intel/common/intel_hang_dump.h b/src/intel/common/intel_hang_dump.h index 7ddc339a23d..ffbc3232202 100644 --- a/src/intel/common/intel_hang_dump.h +++ b/src/intel/common/intel_hang_dump.h @@ -36,15 +36,32 @@ extern "C" { /* TODO: Consider compression? 
ZSTD_error_dstSize_tooSmall */ -#define INTEL_HANG_DUMP_VERSION (1) +#define INTEL_HANG_DUMP_VERSION (2) #define INTEL_HANG_DUMP_MAGIC (0x4245012345676463) enum intel_hang_dump_block_type { - INTEL_HANG_DUMP_BLOCK_TYPE_HEADER = 1, - INTEL_HANG_DUMP_BLOCK_TYPE_BO = 2, - INTEL_HANG_DUMP_BLOCK_TYPE_MAP = 3, - INTEL_HANG_DUMP_BLOCK_TYPE_EXEC = 4, - INTEL_HANG_DUMP_BLOCK_TYPE_HW_IMAGE = 5, + INTEL_HANG_DUMP_BLOCK_TYPE_HEADER = 1, + INTEL_HANG_DUMP_BLOCK_TYPE_BO = 2, + INTEL_HANG_DUMP_BLOCK_TYPE_MAP = 3, + INTEL_HANG_DUMP_BLOCK_TYPE_EXEC = 4, + INTEL_HANG_DUMP_BLOCK_TYPE_HW_IMAGE = 5, + INTEL_HANG_DUMP_BLOCK_TYPE_VM_FLAGS = 6, +}; + +enum intel_hang_dump_block_mem_permission { + INTEL_HANG_DUMP_BLOCK_MEM_TYPE_READ_ONLY = 1, + INTEL_HANG_DUMP_BLOCK_MEM_TYPE_READ_WRITE = 2, +}; + +enum intel_hang_dump_block_mem_type { + INTEL_HANG_DUMP_BLOCK_MEM_TYPE_BO = 1, + INTEL_HANG_DUMP_BLOCK_MEM_TYPE_USERPTR = 2, + INTEL_HANG_DUMP_BLOCK_MEM_TYPE_NULL_SPARSE = 3, +}; + +enum intel_hang_dump_block_cpu_caching_mode { + INTEL_HANG_DUMP_BLOCK_CPU_CACHING_MODE_WB = 1, + INTEL_HANG_DUMP_BLOCK_CPU_CACHING_MODE_WC = 2, }; struct intel_hang_dump_block_base { @@ -53,6 +70,30 @@ struct intel_hang_dump_block_base { uint32_t pad; }; +struct intel_hang_dump_block_vm_flags { + struct intel_hang_dump_block_base base; + + /* Flags used when creating a VM, defaults to scratch page */ + uint32_t vm_flags; +}; + +struct intel_hang_dump_block_vm_properties { + /* Two options: 'read_only' or 'read_write' */ + enum intel_hang_dump_block_mem_permission mem_permission; + + /* Three options: 'userptr', 'null_sparse' or 'bo' */ + enum intel_hang_dump_block_mem_type mem_type; + + /* Bit mask to specify where the memory is located */ + uint32_t mem_region; + + /* Corresponds to the value setup upon VM bind */ + uint32_t pat_index; + + /* Indicates BO caching properties */ + enum intel_hang_dump_block_cpu_caching_mode cpu_caching; +}; + struct intel_hang_dump_block_header { struct intel_hang_dump_block_base base; 
@@ -64,6 +105,7 @@ struct intel_hang_dump_block_header { struct intel_hang_dump_block_bo { struct intel_hang_dump_block_base base; + struct intel_hang_dump_block_vm_properties props; /* Helpful */ char name[64]; @@ -103,6 +145,9 @@ struct intel_hang_dump_block_hw_image { /* Buffer size */ uint64_t size; + /* PPGTT location */ + uint64_t offset; + /* Data follows */ }; @@ -113,6 +158,7 @@ union intel_hang_dump_block_all { struct intel_hang_dump_block_map map; struct intel_hang_dump_block_exec exec; struct intel_hang_dump_block_hw_image hw_img; + struct intel_hang_dump_block_vm_flags vm_flags; }; #ifdef __cplusplus diff --git a/src/intel/tools/aubinator_error_decode_xe.c b/src/intel/tools/aubinator_error_decode_xe.c index a83ed3de752..13028af72cf 100644 --- a/src/intel/tools/aubinator_error_decode_xe.c +++ b/src/intel/tools/aubinator_error_decode_xe.c @@ -272,12 +272,17 @@ read_xe_data_file(FILE *file, print_line = false; type = error_decode_xe_read_vm_line(line, &address, &value_ptr); switch (type) { + case XE_VM_TOPIC_TYPE_GLOBAL_VM_FLAGS: { + printf("VM.uapi_flags are ignored and not parsed: %s", line); + break; + } case XE_VM_TOPIC_TYPE_DATA: { if (!error_decode_xe_ascii85_decode_allocated(value_ptr, vm_entry_data, vm_entry_len)) printf("Failed to parse VMA 0x%" PRIx64 " data\n", address); break; } case XE_VM_TOPIC_TYPE_LENGTH: { + struct xe_vma_properties props = {0}; vm_entry_len = strtoul(value_ptr, NULL, 0); vm_entry_data = calloc(1, vm_entry_len); if (!vm_entry_data) { @@ -285,12 +290,16 @@ read_xe_data_file(FILE *file, printf("Aborting decode process due to insufficient memory\n"); goto cleanup; } - if (!error_decode_xe_vm_append(&xe_vm, address, vm_entry_len, vm_entry_data)) { + if (!error_decode_xe_vm_append(&xe_vm, address, vm_entry_len, &props, vm_entry_data)) { printf("xe_vm_append() failed for VMA 0x%" PRIx64 "\n", address); break; } break; } + case XE_VM_TOPIC_TYPE_PROPERTY: { + /* VMA properties are simply ignored and not parsed inside 
aubinator_error_decode. */ + break; + } case XE_VM_TOPIC_TYPE_ERROR: printf("VMA 0x%" PRIx64 " not present in dump, content will be zeroed: %s\n", address, line); break; diff --git a/src/intel/tools/error2hangdump.c b/src/intel/tools/error2hangdump.c index 93c9bcff30a..9f2f1c7a0d0 100644 --- a/src/intel/tools/error2hangdump.c +++ b/src/intel/tools/error2hangdump.c @@ -273,7 +273,7 @@ read_i915_data_file(FILE *err_file, FILE *hang_file, bool verbose, enum intel_en write_buffer(hang_file, batch_bo->addr, batch_bo->data, batch_bo->size, "batch"); fprintf(stderr, "writing image buffer 0x%016"PRIx64" size=0x%016"PRIx64"\n", hw_image_bo->addr, hw_image_bo->size); - write_hw_image_buffer(hang_file, hw_image_bo->data, hw_image_bo->size); + write_hw_image_buffer(hang_file, hw_image_bo->data, hw_image_bo->size, 0); write_exec(hang_file, batch_bo->addr); /* Cleanup */ diff --git a/src/intel/tools/error2hangdump_lib.c b/src/intel/tools/error2hangdump_lib.c index 055856ba3e1..948a8e8a211 100644 --- a/src/intel/tools/error2hangdump_lib.c +++ b/src/intel/tools/error2hangdump_lib.c @@ -42,13 +42,14 @@ write_buffer(FILE *f, } void -write_hw_image_buffer(FILE *f, const void *data, uint64_t size) +write_hw_image_buffer(FILE *f, const void *data, uint64_t size, uint64_t offset) { struct intel_hang_dump_block_hw_image header = { .base = { .type = INTEL_HANG_DUMP_BLOCK_TYPE_HW_IMAGE, }, .size = size, + .offset = offset, }; fwrite(&header, sizeof(header), 1, f); diff --git a/src/intel/tools/error2hangdump_lib.h b/src/intel/tools/error2hangdump_lib.h index 33ead7ae7da..6da6a9c2e42 100644 --- a/src/intel/tools/error2hangdump_lib.h +++ b/src/intel/tools/error2hangdump_lib.h @@ -35,5 +35,5 @@ _fail(const char *prefix, const char *format, ...) 
void write_header(FILE *f); void write_buffer(FILE *f, uint64_t offset, const void *data, uint64_t size, const char *name); -void write_hw_image_buffer(FILE *f, const void *data, uint64_t size); +void write_hw_image_buffer(FILE *f, const void *data, uint64_t size, uint64_t offset); void write_exec(FILE *f, uint64_t offset); diff --git a/src/intel/tools/error2hangdump_xe.c b/src/intel/tools/error2hangdump_xe.c index ad46644df0e..a898400057a 100644 --- a/src/intel/tools/error2hangdump_xe.c +++ b/src/intel/tools/error2hangdump_xe.c @@ -13,6 +13,7 @@ #include "error_decode_xe_lib.h" #include "error2hangdump_lib.h" #include "intel/common/intel_gem.h" +#include "error2hangdump_xe_lib.h" #include "intel/dev/intel_device_info.h" #include "util/macros.h" @@ -31,6 +32,7 @@ read_xe_data_file(FILE *dump_file, FILE *hang_dump_file, bool verbose) } batch_buffers = { .addrs = NULL, .len = 0 }; uint32_t i; + write_header(hang_dump_file); error_decode_xe_vm_init(&xe_vm); while (getline(&line, &line_size, dump_file) > 0) { @@ -59,6 +61,18 @@ read_xe_data_file(FILE *dump_file, FILE *hang_dump_file, bool verbose) const char *value_ptr; char binary_name[64]; + uint64_t u64_value; + + if (error_decode_xe_read_u64_hexacimal_parameter(line, "[HWCTX].replay_offset", &u64_value)) { + error_decode_xe_vm_hw_ctx_set_offset(&xe_vm, u64_value); + break; + } + + if (error_decode_xe_read_u64_hexacimal_parameter(line, "[HWCTX].replay_length", &u64_value)) { + /* replay_length is implicitly contained in size, so we don't need to save it */ + break; + } + if (error_decode_xe_binary_line(line, binary_name, sizeof(binary_name), &type, &value_ptr)) { if (strncmp(binary_name, "HWCTX", strlen("HWCTX")) != 0) break; @@ -96,6 +110,11 @@ read_xe_data_file(FILE *dump_file, FILE *hang_dump_file, bool verbose) type = error_decode_xe_read_vm_line(line, &address, &value_ptr); switch (type) { + case XE_VM_TOPIC_TYPE_GLOBAL_VM_FLAGS: { + uint32_t vm_flags = strtoul(value_ptr, NULL, 0); + 
write_xe_vm_flags(hang_dump_file, vm_flags); + break; + } case XE_VM_TOPIC_TYPE_DATA: { if (!error_decode_xe_ascii85_decode_allocated(value_ptr, vm_entry_data, vm_entry_len)) printf("Failed to parse VMA 0x%" PRIx64 " data\n", address); @@ -108,7 +127,15 @@ read_xe_data_file(FILE *dump_file, FILE *hang_dump_file, bool verbose) printf("Out of memory to allocate a buffer to store content of VMA 0x%" PRIx64 "\n", address); break; } - if (!error_decode_xe_vm_append(&xe_vm, address, vm_entry_len, vm_entry_data)) { + + break; + } + case XE_VM_TOPIC_TYPE_PROPERTY: { + struct xe_vma_properties props = {0}; + if (!error_decode_xe_read_vm_property_line(&props, value_ptr)) { + printf("xe_vm_properties failed for VMA 0x%" PRIx64 "\n", address); + } + if (!error_decode_xe_vm_append(&xe_vm, address, vm_entry_len, &props, vm_entry_data)) { printf("xe_vm_append() failed for VMA 0x%" PRIx64 "\n", address); } break; @@ -149,11 +176,12 @@ read_xe_data_file(FILE *dump_file, FILE *hang_dump_file, bool verbose) name = "batch"; } - write_buffer(hang_dump_file, entry->address, entry->data, entry->length, name); + write_xe_buffer(hang_dump_file, entry->address, entry->data, entry->length, &entry->props, name); } fprintf(stderr, "writing image buffer size=0x%016" PRIx32 "\n", xe_vm.hw_context.length); - write_hw_image_buffer(hang_dump_file, xe_vm.hw_context.data, xe_vm.hw_context.length); + write_hw_image_buffer(hang_dump_file, xe_vm.hw_context.data, xe_vm.hw_context.length, + xe_vm.hw_context.address); for (i = 0; i < batch_buffers.len; i++) { write_exec(hang_dump_file, batch_buffers.addrs[i]); diff --git a/src/intel/tools/error2hangdump_xe.h b/src/intel/tools/error2hangdump_xe.h index 442242e5a54..cd76ff30dfc 100644 --- a/src/intel/tools/error2hangdump_xe.h +++ b/src/intel/tools/error2hangdump_xe.h @@ -7,5 +7,6 @@ #include #include +#include void read_xe_data_file(FILE *dump_file, FILE *hang_dump_file, bool verbose); diff --git a/src/intel/tools/error2hangdump_xe_lib.c 
b/src/intel/tools/error2hangdump_xe_lib.c new file mode 100644 index 00000000000..fc5d4838327 --- /dev/null +++ b/src/intel/tools/error2hangdump_xe_lib.c @@ -0,0 +1,51 @@ +/* + * Copyright 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "error2hangdump_lib.h" +#include "error2hangdump_xe_lib.h" + +#include "common/intel_hang_dump.h" +#include "error_decode_xe_lib.h" + +void +write_xe_vm_flags(FILE *f, + uint32_t vm_flags) +{ + struct intel_hang_dump_block_vm_flags header = { + .base = { + .type = INTEL_HANG_DUMP_BLOCK_TYPE_VM_FLAGS, + }, + .vm_flags = vm_flags, + }; + fwrite(&header, sizeof(header), 1, f); +} + +void +write_xe_buffer(FILE *f, + uint64_t offset, + const void *data, + uint64_t size, + const struct xe_vma_properties *props, + const char *name) +{ + struct intel_hang_dump_block_bo header = { + .base = { + .type = INTEL_HANG_DUMP_BLOCK_TYPE_BO, + }, + .props = { + .mem_type = props->mem_type, + .mem_permission = props->mem_permission, + .mem_region = props->mem_region, + .pat_index = props->pat_index, + .cpu_caching = props->cpu_caching, + }, + .offset = offset, + .size = size, + }; + snprintf(header.name, sizeof(header.name), "%s", name); + + fwrite(&header, sizeof(header), 1, f); + fwrite(data, size, 1, f); +} diff --git a/src/intel/tools/error2hangdump_xe_lib.h b/src/intel/tools/error2hangdump_xe_lib.h new file mode 100644 index 00000000000..a3a4016702d --- /dev/null +++ b/src/intel/tools/error2hangdump_xe_lib.h @@ -0,0 +1,17 @@ +/* + * Copyright 2025 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include "error_decode_xe_lib.h" + +#include +#include +#include +#include + +void write_xe_vm_flags(FILE *f, uint32_t vm_flags); +void write_xe_buffer(FILE *f, uint64_t offset, const void *data, + uint64_t size, const struct xe_vma_properties *props, const char *name); diff --git a/src/intel/tools/error_decode_xe_lib.c b/src/intel/tools/error_decode_xe_lib.c index c0d840e6f56..aaa13ac6e9b 100644 --- 
a/src/intel/tools/error_decode_xe_lib.c +++ b/src/intel/tools/error_decode_xe_lib.c @@ -97,6 +97,21 @@ error_decode_xe_decode_topic(const char *line, enum xe_topic *new_topic) return topic_changed; } +/* return type of VM state topic lines like 'VM.uapi_flags: 0x1' and points + * value_ptr to first char of data of topic type + */ +static enum xe_vm_topic_type +error_decode_xe_read_vm_flags_line(const char *line, const char **vm_value_ptr) +{ + enum xe_vm_topic_type type = XE_VM_TOPIC_TYPE_GLOBAL_VM_FLAGS; + + for (; *line != ':'; line++); + + *vm_value_ptr = line + 2; + + return type; +} + /* return type of VM topic lines like '[200000].data: x...' and points * value_ptr to first char of data of topic type */ @@ -105,8 +120,15 @@ error_decode_xe_read_vm_line(const char *line, uint64_t *address, const char **v { enum xe_vm_topic_type type; char text_addr[64]; + const char *vm_flags_value_ptr; int i; + if (*line == 'V') { + type = error_decode_xe_read_vm_flags_line(line, &vm_flags_value_ptr); + *value_ptr = vm_flags_value_ptr; + return type; + } + if (*line != '[') return XE_VM_TOPIC_TYPE_UNKNOWN; @@ -125,11 +147,14 @@ error_decode_xe_read_vm_line(const char *line, uint64_t *address, const char **v case 'l': type = XE_VM_TOPIC_TYPE_LENGTH; break; + case 'p': + type = XE_VM_TOPIC_TYPE_PROPERTY; + break; case 'e': type = XE_VM_TOPIC_TYPE_ERROR; break; default: - printf("type char: %c\n", *line); + printf("type char: %c, VM topic is unknown\n", *line); return XE_VM_TOPIC_TYPE_UNKNOWN; } @@ -139,6 +164,97 @@ error_decode_xe_read_vm_line(const char *line, uint64_t *address, const char **v return type; } +/* parses a line: '[40000].properties: read_write|bo|mem_region=0x1|pat_index=0|cpu_caching=1' + * and populates a struct from the properties being extracted, returns true on success. 
+ */ +bool +error_decode_xe_read_vm_property_line(struct xe_vma_properties *props, const char *line) +{ + enum xe_vma_property_type property_type = XE_VMA_TOPIC_PROPERTY_PERMISSION; + + while (*line != '\0') { + char property[64], property_value[64]; + int property_len = 0; + int value_len = 0; + + while (*line != '|' && *line != '=' && *line != '\0') { + property[property_len++] = *line; + line++; + } + property[property_len] = 0; + + if (*line == '=') { + line++; + while (*line != '|' && *line != '\0') { + property_value[value_len++] = *line; + line++; + } + property_value[value_len] = 0; + } + + switch (property_type) { + case XE_VMA_TOPIC_PROPERTY_PERMISSION: + if (strcmp("read", property) == 0) { + props->mem_permission = INTEL_HANG_DUMP_BLOCK_MEM_TYPE_READ_ONLY; + } else if (strcmp("read_write", property) == 0) { + props->mem_permission = INTEL_HANG_DUMP_BLOCK_MEM_TYPE_READ_WRITE; + } else { + printf("Error unknown permission property: %s\n", property); + return false; + } + break; + case XE_VMA_TOPIC_PROPERTY_TYPE: + if (strcmp("bo", property) == 0) { + props->mem_type = INTEL_HANG_DUMP_BLOCK_MEM_TYPE_BO; + } else if (strcmp("userptr", property) == 0) { + props->mem_type = INTEL_HANG_DUMP_BLOCK_MEM_TYPE_USERPTR; + } else if (strcmp("null_sparse", property) == 0) { + props->mem_type = INTEL_HANG_DUMP_BLOCK_MEM_TYPE_NULL_SPARSE; + } else { + printf("Error unknown vma type: %s\n", property); + return false; + } + break; + case XE_VMA_TOPIC_PROPERTY_MEM_REGION: + if (strcmp("mem_region", property) != 0) { + printf("Error: mismatch in VMA property string name %s - expected 'mem_region'\n", property); + return false; + } + props->mem_region = strtoul(property_value, NULL, 0); + break; + case XE_VMA_TOPIC_PROPERTY_PAT_INDEX: + if (strcmp("pat_index", property) != 0) { + printf("Error: mismatch in VMA property string name: %s - expected 'pat_index'\n", property); + return false; + } + props->pat_index = strtoul(property_value, NULL, 0); + break; + case 
XE_VMA_TOPIC_PROPERTY_CPU_CACHING: + if (strcmp("cpu_caching", property) != 0) { + printf("Error: mismatch in VMA property string name: %s - expected 'cpu_caching'\n", property); + return false; + } + props->cpu_caching = strtoul(property_value, NULL, 0); + if (props->cpu_caching != INTEL_HANG_DUMP_BLOCK_CPU_CACHING_MODE_WB && + props->cpu_caching != INTEL_HANG_DUMP_BLOCK_CPU_CACHING_MODE_WC) { + printf("Error unknown cpu caching: %s\n", property_value); + return false; + } + break; + default: + printf("Error unknown VMA property type: %s\n", property); + return false; + } + + property_type++; + if (*line == '|') { + line++; + } + } + + return true; +} + /* return true if line is a binary line. * name is set with binary name, type is set with line binary type and * value_ptr with line binary value(length, error or data). @@ -208,8 +324,8 @@ void error_decode_xe_vm_fini(struct xe_vm *xe_vm) } static void -xe_vm_entry_set(struct xe_vm_entry *entry, const uint64_t address, - const uint32_t length, const uint32_t *data) +xe_vm_entry_set(struct xe_vm_entry *entry, const uint64_t address, const uint32_t length, + const struct xe_vma_properties *props, const uint32_t *data) { /* Newer versions of Xe KMD will give us the canonical VMA address while * older will give us 48b address. 
@@ -219,13 +335,21 @@ xe_vm_entry_set(struct xe_vm_entry *entry, const uint64_t address, entry->address = intel_48b_address(address); entry->length = length; entry->data = data; + memcpy(&entry->props, props, sizeof(struct xe_vma_properties)); } void error_decode_xe_vm_hw_ctx_set(struct xe_vm *xe_vm, const uint32_t length, const uint32_t *data) { - xe_vm_entry_set(&xe_vm->hw_context, 0, length, data); + struct xe_vma_properties props = {0}; + + xe_vm_entry_set(&xe_vm->hw_context, 0, length, &props, data); +} + +void error_decode_xe_vm_hw_ctx_set_offset(struct xe_vm *xe_vm, uint64_t offset) +{ + xe_vm->hw_context.address = offset; } /* @@ -233,15 +357,17 @@ error_decode_xe_vm_hw_ctx_set(struct xe_vm *xe_vm, const uint32_t length, */ bool error_decode_xe_vm_append(struct xe_vm *xe_vm, const uint64_t address, - const uint32_t length, const uint32_t *data) + const uint32_t length, + const struct xe_vma_properties *props, + const uint32_t *data) { size_t len = sizeof(*xe_vm->entries) * (xe_vm->entries_len + 1); xe_vm->entries = realloc(xe_vm->entries, len); + if (!xe_vm->entries) return false; - - xe_vm_entry_set(&xe_vm->entries[xe_vm->entries_len], address, length, data); + xe_vm_entry_set(&xe_vm->entries[xe_vm->entries_len], address, length, props, data); xe_vm->entries_len++; return true; } diff --git a/src/intel/tools/error_decode_xe_lib.h b/src/intel/tools/error_decode_xe_lib.h index 40a84d8a57e..018fd5a888e 100644 --- a/src/intel/tools/error_decode_xe_lib.h +++ b/src/intel/tools/error_decode_xe_lib.h @@ -7,6 +7,7 @@ #include #include +#include "common/intel_hang_dump.h" enum xe_topic { XE_TOPIC_DEVICE = 0, @@ -20,15 +21,36 @@ enum xe_topic { enum xe_vm_topic_type { XE_VM_TOPIC_TYPE_UNKNOWN = 0, + XE_VM_TOPIC_TYPE_GLOBAL_VM_FLAGS, XE_VM_TOPIC_TYPE_LENGTH, XE_VM_TOPIC_TYPE_DATA, + XE_VM_TOPIC_TYPE_PROPERTY, XE_VM_TOPIC_TYPE_ERROR, }; +enum xe_vma_property_type { + XE_VMA_TOPIC_PROPERTY_PERMISSION = 0, + XE_VMA_TOPIC_PROPERTY_TYPE, + XE_VMA_TOPIC_PROPERTY_MEM_REGION, 
+ XE_VMA_TOPIC_PROPERTY_PAT_INDEX, + XE_VMA_TOPIC_PROPERTY_CPU_CACHING, + XE_VMA_TOPIC_PROPERTY_UNKNOWN, + XE_VMA_TOPIC_PROPERTY_ERROR, +}; + +struct xe_vma_properties { + uint32_t mem_permission; + uint32_t mem_type; + uint32_t mem_region; + uint32_t pat_index; + uint32_t cpu_caching; +}; + struct xe_vm_entry { uint64_t address; uint32_t length; const uint32_t *data; + struct xe_vma_properties props; }; struct xe_vm { @@ -48,12 +70,15 @@ bool error_decode_xe_read_engine_name(const char *line, char *ring_name); bool error_decode_xe_decode_topic(const char *line, enum xe_topic *new_topic); enum xe_vm_topic_type error_decode_xe_read_vm_line(const char *line, uint64_t *address, const char **value_ptr); +bool error_decode_xe_read_vm_property_line(struct xe_vma_properties *props, const char *line); bool error_decode_xe_binary_line(const char *line, char *name, int name_len, enum xe_vm_topic_type *type, const char **value_ptr); void error_decode_xe_vm_init(struct xe_vm *xe_vm); void error_decode_xe_vm_fini(struct xe_vm *xe_vm); void error_decode_xe_vm_hw_ctx_set(struct xe_vm *xe_vm, const uint32_t length, const uint32_t *data); -bool error_decode_xe_vm_append(struct xe_vm *xe_vm, const uint64_t address, const uint32_t length, const uint32_t *data); +void error_decode_xe_vm_hw_ctx_set_offset(struct xe_vm *xe_vm, uint64_t offset); +bool error_decode_xe_vm_append(struct xe_vm *xe_vm, const uint64_t address, const uint32_t length, + const struct xe_vma_properties *props, const uint32_t *data); const struct xe_vm_entry *error_decode_xe_vm_entry_get(struct xe_vm *xe_vm, const uint64_t address); uint32_t *error_decode_xe_vm_entry_address_get_data(const struct xe_vm_entry *entry, const uint64_t address); uint32_t error_decode_xe_vm_entry_address_get_len(const struct xe_vm_entry *entry, const uint64_t address); diff --git a/src/intel/tools/intel_hang_replay.c b/src/intel/tools/intel_hang_replay.c index b30977ac429..80632813cdb 100644 --- a/src/intel/tools/intel_hang_replay.c +++ 
b/src/intel/tools/intel_hang_replay.c @@ -1,5 +1,5 @@ /* - * Copyright © 2022 Intel Corporation + * Copyright © 2025 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -52,6 +52,9 @@ #include "util/u_dynarray.h" #include "util/u_math.h" +#include "intel_hang_replay_xe.h" +#include "intel_hang_replay_lib.h" + #include "intel_tools.h" static uint32_t @@ -181,20 +184,6 @@ gem_mmap_offset(int drm_fd, return map; } -static void -write_malloc_data(void *out_data, - int file_fd, - size_t size) -{ - size_t total_read_len = 0; - ssize_t read_len; - while (total_read_len < size && - (read_len = read(file_fd, out_data + total_read_len, size - total_read_len)) > 0) { - total_read_len += read_len; - } - assert(total_read_len == size); -} - static void write_gem_bo_data(int drm_fd, uint32_t gem_handle, @@ -215,12 +204,6 @@ write_gem_bo_data(int drm_fd, assert(total_read_len == size); } -static void -skip_data(int file_fd, size_t size) -{ - lseek(file_fd, size, SEEK_CUR); -} - static int get_drm_device(struct intel_device_info *devinfo) { @@ -251,22 +234,6 @@ get_drm_device(struct intel_device_info *devinfo) return fd; } -struct gem_bo { - off_t file_offset; - uint32_t gem_handle; - uint64_t offset; - uint64_t size; - bool hw_img; -}; - -static int -compare_bos(const void *b1, const void *b2) -{ - const struct gem_bo *gem_b1 = b1, *gem_b2 = b2; - - return gem_b2->size > gem_b1->size; -} - static void print_help(const char *filename, FILE *f) { @@ -276,6 +243,7 @@ print_help(const char *filename, FILE *f) fprintf(f, " -s, --shader ADDR print shader at ADDR\n"); fprintf(f, " -h, --help print this screen\n"); fprintf(f, " -a, --address ADDR Find BO containing ADDR\n"); + fprintf(f, " -D, --dumpable add DRM_XE_VM_BIND_FLAG_DUMPABLE to all VMA binds\n"); } static int @@ -310,15 +278,162 @@ execbuffer(int drm_fd, return ret; } +static int process_i915_dmp_file(int 
file_fd, int drm_fd, struct util_dynarray *buffers, + void *mem_ctx, struct intel_hang_dump_block_exec *init, + struct intel_hang_dump_block_exec *exec) { + void *hw_img = NULL; + uint32_t hw_img_size = 0; + + /* Allocate BOs populate them */ + uint64_t gem_allocated = 0; + util_dynarray_foreach(buffers, struct gem_bo, bo) { + lseek(file_fd, bo->file_offset, SEEK_SET); + if (bo->hw_img) { + hw_img = malloc(bo->size); + write_malloc_data(hw_img, file_fd, bo->size); + hw_img_size = bo->size; + } else { + bo->gem_handle = gem_create(drm_fd, bo->size); + write_gem_bo_data(drm_fd, bo->gem_handle, file_fd, bo->size); + } + + gem_allocated += bo->size; + } + + uint32_t ctx_id = gem_context_create(drm_fd); + if (ctx_id == 0) { + fprintf(stderr, "fail to create context: %s\n", strerror(errno)); + return EXIT_FAILURE; + } + + if (hw_img != NULL) { + if (!gem_context_set_hw_image(drm_fd, ctx_id, hw_img, hw_img_size)) { + fprintf(stderr, "fail to set context hw img: %s\n", strerror(errno)); + return EXIT_FAILURE; + } + } + + struct util_dynarray execbuffer_bos; + util_dynarray_init(&execbuffer_bos, mem_ctx); + + struct gem_bo *init_bo = NULL, *batch_bo = NULL; + + util_dynarray_foreach(buffers, struct gem_bo, bo) { + if (bo->offset <= init->offset && + (bo->offset + bo->size) > init->offset) { + init_bo = bo; + continue; + } + + if (bo->offset <= exec->offset && + (bo->offset + bo->size) > exec->offset) { + batch_bo = bo; + continue; + } + + if (bo->hw_img) + continue; + + struct drm_i915_gem_exec_object2 *execbuf_bo = + util_dynarray_grow(&execbuffer_bos, struct drm_i915_gem_exec_object2, 1); + *execbuf_bo = (struct drm_i915_gem_exec_object2) { + .handle = bo->gem_handle, + .relocation_count = 0, + .relocs_ptr = 0, + .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | + EXEC_OBJECT_PINNED | + EXEC_OBJECT_CAPTURE, + .offset = intel_canonical_address(bo->offset), + }; + } + + assert(batch_bo != NULL); + + struct drm_i915_gem_exec_object2 *execbuf_bo = + 
util_dynarray_grow(&execbuffer_bos, struct drm_i915_gem_exec_object2, 1); + + int ret; + + if (init_bo) { + fprintf(stderr, "init: 0x%016"PRIx64"\n", init_bo->offset); + *execbuf_bo = (struct drm_i915_gem_exec_object2) { + .handle = init_bo->gem_handle, + .relocation_count = 0, + .relocs_ptr = 0, + .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | + EXEC_OBJECT_PINNED | + EXEC_OBJECT_CAPTURE, + .offset = intel_canonical_address(init_bo->offset), + }; + ret = execbuffer(drm_fd, ctx_id, &execbuffer_bos, init_bo, init->offset); + if (ret != 0) { + fprintf(stderr, "initialization buffer failed to execute errno=%i\n", errno); + exit(-1); + } + } else { + fprintf(stderr, "no init BO\n"); + } + + if (batch_bo) { + fprintf(stderr, "exec: 0x%016"PRIx64" aperture=%.2fMb\n", batch_bo->offset, + gem_allocated / 1024.0 / 1024.0); + *execbuf_bo = (struct drm_i915_gem_exec_object2) { + .handle = batch_bo->gem_handle, + .relocation_count = 0, + .relocs_ptr = 0, + .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | + EXEC_OBJECT_PINNED | + EXEC_OBJECT_CAPTURE, + .offset = intel_canonical_address(batch_bo->offset), + }; + ret = execbuffer(drm_fd, ctx_id, &execbuffer_bos, batch_bo, exec->offset); + if (ret != 0) { + fprintf(stderr, "replayed buffer failed to execute errno=%i\n", errno); + exit(-1); + } else { + fprintf(stderr, "exec completed successfully\n"); + } + } else { + fprintf(stderr, "no exec BO\n"); + } + + return EXIT_SUCCESS; +} + +static int +replay_dmp_file(int file_fd, int drm_fd, const struct intel_device_info *devinfo, + struct util_dynarray *buffers, void *mem_ctx, + struct intel_hang_dump_block_exec *init, + struct intel_hang_dump_block_exec *exec, + uint32_t vm_flags, uint32_t bo_dumpable) +{ + /* Sort buffers by size */ + qsort(util_dynarray_begin(buffers), + util_dynarray_num_elements(buffers, struct gem_bo), + sizeof(struct gem_bo), + compare_bos); + + if (devinfo->kmd_type == INTEL_KMD_TYPE_I915) + return process_i915_dmp_file(file_fd, drm_fd, buffers, mem_ctx, init, exec); 
+ else if (devinfo->kmd_type == INTEL_KMD_TYPE_XE) + return process_xe_dmp_file(file_fd, drm_fd, devinfo, buffers, mem_ctx, init, exec, + vm_flags, bo_dumpable); + else + fprintf(stderr, "driver is unknown, exiting\n"); + + return EXIT_FAILURE; +} + int main(int argc, char *argv[]) { - bool help = false, list = false; + bool help = false, list = false, bo_dumpable = false; const struct option aubinator_opts[] = { { "address", required_argument, NULL, 'a' }, { "dump", required_argument, NULL, 'd' }, { "shader", required_argument, NULL, 's' }, { "list", no_argument, NULL, 'l' }, + { "dumpable", no_argument, 0, 'D'}, { "help", no_argument, NULL, 'h' }, { NULL, 0, NULL, 0 }, }; @@ -331,8 +446,9 @@ main(int argc, char *argv[]) const char *file = NULL; uint64_t check_addr = -1; + uint32_t vm_flags = -1; int c, i; - while ((c = getopt_long(argc, argv, "a:d:hls:", aubinator_opts, &i)) != -1) { + while ((c = getopt_long(argc, argv, "a:d:hlDs:", aubinator_opts, &i)) != -1) { switch (c) { case 'a': check_addr = strtol(optarg, NULL, 0); @@ -352,6 +468,9 @@ main(int argc, char *argv[]) case 'l': list = true; break; + case 'D': + bo_dumpable = true; + break; default: break; } @@ -391,11 +510,12 @@ main(int argc, char *argv[]) sizeof(block_header.base)) { static const size_t block_size[] = { - [INTEL_HANG_DUMP_BLOCK_TYPE_HEADER] = sizeof(struct intel_hang_dump_block_header), - [INTEL_HANG_DUMP_BLOCK_TYPE_BO] = sizeof(struct intel_hang_dump_block_bo), - [INTEL_HANG_DUMP_BLOCK_TYPE_MAP] = sizeof(struct intel_hang_dump_block_map), - [INTEL_HANG_DUMP_BLOCK_TYPE_EXEC] = sizeof(struct intel_hang_dump_block_exec), - [INTEL_HANG_DUMP_BLOCK_TYPE_HW_IMAGE] = sizeof(struct intel_hang_dump_block_hw_image), + [INTEL_HANG_DUMP_BLOCK_TYPE_HEADER] = sizeof(struct intel_hang_dump_block_header), + [INTEL_HANG_DUMP_BLOCK_TYPE_BO] = sizeof(struct intel_hang_dump_block_bo), + [INTEL_HANG_DUMP_BLOCK_TYPE_MAP] = sizeof(struct intel_hang_dump_block_map), + [INTEL_HANG_DUMP_BLOCK_TYPE_EXEC] = 
sizeof(struct intel_hang_dump_block_exec), + [INTEL_HANG_DUMP_BLOCK_TYPE_HW_IMAGE] = sizeof(struct intel_hang_dump_block_hw_image), + [INTEL_HANG_DUMP_BLOCK_TYPE_VM_FLAGS] = sizeof(struct intel_hang_dump_block_vm_flags), }; assert(block_header.base.type < ARRAY_SIZE(block_size)); @@ -417,6 +537,11 @@ main(int argc, char *argv[]) .file_offset = lseek(file_fd, 0, SEEK_CUR), .offset = block_header.bo.offset, .size = block_header.bo.size, + .props.mem_region = block_header.bo.props.mem_region, + .props.pat_index = block_header.bo.props.pat_index, + .props.cpu_caching = block_header.bo.props.cpu_caching, + .props.mem_type = block_header.bo.props.mem_type, + .props.mem_permission = block_header.bo.props.mem_permission, }; total_vma += bo->size; skip_data(file_fd, bo->size); @@ -427,11 +552,17 @@ main(int argc, char *argv[]) break; } + /* Handle both i915 and Xe HW image blocks under the unified type. */ case INTEL_HANG_DUMP_BLOCK_TYPE_HW_IMAGE: { struct gem_bo *bo = util_dynarray_grow(&buffers, struct gem_bo, 1); + + /* The unified intel_hang_dump_block_hw_image now contains Xe-specific fields. + * For i915 dumps, these fields will be 0. 
+ */ *bo = (struct gem_bo) { .file_offset = lseek(file_fd, 0, SEEK_CUR), - .offset = 0, + .gem_handle = 0, /* From Xe logic */ + .offset = block_header.hw_img.offset, .size = block_header.hw_img.size, .hw_img = true, }; @@ -460,6 +591,11 @@ main(int argc, char *argv[]) break; } + case INTEL_HANG_DUMP_BLOCK_TYPE_VM_FLAGS: { + vm_flags = block_header.vm_flags.vm_flags; + break; + } + case INTEL_HANG_DUMP_BLOCK_TYPE_EXEC: { if (init.offset == 0 && !has_hw_image) { if (list) @@ -524,128 +660,8 @@ main(int argc, char *argv[]) fprintf(stderr, "shader at 0x%016"PRIx64" not found\n", *addr); } - if (!list && util_dynarray_num_elements(&shader_addresses, uint64_t) == 0) { - /* Sort buffers by size */ - qsort(util_dynarray_begin(&buffers), - util_dynarray_num_elements(&buffers, struct gem_bo), - sizeof(struct gem_bo), - compare_bos); - - void *hw_img = NULL; - uint32_t hw_img_size = 0; - - /* Allocate BOs populate them */ - uint64_t gem_allocated = 0; - util_dynarray_foreach(&buffers, struct gem_bo, bo) { - lseek(file_fd, bo->file_offset, SEEK_SET); - if (bo->hw_img) { - hw_img = malloc(bo->size); - write_malloc_data(hw_img, file_fd, bo->size); - hw_img_size = bo->size; - } else { - bo->gem_handle = gem_create(drm_fd, bo->size); - write_gem_bo_data(drm_fd, bo->gem_handle, file_fd, bo->size); - } - - gem_allocated += bo->size; - } - - uint32_t ctx_id = gem_context_create(drm_fd); - if (ctx_id == 0) { - fprintf(stderr, "fail to create context: %s\n", strerror(errno)); - return EXIT_FAILURE; - } - - if (hw_img != NULL) { - if (!gem_context_set_hw_image(drm_fd, ctx_id, hw_img, hw_img_size)) { - fprintf(stderr, "fail to set context hw img: %s\n", strerror(errno)); - return EXIT_FAILURE; - } - } - - struct util_dynarray execbuffer_bos; - util_dynarray_init(&execbuffer_bos, mem_ctx); - - struct gem_bo *init_bo = NULL, *batch_bo = NULL; - util_dynarray_foreach(&buffers, struct gem_bo, bo) { - if (bo->offset <= init.offset && - (bo->offset + bo->size) > init.offset) { - init_bo = bo; 
- continue; - } - - if (bo->offset <= exec.offset && - (bo->offset + bo->size) > exec.offset) { - batch_bo = bo; - continue; - } - - if (bo->hw_img) - continue; - - struct drm_i915_gem_exec_object2 *execbuf_bo = - util_dynarray_grow(&execbuffer_bos, struct drm_i915_gem_exec_object2, 1); - *execbuf_bo = (struct drm_i915_gem_exec_object2) { - .handle = bo->gem_handle, - .relocation_count = 0, - .relocs_ptr = 0, - .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | - EXEC_OBJECT_PINNED | - EXEC_OBJECT_CAPTURE, - .offset = intel_canonical_address(bo->offset), - }; - } - - assert(batch_bo != NULL); - - struct drm_i915_gem_exec_object2 *execbuf_bo = - util_dynarray_grow(&execbuffer_bos, struct drm_i915_gem_exec_object2, 1); - - int ret; - - if (init_bo) { - fprintf(stderr, "init: 0x%016"PRIx64"\n", init_bo->offset); - *execbuf_bo = (struct drm_i915_gem_exec_object2) { - .handle = init_bo->gem_handle, - .relocation_count = 0, - .relocs_ptr = 0, - .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | - EXEC_OBJECT_PINNED | - EXEC_OBJECT_CAPTURE, - .offset = intel_canonical_address(init_bo->offset), - }; - ret = execbuffer(drm_fd, ctx_id, &execbuffer_bos, init_bo, init.offset); - if (ret != 0) { - fprintf(stderr, "initialization buffer failed to execute errno=%i\n", errno); - exit(-1); - } - } else { - fprintf(stderr, "no init BO\n"); - } - - if (batch_bo) { - fprintf(stderr, "exec: 0x%016"PRIx64" aperture=%.2fMb\n", batch_bo->offset, - gem_allocated / 1024.0 / 1024.0); - *execbuf_bo = (struct drm_i915_gem_exec_object2) { - .handle = batch_bo->gem_handle, - .relocation_count = 0, - .relocs_ptr = 0, - .flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | - EXEC_OBJECT_PINNED | - EXEC_OBJECT_CAPTURE, - .offset = intel_canonical_address(batch_bo->offset), - }; - ret = execbuffer(drm_fd, ctx_id, &execbuffer_bos, batch_bo, exec.offset); - if (ret != 0) { - fprintf(stderr, "replayed buffer failed to execute errno=%i\n", errno); - exit(-1); - } else { - fprintf(stderr, "exec completed successfully\n"); - } - 
} else {
-         fprintf(stderr, "no exec BO\n");
-      }
-   }
+   if (!list && util_dynarray_num_elements(&shader_addresses, uint64_t) == 0)
+      replay_dmp_file(file_fd, drm_fd, &devinfo, &buffers, mem_ctx, &init, &exec, vm_flags, bo_dumpable);
 
    close(drm_fd);
    close(file_fd);
diff --git a/src/intel/tools/intel_hang_replay_lib.c b/src/intel/tools/intel_hang_replay_lib.c
new file mode 100644
index 00000000000..1ff5271fb19
--- /dev/null
+++ b/src/intel/tools/intel_hang_replay_lib.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2025 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "intel_hang_replay_lib.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* qsort() comparator ordering gem_bo entries by decreasing size.
+ *
+ * Returns a negative, zero or positive value as qsort() requires; a plain
+ * 'b2 > b1' never yields a negative result and gives no consistent ordering.
+ */
+int
+compare_bos(const void *b1, const void *b2)
+{
+   const struct gem_bo *gem_b1 = b1, *gem_b2 = b2;
+
+   return (gem_b2->size > gem_b1->size) - (gem_b2->size < gem_b1->size);
+}
+
+/* Advance the read position of file_fd by 'size' bytes; exits on failure. */
+void
+skip_data(int file_fd, size_t size)
+{
+   if (lseek(file_fd, (off_t)size, SEEK_CUR) == (off_t)-1) {
+      fprintf(stderr, "failed to skip %zu bytes: %s\n", size, strerror(errno));
+      exit(EXIT_FAILURE);
+   }
+}
+
+/* Read exactly 'size' bytes from file_fd into out_data.
+ *
+ * Short reads are retried; the process exits if the file ends or an error
+ * occurs before 'size' bytes were read.
+ */
+void
+write_malloc_data(void *out_data,
+                  int file_fd,
+                  size_t size)
+{
+   /* Byte pointer: arithmetic on 'void *' is a GNU extension. */
+   uint8_t *dst = out_data;
+   size_t total_read_len = 0;
+
+   while (total_read_len < size) {
+      ssize_t read_len = read(file_fd, dst + total_read_len, size - total_read_len);
+      if (read_len < 0 && errno == EINTR)
+         continue;
+      if (read_len <= 0)
+         break;
+      total_read_len += (size_t)read_len;
+   }
+
+   /* Explicit check rather than assert(), which compiles out under NDEBUG. */
+   if (total_read_len != size) {
+      fprintf(stderr, "short read: got %zu of %zu bytes\n", total_read_len, size);
+      exit(EXIT_FAILURE);
+   }
+}
diff --git a/src/intel/tools/intel_hang_replay_lib.h b/src/intel/tools/intel_hang_replay_lib.h
new file mode 100644
index 00000000000..c3232d1cb17
--- /dev/null
+++ b/src/intel/tools/intel_hang_replay_lib.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2025 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+/* NOTE(review): the header names of the system includes were lost; this list
+ * is rebuilt from the symbols used by this header and its users -- confirm.
+ */
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "common/intel_hang_dump.h"
+
+/* One memory block of the dump to recreate for replay. */
+struct gem_bo {
+   off_t file_offset;   /* position of the block's data in the dump file */
+   uint32_t gem_handle; /* GEM handle, 0 when none */
+   uint64_t offset;     /* GPU address of the block */
+   uint64_t size;       /* size in bytes */
+   bool hw_img;         /* true for the HW image block */
+   struct intel_hang_dump_block_vm_properties props;
+};
+
+int compare_bos(const void *b1, const void *b2);
+void skip_data(int file_fd, size_t size);
+void write_malloc_data(void *out_data, int file_fd, size_t size);
diff --git a/src/intel/tools/intel_hang_replay_xe.c b/src/intel/tools/intel_hang_replay_xe.c
new file mode 100644
index 00000000000..4c9523976c2
--- /dev/null
+++ b/src/intel/tools/intel_hang_replay_xe.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright 2025 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "intel/perf/intel_perf_private.h"
+#include "intel_hang_replay_xe.h"
+#include "drm-uapi/xe_drm.h"
+#include "common/intel_gem.h"
+#include "intel_hang_replay_lib.h"
+
+/* Wait on 'count' syncobjs; returns 0 on success or -errno. */
+static int syncobj_wait(int drm_fd, uint32_t *handles, uint32_t count, uint64_t abs_timeout_nsec,
+                        uint32_t flags)
+{
+   struct drm_syncobj_wait wait = {};
+   int err = 0;
+
+   wait.handles = to_user_pointer(handles);
+   wait.timeout_nsec = abs_timeout_nsec;
+   wait.count_handles = count;
+   wait.flags = flags;
+
+   if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait))
+      err = -errno;
+
+   return err;
+}
+
+/* Create a syncobj; returns its handle or -errno. */
+static int
+syncobj_create(int drm_fd)
+{
+   struct drm_syncobj_create create = {};
+
+   if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_CREATE, &create))
+      return -errno;
+
+   return create.handle;
+}
+
+/* Destroy a syncobj; returns 0 on success or -errno. */
+static int
+syncobj_destroy(int drm_fd, uint32_t handle)
+{
+   struct drm_syncobj_destroy destroy = {};
+   int err = 0;
+
+   destroy.handle = handle;
+   if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy))
+      err = -errno;
+
+   return err;
+}
+
+/* Reset 'count' syncobjs; returns 0 on success or -errno. */
+static int
+syncobj_reset(int drm_fd, uint32_t *handles, uint32_t count)
+{
+   struct drm_syncobj_array array = {};
+   int err = 0;
+
+   array.handles = to_user_pointer(handles);
+   array.count_handles = count;
+   if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_RESET, &array))
+      err = -errno;
+
+   return err;
+}
+
+/* Create a GEM BO from bo->size and bo->props; returns its handle or -errno. */
+static int
+xe_bo_create(int drm_fd, uint32_t vm, struct gem_bo *bo)
+{
+   struct drm_xe_gem_create create = {
+      .vm_id = vm,
+      .size = bo->size,
+      .placement = bo->props.mem_region,
+      .flags = 0,
+      .cpu_caching = bo->props.cpu_caching,
+   };
+
+   if (intel_ioctl(drm_fd, DRM_IOCTL_XE_GEM_CREATE, &create))
+      return -errno;
+
+   return create.handle;
+}
+
+/* Close a GEM handle; returns 0 on success or -errno. */
+static int
+xe_bo_destroy(int drm_fd, uint32_t bo_handle)
+{
+   struct drm_gem_close close_bo = {
+      .handle = bo_handle,
+   };
+
+   if (intel_ioctl(drm_fd, DRM_IOCTL_GEM_CLOSE, &close_bo))
+      return -errno;
+
+   return 0;
+}
+
+/* Submit one bind operation mapping 'bo' at GPU address bo->offset.
+ * 'obj_offset' shares storage with the 'userptr' field of the bind, so it
+ * carries the host address for DRM_XE_VM_BIND_OP_MAP_USERPTR.
+ * Returns 0 on success or -errno.
+ */
+static int
+xe_vm_bind(int drm_fd, uint32_t vm, struct gem_bo *bo, uint64_t obj_offset, uint32_t op, uint32_t flags,
+           struct drm_xe_sync *sync, uint32_t num_syncs)
+{
+   struct drm_xe_vm_bind bind = {
+      .vm_id = vm,
+      .num_binds = 1,
+      .bind.obj = bo->gem_handle,
+      .bind.obj_offset = obj_offset,
+      .bind.range = bo->size,
+      .bind.addr = bo->offset,
+      .bind.op = op,
+      .bind.flags = flags,
+      .num_syncs = num_syncs,
+      .syncs = (uintptr_t)sync,
+      .bind.pat_index = bo->props.pat_index,
+   };
+
+   if (intel_ioctl(drm_fd, DRM_IOCTL_XE_VM_BIND, &bind))
+      return -errno;
+
+   return 0;
+}
+
+/* Create a VM; the scratch-page flag is always added. Exits on failure. */
+static uint32_t
+xe_vm_create(int drm_fd, uint32_t flags)
+{
+   struct drm_xe_vm_create create = {
+      .flags = flags,
+   };
+
+   /* Mesa enforces the flag but it may go away at some point */
+   if (flags != (flags | DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE)) {
+      create.flags = flags | DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
+      fprintf(stderr, "DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE flag is now being set.\n");
+   }
+
+   if (flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) {
+      fprintf(stderr, "Long running VM is not supported, aborting.\n");
+      exit(EXIT_FAILURE);
+   }
+
+   if (intel_ioctl(drm_fd, DRM_IOCTL_XE_VM_CREATE, &create)) {
+      fprintf(stderr, "vm creation failed, aborting\n");
+      exit(EXIT_FAILURE);
+   }
+
+   return create.vm_id;
+}
+
+/* Destroy a VM; returns 0 on success or -errno. */
+static int
+xe_vm_destroy(int drm_fd, uint32_t vm)
+{
+   struct drm_xe_vm_destroy destroy = {
+      .vm_id = vm,
+   };
+
+   if (intel_ioctl(drm_fd, DRM_IOCTL_XE_VM_DESTROY, &destroy))
+      return -errno;
+
+   return 0;
+}
+
+/* Create an exec queue on 'vm'; 'ext' is the user address of the first
+ * extension (0 for none). Exits on failure.
+ */
+static uint32_t
+xe_exec_queue_create(int drm_fd, uint32_t vm, uint16_t width, uint16_t num_placements,
+                     struct drm_xe_engine_class_instance *instance, uint64_t ext)
+{
+   struct drm_xe_exec_queue_create create = {
+      .extensions = ext,
+      .vm_id = vm,
+      .width = width,
+      .num_placements = num_placements,
+      .instances = to_user_pointer(instance),
+   };
+
+   if (intel_ioctl(drm_fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create)) {
+      fprintf(stderr, "exec_queue creation failed, aborting\n");
+      exit(EXIT_FAILURE);
+   }
+
+   return create.exec_queue_id;
+}
+
+/* Destroy an exec queue; returns 0 on success or -errno. */
+static int
+xe_exec_queue_destroy(int drm_fd, uint32_t exec_queue)
+{
+   struct drm_xe_exec_queue_destroy destroy = {
+      .exec_queue_id = exec_queue,
+   };
+
+   if (intel_ioctl(drm_fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &destroy))
+      return -errno;
+
+   return 0;
+}
+
+/* Map 'size' bytes of a BO into the CPU address space; returns MAP_FAILED on
+ * error. Warns when 'pat_index' is one of the device's compressed PAT entries.
+ */
+static void*
+gem_xe_mmap_offset(int drm_fd, uint32_t bo, size_t size, uint32_t pat_index,
+                   const struct intel_device_info *devinfo)
+{
+   void *addr = MAP_FAILED;
+
+   struct drm_xe_gem_mmap_offset mmo = {
+      .handle = bo,
+   };
+
+   if (pat_index == devinfo->pat.compressed.index ||
+       pat_index == devinfo->pat.compressed_scanout.index) {
+      fprintf(stderr,
+              "Warning: compressed BOs (PAT index %u) are not supported at the moment.\n"
+              "Effort to support compressed BOs: https://patchwork.freedesktop.org/patch/663902\n",
+              pat_index);
+   }
+
+   /* Get the fake offset back */
+   if (intel_ioctl(drm_fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo) == 0)
+      addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, drm_fd, mmo.offset);
+
+   if (addr == MAP_FAILED)
+      fprintf(stderr, "xe GEM mmap failed\n");
+
+   return addr;
+}
+
+/* Fill a BO with 'size' bytes read from file_fd through a temporary CPU
+ * mapping. Exits if the BO cannot be mapped.
+ */
+static void
+write_xe_bo_data(int drm_fd, uint32_t bo, int file_fd, size_t size, uint32_t pat_index,
+                 const struct intel_device_info *devinfo)
+{
+   void *map = gem_xe_mmap_offset(drm_fd, bo, size, pat_index, devinfo);
+
+   /* Explicit check: an assert() compiles out under NDEBUG and the copy below
+    * would then write through MAP_FAILED.
+    */
+   if (map == MAP_FAILED)
+      exit(EXIT_FAILURE);
+
+   write_malloc_data(map, file_fd, size);
+
+   munmap(map, size);
+}
+
+/* Allocate host memory for a userptr bind and fill it with 'bo_size' bytes
+ * read from file_fd. The memory comes from an anonymous mmap() so that it is
+ * page aligned; release it with munmap(ptr, bo_size). Returns NULL on failure.
+ */
+static void *
+load_userptr_data(int file_fd, uint64_t bo_size)
+{
+   void *map = mmap(NULL, bo_size, PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+   if (map == MAP_FAILED) {
+      fprintf(stderr, "Failed to allocate memory for USERPTR BO\n");
+      return NULL;
+   }
+   write_malloc_data(map, file_fd, bo_size);
+
+   return map;
+}
+
+/* Create a render exec queue on 'vm', passing 'hw_img_data' through the
+ * DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE property.
+ */
+static uint32_t
+xe_create_exec_queue_and_set_hw_image(int drm_fd, uint32_t vm, const void *hw_img_data,
+                                      uint32_t img_size)
+{
+   /* TODO: add additional information in the intel_hang_dump_block_exec &
+    * intel_hang_dump_block_hw_image structures to specify the engine and use
+    * the correct engine here. For now let's use the Render engine.
+    */
+   struct drm_xe_engine_class_instance instance = {
+      .engine_class = DRM_XE_ENGINE_CLASS_RENDER,
+   };
+
+   /* 'value' and 'ptr' are members of one anonymous union, so only one of
+    * them can be initialized: the property carries the image pointer and has
+    * no room for img_size.
+    */
+   struct drm_xe_ext_set_property ext = {
+      .base.next_extension = 0,
+      .base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
+      .property = DRM_XE_EXEC_QUEUE_SET_HANG_REPLAY_STATE,
+      .ptr = (uint64_t)(uintptr_t)hw_img_data,
+   };
+
+   (void)img_size;
+
+   return xe_exec_queue_create(drm_fd, vm, 1, 1, &instance, to_user_pointer(&ext));
+}
+
+/* Submit 'exec'; exits on failure, so the return value is always 0. */
+static int
+xe_exec(int drm_fd, struct drm_xe_exec *exec)
+{
+   if (intel_ioctl(drm_fd, DRM_IOCTL_XE_EXEC, exec)) {
+      fprintf(stderr, "xe_exec failed, aborting\n");
+      exit(EXIT_FAILURE);
+   }
+
+   return 0;
+}
+
+/* Replay a hang dump on an Xe device.
+ *
+ * Creates a VM, recreates and binds every entry of 'buffers' with the
+ * contents stored in file_fd, creates an exec queue from the dumped HW image
+ * and submits the batch at block_exec->offset, waiting for it to complete.
+ * Everything created here is released again before returning.
+ *
+ * 'mem_ctx' and 'init' are not used. The function reports EXIT_SUCCESS or
+ * EXIT_FAILURE through its bool return type, so false means success.
+ */
+bool
+process_xe_dmp_file(int file_fd, int drm_fd, const struct intel_device_info *devinfo,
+                    struct util_dynarray *buffers, void *mem_ctx,
+                    struct intel_hang_dump_block_exec *init,
+                    struct intel_hang_dump_block_exec *block_exec,
+                    uint32_t vm_flags, uint32_t bo_dumpable)
+{
+   const size_t num_buffers = util_dynarray_num_elements(buffers, struct gem_bo);
+   const uint32_t dumpable_bit = bo_dumpable ? DRM_XE_VM_BIND_FLAG_DUMPABLE : 0;
+   struct gem_bo *first_bo = util_dynarray_begin(buffers);
+   int status = EXIT_FAILURE;
+   void *hw_img = NULL;
+   void **userptr_maps = NULL;
+   uint32_t hw_img_size = 0;
+   uint32_t exec_queue = 0;
+   int binds_done = 0;
+   int bo_counter = 0;
+   int ret;
+
+   ret = syncobj_create(drm_fd);
+   if (ret < 0) {
+      fprintf(stderr, "Failed to create syncobj. Exiting. Error: %d\n", ret);
+      return EXIT_FAILURE;
+   }
+
+   struct drm_xe_sync sync = {
+      .type = DRM_XE_SYNC_TYPE_SYNCOBJ,
+      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
+      .handle = ret,
+   };
+   struct drm_xe_exec exec = {
+      .num_syncs = 1,
+      .syncs = to_user_pointer(&sync),
+      .num_batch_buffer = 1,
+   };
+
+   uint32_t vm = xe_vm_create(drm_fd, vm_flags);
+
+   /* Host memory backing USERPTR binds, indexed like 'buffers'. It is tracked
+    * separately because bo->offset must keep holding the GPU address.
+    */
+   userptr_maps = calloc(num_buffers ? num_buffers : 1, sizeof(*userptr_maps));
+   if (!userptr_maps) {
+      fprintf(stderr, "Failed to allocate USERPTR tracking array. Exiting.\n");
+      goto cleanup;
+   }
+
+   util_dynarray_foreach(buffers, struct gem_bo, bo) {
+      if (!bo->hw_img)
+         bo_counter++;
+   }
+
+   /* Allocate BOs populate them */
+   util_dynarray_foreach(buffers, struct gem_bo, bo) {
+      const bool read_only =
+         bo->props.mem_permission == INTEL_HANG_DUMP_BLOCK_MEM_TYPE_READ_ONLY;
+      uint32_t ops = 0;
+      uint32_t flags = dumpable_bit;
+      uint64_t obj_offset = 0;
+
+      lseek(file_fd, bo->file_offset, SEEK_SET);
+
+      if (bo->hw_img) {
+         /* Only the last HW image of the dump is kept. */
+         free(hw_img);
+         hw_img = malloc(bo->size);
+         if (!hw_img) {
+            fprintf(stderr, "Failed to allocate memory for the HW image. Exiting.\n");
+            goto cleanup;
+         }
+         write_malloc_data(hw_img, file_fd, bo->size);
+         hw_img_size = bo->size;
+         continue;
+      }
+
+      if (bo->props.mem_type == INTEL_HANG_DUMP_BLOCK_MEM_TYPE_NULL_SPARSE) {
+         ops = DRM_XE_VM_BIND_OP_MAP;
+         flags = DRM_XE_VM_BIND_FLAG_NULL;
+         bo->gem_handle = 0;
+      } else if (!read_only &&
+                 bo->props.mem_type == INTEL_HANG_DUMP_BLOCK_MEM_TYPE_USERPTR) {
+         /* Allocate host memory and load BO content into it */
+         void *map = load_userptr_data(file_fd, bo->size);
+         if (!map) {
+            fprintf(stderr, "Failed to allocate/load USERPTR BO data. Exiting.\n");
+            goto cleanup;
+         }
+         userptr_maps[bo - first_bo] = map;
+         ops = DRM_XE_VM_BIND_OP_MAP_USERPTR;
+         /* The host address goes in the userptr slot of the bind; the GPU
+          * address stays in bo->offset.
+          */
+         obj_offset = (uint64_t)(uintptr_t)map;
+         bo->gem_handle = 0;
+      } else {
+         ret = xe_bo_create(drm_fd, vm, bo);
+         if (ret < 0) {
+            fprintf(stderr, "Failed to create BO%s (addr: 0x%llx, size: 0x%llx). Exiting. Error: %d\n",
+                    read_only ? " for read-only block" : "",
+                    (unsigned long long)bo->offset, (unsigned long long)bo->size, ret);
+            /* Keep the error code out of gem_handle so cleanup skips it. */
+            bo->gem_handle = 0;
+            goto cleanup;
+         }
+         bo->gem_handle = ret;
+         write_xe_bo_data(drm_fd, bo->gem_handle, file_fd, bo->size, bo->props.pat_index, devinfo);
+         ops = DRM_XE_VM_BIND_OP_MAP;
+         if (read_only)
+            flags |= DRM_XE_VM_BIND_FLAG_READONLY;
+      }
+
+      /* Only the last bind signals the syncobj that is waited on below. */
+      binds_done++;
+      ret = xe_vm_bind(drm_fd, vm, bo, obj_offset, ops, flags, &sync,
+                       binds_done == bo_counter ? 1 : 0);
+      if (ret < 0) {
+         fprintf(stderr, "Failed to bind BO (addr: 0x%llx) to VM. Exiting. Error: %d\n",
+                 (unsigned long long)bo->offset, ret);
+         goto cleanup;
+      }
+   }
+
+   if (!hw_img) {
+      fprintf(stderr, "error: dump file didn't include a hw image context, exiting...\n");
+      goto cleanup;
+   }
+
+   exec_queue = xe_create_exec_queue_and_set_hw_image(drm_fd, vm, hw_img, hw_img_size);
+
+   exec.exec_queue_id = exec_queue;
+   exec.address = block_exec->offset;
+
+   /* wait for last bind */
+   if (bo_counter > 0) {
+      ret = syncobj_wait(drm_fd, &sync.handle, 1, INT64_MAX, 0);
+      if (ret < 0) {
+         fprintf(stderr, "Failed to wait for VM binds. Exiting. Error: %d\n", ret);
+         goto cleanup;
+      }
+      syncobj_reset(drm_fd, &sync.handle, 1);
+   }
+
+   xe_exec(drm_fd, &exec);
+   ret = syncobj_wait(drm_fd, &sync.handle, 1, INT64_MAX, 0);
+   if (ret < 0) {
+      fprintf(stderr, "Failed to wait for the replayed batch. Exiting. Error: %d\n", ret);
+      goto cleanup;
+   }
+
+   status = EXIT_SUCCESS;
+
+cleanup:
+   syncobj_destroy(drm_fd, sync.handle);
+   if (exec_queue != 0)
+      xe_exec_queue_destroy(drm_fd, exec_queue);
+   xe_vm_destroy(drm_fd, vm);
+   free(hw_img);
+
+   /* Clean up GEM BO handles and USERPTR host memory created during replay. */
+   util_dynarray_foreach(buffers, struct gem_bo, bo) {
+      if (bo->gem_handle != 0) {
+         xe_bo_destroy(drm_fd, bo->gem_handle);
+         bo->gem_handle = 0;
+      }
+      if (userptr_maps && userptr_maps[bo - first_bo])
+         munmap(userptr_maps[bo - first_bo], bo->size);
+   }
+   free(userptr_maps);
+
+   return status;
+}
diff --git a/src/intel/tools/intel_hang_replay_xe.h b/src/intel/tools/intel_hang_replay_xe.h
new file mode 100644
index 00000000000..9846881311d
--- /dev/null
+++ b/src/intel/tools/intel_hang_replay_xe.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2025 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "util/u_dynarray.h"
+#include "common/intel_hang_dump.h"
+#include "intel/dev/intel_device_info.h"
+
+/* Replay the dump read from file_fd on the Xe device drm_fd. Returns
+ * EXIT_SUCCESS or EXIT_FAILURE converted to bool, i.e. false on success.
+ */
+bool process_xe_dmp_file(int file_fd, int drm_fd, const struct intel_device_info *devinfo,
+                         struct util_dynarray *buffers, void *mem_ctx,
+                         struct intel_hang_dump_block_exec *init,
+                         struct intel_hang_dump_block_exec *exec,
+                         uint32_t vm_uapi_flags, uint32_t bo_dumpable);
diff --git a/src/intel/tools/meson.build b/src/intel/tools/meson.build
index beea51f6660..4557e345d84 100644
--- a/src/intel/tools/meson.build
+++ b/src/intel/tools/meson.build
@@ -83,6 +83,8 @@ error2hangdump = executable(
   files('error2hangdump.c',
         'error2hangdump_lib.c',
         'error2hangdump_lib.h',
+        'error2hangdump_xe_lib.c',
+        'error2hangdump_xe_lib.h',
         'error2hangdump_xe.c',
         'error2hangdump_xe.h',
         'error_decode_lib.c',
@@ -99,6 +101,10 @@ error2hangdump = executable(
 intel_hang_replay = executable(
   'intel_hang_replay',
   files('intel_hang_replay.c'),
+  files('intel_hang_replay_lib.c'),
+  files('intel_hang_replay_lib.h'),
+  files('intel_hang_replay_xe.c'),
+  files('intel_hang_replay_xe.h'),
   dependencies : [idep_intel_dev, idep_libintel_common, dep_libdrm, dep_dl, dep_thread, dep_m,
                   idep_libintel_tools],
   include_directories : [inc_include, inc_src, inc_intel],