From 0a965c0bcef5e6962399aca0585161ec9ec45d7c Mon Sep 17 00:00:00 2001
From: Lionel Landwerlin
Date: Tue, 28 Apr 2026 14:58:40 +0300
Subject: [PATCH] anv: add a shader-dump debug option

Will use this with EU stall monitor.

Signed-off-by: Lionel Landwerlin
Reviewed-by: Felix DeGrood
Part-of:
---
 src/intel/vulkan/anv_device.c         |   8 +++
 src/intel/vulkan/anv_instance.c       |   1 +
 src/intel/vulkan/anv_pipeline_cache.c |   7 +-
 src/intel/vulkan/anv_private.h        |  13 +++-
 src/intel/vulkan/anv_shader.c         | 114 +++++++++++++++++++++++++--
 src/intel/vulkan/anv_shader_heap.c    |  24 +------
 6 files changed, 134 insertions(+), 33 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index aa18154947b..cb1d044f0a4 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1123,6 +1123,12 @@ VkResult anv_CreateDevice(
       goto fail_trtt;
    }
 
+   result = anv_device_init_shader_dump(device);
+   if (result != VK_SUCCESS) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto fail_rt_shaders;
+   }
+
    anv_device_init_blorp(device);
 
    anv_device_init_border_colors(device);
@@ -1215,6 +1221,8 @@ VkResult anv_CreateDevice(
    anv_device_finish_blorp(device);
    anv_device_finish_astc_emu(device);
    anv_device_finish_internal_kernels(device);
+   anv_device_finish_shader_dump(device);
+ fail_rt_shaders:
    anv_device_finish_rt_shaders(device);
  fail_trtt:
    anv_device_finish_trtt(device);
diff --git a/src/intel/vulkan/anv_instance.c b/src/intel/vulkan/anv_instance.c
index 5d2ba4484e7..89d52005b01 100644
--- a/src/intel/vulkan/anv_instance.c
+++ b/src/intel/vulkan/anv_instance.c
@@ -124,6 +124,7 @@ static const struct debug_control debug_control[] = {
    { "sparse-trtt", ANV_DEBUG_SPARSE_TRTT},
    { "video-decode", ANV_DEBUG_VIDEO_DECODE},
    { "video-encode", ANV_DEBUG_VIDEO_ENCODE},
+   { "shader-dump", ANV_DEBUG_SHADER_DUMP},
    { "shader-hash", ANV_DEBUG_SHADER_HASH},
    { "shader-print", ANV_DEBUG_SHADER_PRINT},
    { NULL, 0 }
diff --git a/src/intel/vulkan/anv_pipeline_cache.c b/src/intel/vulkan/anv_pipeline_cache.c
index 3dc6caf913b..b246de7454c 100644
--- a/src/intel/vulkan/anv_pipeline_cache.c
+++ b/src/intel/vulkan/anv_pipeline_cache.c
@@ -149,11 +149,8 @@ anv_shader_internal_create(struct anv_device *device,
       return NULL;
    }
 
-   anv_shader_heap_upload(&device->shader_heap,
-                          shader->kernel,
-                          shader->code,
-                          shader->prog_data,
-                          shader->stats->dispatch_width);
+   anv_shader_heap_upload(&device->shader_heap, shader->kernel,
+                          shader->code, kernel_size);
 
    return shader;
 }
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 6fb20836071..9128207936c 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1296,9 +1296,10 @@ void anv_shader_heap_free(struct anv_shader_heap *heap, struct anv_shader_alloc
 
 void anv_shader_heap_upload(struct anv_shader_heap *heap,
                             struct anv_shader_alloc alloc,
-                            const void *data,
-                            const struct brw_stage_prog_data *prog_data,
-                            uint32_t dispatch_width);
+                            const void *data, uint64_t size);
+
+VkResult anv_device_init_shader_dump(struct anv_device *device);
+void anv_device_finish_shader_dump(struct anv_device *device);
 
 struct anv_shader_group_rt_replay {
    uint64_t general;
@@ -1797,6 +1798,7 @@ enum anv_debug {
    ANV_DEBUG_NO_SLAB = BITFIELD_BIT(8),
    ANV_DEBUG_DESCRIPTOR_DIRTY = BITFIELD_BIT(9),
    ANV_DEBUG_SHADER_PRINT = BITFIELD_BIT(10),
+   ANV_DEBUG_SHADER_DUMP = BITFIELD_BIT(11),
 };
 
 extern enum anv_debug anv_debug;
@@ -2850,6 +2852,11 @@ struct anv_device {
       struct vk_acceleration_structure_build_args build_args;
    } accel_struct_build;
 
+   struct {
+      simple_mtx_t mutex;
+      debug_archiver *archive;
+   } shader_dump;
+
    struct vk_meta_device meta_device;
 
    struct pb_slabs bo_slabs[3];
diff --git a/src/intel/vulkan/anv_shader.c b/src/intel/vulkan/anv_shader.c
index a0c25c33f49..bf43d3456af 100644
--- a/src/intel/vulkan/anv_shader.c
+++ b/src/intel/vulkan/anv_shader.c
@@ -8,8 +8,114 @@
 
 #include "nir/nir_serialize.h"
 #include "compiler/brw/brw_disasm.h"
+#include "mda/debug_archiver.h"
 #include "util/shader_stats.h"
 
+
+/* Set up the state used to dump shader disassembly: the mutex serializing
+ * dumps and, for shader-dump, the archive receiving one file per variant.
+ */
+VkResult
+anv_device_init_shader_dump(struct anv_device *device)
+{
+   if (!ANV_DEBUG(SHADER_DUMP) && !INTEL_DEBUG(DEBUG_SHADERS_LINENO))
+      return VK_SUCCESS;
+
+   /* Only shader-dump writes into an archive. With shaders-lineno alone the
+    * archive stays NULL and the disassembly goes to stderr.
+    */
+   if (ANV_DEBUG(SHADER_DUMP)) {
+      device->shader_dump.archive =
+         debug_archiver_open(NULL, "anv-shaders", "");
+      if (device->shader_dump.archive == NULL)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   simple_mtx_init(&device->shader_dump.mutex, mtx_plain);
+
+   return VK_SUCCESS;
+}
+
+void
+anv_device_finish_shader_dump(struct anv_device *device)
+{
+   if (!ANV_DEBUG(SHADER_DUMP) && !INTEL_DEBUG(DEBUG_SHADERS_LINENO))
+      return;
+
+   /* The archive only exists when shader-dump is enabled (see
+    * anv_device_init_shader_dump()); close it rather than finishing a file
+    * that was never started.
+    */
+   if (device->shader_dump.archive != NULL)
+      debug_archiver_close(device->shader_dump.archive);
+
+   simple_mtx_destroy(&device->shader_dump.mutex);
+}
+
+/* Disassemble the variant of shader starting at code_offset, either into a
+ * new archive file or, without an archive, to stderr.
+ */
+static void
+anv_device_dump_shader_variant(struct anv_device *device,
+                               struct anv_shader *shader,
+                               const char *variant,
+                               uint32_t code_offset)
+{
+   FILE *f = stderr;
+
+   simple_mtx_lock(&device->shader_dump.mutex);
+
+   if (device->shader_dump.archive != NULL) {
+      char filename[80];
+      snprintf(filename, sizeof(filename), "0x%08x-%s%s",
+               shader->prog_data->source_hash,
+               _mesa_shader_stage_to_abbrev(shader->vk.stage),
+               variant);
+      f = debug_archiver_start_file(device->shader_dump.archive, filename);
+      int64_t _offset = shader->kernel.offset;
+      brw_disassemble_with_errors(&device->physical->compiler->isa,
+                                  shader->code, code_offset,
+                                  &_offset, f);
+      debug_archiver_finish_file(device->shader_dump.archive);
+   } else {
+      brw_disassemble_with_lineno(&device->physical->compiler->isa,
+                                  shader->vk.stage, -1,
+                                  shader->prog_data->source_hash,
+                                  shader->code, code_offset,
+                                  shader->kernel.offset,
+                                  stderr);
+   }
+
+   simple_mtx_unlock(&device->shader_dump.mutex);
+}
+
+/* Dump every variant of shader when dumping is enabled and the shader passes
+ * intel_shader_dump_filter.
+ */
+static void
+anv_device_maybe_dump_shader(struct anv_device *device, struct anv_shader *shader)
+{
+   if (!ANV_DEBUG(SHADER_DUMP) && !INTEL_DEBUG(DEBUG_SHADERS_LINENO))
+      return;
+
+   if (intel_shader_dump_filter &&
+       intel_shader_dump_filter != shader->prog_data->source_hash)
+      return;
+
+   if (shader->vk.stage == MESA_SHADER_FRAGMENT) {
+      const struct brw_fs_prog_data *fs_prog_data = get_shader_fs_prog_data(shader);
+
+      if (fs_prog_data->dispatch_8 || fs_prog_data->dispatch_multi)
+         anv_device_dump_shader_variant(device, shader, "-8", 0);
+      if (fs_prog_data->dispatch_16)
+         anv_device_dump_shader_variant(device, shader, "-16", fs_prog_data->prog_offset_16);
+      if (fs_prog_data->dispatch_32)
+         anv_device_dump_shader_variant(device, shader, "-32", fs_prog_data->prog_offset_32);
+   } else {
+      anv_device_dump_shader_variant(device, shader, "", 0);
+   }
+}
+
 static void
 anv_shader_destroy(struct vk_device *vk_device,
                    struct vk_shader *vk_shader,
@@ -748,11 +854,12 @@ anv_shader_create(struct anv_device *device,
    if (result != VK_SUCCESS)
       goto error_state;
 
+   anv_device_maybe_dump_shader(device, shader);
+
    anv_shader_heap_upload(&device->shader_heap,
                           shader->kernel,
                           reloc.relocated_code,
-                          shader->prog_data,
-                          shader->stats->dispatch_width);
+                          shader_data->prog_data.base.program_size);
 
    anv_shader_reloc_end(&reloc);
 
@@ -922,8 +1029,7 @@ anv_replay_rt_shader_group(struct vk_device *vk_device,
       anv_shader_heap_upload(&device->shader_heap,
                              shader->replay_kernel,
                              reloc.relocated_code,
-                             shader->prog_data,
-                             shader->stats->dispatch_width);
+                             shader->prog_data->program_size);
 
       anv_shader_reloc_end(&reloc);
    }
diff --git a/src/intel/vulkan/anv_shader_heap.c b/src/intel/vulkan/anv_shader_heap.c
index 39b89d88093..af1948edbce 100644
--- a/src/intel/vulkan/anv_shader_heap.c
+++ b/src/intel/vulkan/anv_shader_heap.c
@@ -189,15 +189,12 @@ anv_shader_heap_free(struct anv_shader_heap *heap, struct anv_shader_alloc alloc
 void
 anv_shader_heap_upload(struct anv_shader_heap *heap,
                        struct anv_shader_alloc alloc,
-                       const void *data,
-                       const struct brw_stage_prog_data *prog_data,
-                       uint32_t dispatch_width)
+                       const void *data, uint64_t size)
 {
-   uint64_t data_size = prog_data->program_size;
    const uint32_t bo_begin_idx = shader_bo_index(
       heap, heap->va_range.addr + alloc.offset);
    const uint32_t bo_end_idx = shader_bo_index(
-      heap, heap->va_range.addr + alloc.offset + data_size - 1);
+      heap, heap->va_range.addr + alloc.offset + size - 1);
    const uint64_t upload_addr = heap->va_range.addr + alloc.offset;
 
    for (uint32_t i = MIN2(bo_begin_idx, bo_end_idx);
@@ -207,23 +204,8 @@ anv_shader_heap_upload(struct anv_shader_heap *heap,
       const uint32_t data_offset =
          upload_addr - (heap->bos[i].addr + bo_offset);
       const uint64_t copy_size =
-         MIN2(heap->bos[i].size - bo_offset, data_size - data_offset);
+         MIN2(heap->bos[i].size - bo_offset, size - data_offset);
 
       memcpy(heap->bos[i].bo->map + bo_offset, data, copy_size);
    }
-   if (INTEL_DEBUG(DEBUG_SHADERS_LINENO)) {
-      if (!intel_shader_dump_filter ||
-          (intel_shader_dump_filter && intel_shader_dump_filter == prog_data->source_hash)) {
-         int start = 0;
-         /* dump each simd variant of shader */
-         while (start < data_size) {
-            brw_disassemble_with_lineno(&heap->device->physical->compiler->isa,
-                                        prog_data->stage, -1,
-                                        prog_data->source_hash, data, start,
-                                        alloc.offset, stderr);
-            start += align64(brw_disassemble_find_end(&heap->device->physical->compiler->isa,
-                                                      data, start), 64);
-         }
-      }
-   }
 }