anv: add a shader-dump debug option

This will be used with the EU stall monitor.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Felix DeGrood <felix.j.degrood@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/41244>
This commit is contained in:
Lionel Landwerlin 2026-04-28 14:58:40 +03:00 committed by Marge Bot
parent 3951a00d86
commit 0a965c0bce
6 changed files with 118 additions and 33 deletions

View file

@ -1123,6 +1123,12 @@ VkResult anv_CreateDevice(
goto fail_trtt;
}
result = anv_device_init_shader_dump(device);
if (result != VK_SUCCESS) {
result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
goto fail_rt_shaders;
}
anv_device_init_blorp(device);
anv_device_init_border_colors(device);
@ -1215,6 +1221,8 @@ VkResult anv_CreateDevice(
anv_device_finish_blorp(device);
anv_device_finish_astc_emu(device);
anv_device_finish_internal_kernels(device);
anv_device_finish_shader_dump(device);
fail_rt_shaders:
anv_device_finish_rt_shaders(device);
fail_trtt:
anv_device_finish_trtt(device);

View file

@ -124,6 +124,7 @@ static const struct debug_control debug_control[] = {
{ "sparse-trtt", ANV_DEBUG_SPARSE_TRTT},
{ "video-decode", ANV_DEBUG_VIDEO_DECODE},
{ "video-encode", ANV_DEBUG_VIDEO_ENCODE},
{ "shader-dump", ANV_DEBUG_SHADER_DUMP},
{ "shader-hash", ANV_DEBUG_SHADER_HASH},
{ "shader-print", ANV_DEBUG_SHADER_PRINT},
{ NULL, 0 }

View file

@ -149,11 +149,8 @@ anv_shader_internal_create(struct anv_device *device,
return NULL;
}
anv_shader_heap_upload(&device->shader_heap,
shader->kernel,
shader->code,
shader->prog_data,
shader->stats->dispatch_width);
anv_shader_heap_upload(&device->shader_heap, shader->kernel,
shader->code, kernel_size);
return shader;
}

View file

@ -1296,9 +1296,10 @@ void anv_shader_heap_free(struct anv_shader_heap *heap, struct anv_shader_alloc
void anv_shader_heap_upload(struct anv_shader_heap *heap,
struct anv_shader_alloc alloc,
const void *data,
const struct brw_stage_prog_data *prog_data,
uint32_t dispatch_width);
const void *data, uint64_t size);
VkResult anv_device_init_shader_dump(struct anv_device *device);
void anv_device_finish_shader_dump(struct anv_device *device);
struct anv_shader_group_rt_replay {
uint64_t general;
@ -1797,6 +1798,7 @@ enum anv_debug {
ANV_DEBUG_NO_SLAB = BITFIELD_BIT(8),
ANV_DEBUG_DESCRIPTOR_DIRTY = BITFIELD_BIT(9),
ANV_DEBUG_SHADER_PRINT = BITFIELD_BIT(10),
ANV_DEBUG_SHADER_DUMP = BITFIELD_BIT(11),
};
extern enum anv_debug anv_debug;
@ -2850,6 +2852,11 @@ struct anv_device {
struct vk_acceleration_structure_build_args build_args;
} accel_struct_build;
struct {
simple_mtx_t mutex;
debug_archiver *archive;
} shader_dump;
struct vk_meta_device meta_device;
struct pb_slabs bo_slabs[3];

View file

@ -8,8 +8,98 @@
#include "nir/nir_serialize.h"
#include "compiler/brw/brw_disasm.h"
#include "mda/debug_archiver.h"
#include "util/shader_stats.h"
VkResult
anv_device_init_shader_dump(struct anv_device *device)
{
if (!ANV_DEBUG(SHADER_DUMP) && !INTEL_DEBUG(DEBUG_SHADERS_LINENO))
return VK_SUCCESS;
/* No filename -> stdout */
if (ANV_DEBUG(SHADER_DUMP)) {
device->shader_dump.archive =
debug_archiver_open(NULL, "anv-shaders", "");
if (device->shader_dump.archive == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}
simple_mtx_init(&device->shader_dump.mutex, mtx_plain);
return VK_SUCCESS;
}
void
anv_device_finish_shader_dump(struct anv_device *device)
{
if (!ANV_DEBUG(SHADER_DUMP) && !INTEL_DEBUG(DEBUG_SHADERS_LINENO))
return;
debug_archiver_finish_file(device->shader_dump.archive);
simple_mtx_destroy(&device->shader_dump.mutex);
}
/**
 * Disassemble one variant of a shader, starting at code_offset bytes into
 * shader->code.
 *
 * When the device has a shader dump archive, the disassembly is written to
 * an archive entry named "0x<source hash>-<stage abbrev><variant>".
 * Otherwise it is printed to stderr through brw_disassemble_with_lineno().
 * The whole operation runs with device->shader_dump.mutex held.
 *
 * \param variant      suffix appended to the archive entry name
 * \param code_offset  start offset of the variant within shader->code
 */
static void
anv_device_dump_shader_variant(struct anv_device *device,
                               struct anv_shader *shader,
                               const char *variant,
                               uint32_t code_offset)
{
   simple_mtx_lock(&device->shader_dump.mutex);

   if (device->shader_dump.archive == NULL) {
      /* No archive: print straight to stderr. */
      brw_disassemble_with_lineno(&device->physical->compiler->isa,
                                  shader->vk.stage, -1,
                                  shader->prog_data->source_hash,
                                  shader->code, code_offset,
                                  shader->kernel.offset,
                                  stderr);
   } else {
      char entry_name[80];
      snprintf(entry_name, sizeof(entry_name), "0x%08x-%s%s",
               shader->prog_data->source_hash,
               _mesa_shader_stage_to_abbrev(shader->vk.stage),
               variant);

      FILE *out =
         debug_archiver_start_file(device->shader_dump.archive, entry_name);

      /* Passed by pointer to the disassembler, seeded with the kernel's
       * offset.
       */
      int64_t lineno_offset = shader->kernel.offset;
      brw_disassemble_with_errors(&device->physical->compiler->isa,
                                  shader->code, code_offset,
                                  &lineno_offset, out);

      debug_archiver_finish_file(device->shader_dump.archive);
   }

   simple_mtx_unlock(&device->shader_dump.mutex);
}
/**
 * Dump the disassembly of a shader if shader dumping is enabled.
 *
 * Nothing happens unless the ANV "shader-dump" flag or INTEL_DEBUG's
 * DEBUG_SHADERS_LINENO is set. When intel_shader_dump_filter is non-zero,
 * only shaders whose source hash equals it are dumped.
 *
 * Fragment shaders are dumped once per enabled dispatch mode, each at the
 * offset recorded in the fragment prog_data. Every other stage is dumped
 * once from offset 0.
 */
static void
anv_device_maybe_dump_shader(struct anv_device *device, struct anv_shader *shader)
{
   if (!(ANV_DEBUG(SHADER_DUMP) || INTEL_DEBUG(DEBUG_SHADERS_LINENO)))
      return;

   if (intel_shader_dump_filter != 0 &&
       shader->prog_data->source_hash != intel_shader_dump_filter)
      return;

   if (shader->vk.stage != MESA_SHADER_FRAGMENT) {
      anv_device_dump_shader_variant(device, shader, "", 0);
      return;
   }

   const struct brw_fs_prog_data *fs = get_shader_fs_prog_data(shader);

   /* The "-8" variant starts at the beginning of the code and also covers
    * the dispatch_multi case.
    */
   if (fs->dispatch_8 || fs->dispatch_multi)
      anv_device_dump_shader_variant(device, shader, "-8", 0);

   if (fs->dispatch_16) {
      anv_device_dump_shader_variant(device, shader, "-16",
                                     fs->prog_offset_16);
   }

   if (fs->dispatch_32) {
      anv_device_dump_shader_variant(device, shader, "-32",
                                     fs->prog_offset_32);
   }
}
static void
anv_shader_destroy(struct vk_device *vk_device,
struct vk_shader *vk_shader,
@ -748,11 +838,12 @@ anv_shader_create(struct anv_device *device,
if (result != VK_SUCCESS)
goto error_state;
anv_device_maybe_dump_shader(device, shader);
anv_shader_heap_upload(&device->shader_heap,
shader->kernel,
reloc.relocated_code,
shader->prog_data,
shader->stats->dispatch_width);
shader_data->prog_data.base.program_size);
anv_shader_reloc_end(&reloc);
@ -922,8 +1013,7 @@ anv_replay_rt_shader_group(struct vk_device *vk_device,
anv_shader_heap_upload(&device->shader_heap,
shader->replay_kernel,
reloc.relocated_code,
shader->prog_data,
shader->stats->dispatch_width);
shader->prog_data->program_size);
anv_shader_reloc_end(&reloc);
}

View file

@ -189,15 +189,12 @@ anv_shader_heap_free(struct anv_shader_heap *heap, struct anv_shader_alloc alloc
void
anv_shader_heap_upload(struct anv_shader_heap *heap,
struct anv_shader_alloc alloc,
const void *data,
const struct brw_stage_prog_data *prog_data,
uint32_t dispatch_width)
const void *data, uint64_t size)
{
uint64_t data_size = prog_data->program_size;
const uint32_t bo_begin_idx = shader_bo_index(
heap, heap->va_range.addr + alloc.offset);
const uint32_t bo_end_idx = shader_bo_index(
heap, heap->va_range.addr + alloc.offset + data_size - 1);
heap, heap->va_range.addr + alloc.offset + size - 1);
const uint64_t upload_addr = heap->va_range.addr + alloc.offset;
for (uint32_t i = MIN2(bo_begin_idx, bo_end_idx);
@ -207,23 +204,8 @@ anv_shader_heap_upload(struct anv_shader_heap *heap,
const uint32_t data_offset =
upload_addr - (heap->bos[i].addr + bo_offset);
const uint64_t copy_size =
MIN2(heap->bos[i].size - bo_offset, data_size - data_offset);
MIN2(heap->bos[i].size - bo_offset, size - data_offset);
memcpy(heap->bos[i].bo->map + bo_offset, data, copy_size);
}
if (INTEL_DEBUG(DEBUG_SHADERS_LINENO)) {
if (!intel_shader_dump_filter ||
(intel_shader_dump_filter && intel_shader_dump_filter == prog_data->source_hash)) {
int start = 0;
/* dump each simd variant of shader */
while (start < data_size) {
brw_disassemble_with_lineno(&heap->device->physical->compiler->isa,
prog_data->stage, -1,
prog_data->source_hash, data, start,
alloc.offset, stderr);
start += align64(brw_disassemble_find_end(&heap->device->physical->compiler->isa,
data, start), 64);
}
}
}
}