From afabf6e3504c1e6d9965d1b555eed779e9dc3af5 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Thu, 30 May 2024 11:34:23 +0300 Subject: [PATCH] anv: add a device generated command debug option It prints out the constant of the generated commands. $ ANV_DEBUG=dgc-dump ./deqp-vk -n dEQP-VK.dgc.ext.compute.smoke.4_sequences_device_local_from_host_preprocess_state_same_universal_queue Test case 'dEQP-VK.dgc.ext.compute.smoke.4_sequences_device_local_from_host_preprocess_state_same_universal_queue'.. call from 0xffffeffeffe04694 0x0000000400000000: MI_STORE_DATA_IMM 0x10000403 0x00000178 0x00000004 0xffe047b8 0xffffeffe 0x0000000400000014: MI_BATCH_BUFFER_START 0x18800101 0x00000020 0x00000004 0x0000000400000020: MI_ARB_CHECK 0x02800100 0x0000000400000024: MEDIA_CURBE_LOAD 0x70010002 0x00000000 0x00000020 0x40000180 0x0000000400000034: GPGPU_WALKER 0x7105000d 0x00000000 0x00000000 0x00000000 0x40000003 0x00000000 0x00000000 0x00000001 0x00000000 0x00000000 0x0000004c 0x00000000 0x00000001 0x0000ffff 0xffffffff 0x0000000400000070: MEDIA_STATE_FLUSH 0x70040000 0x00000000 0x0000000400000078: MEDIA_CURBE_LOAD 0x70010002 0x00000000 0x00000020 0x40001400 0x0000000400000088: GPGPU_WALKER 0x7105000d 0x00000000 0x00000000 0x00000000 0x40000003 0x00000000 0x00000000 0x00000017 0x00000000 0x00000000 0x00000001 0x00000000 0x00000001 0x0000ffff 0xffffffff 0x00000004000000c4: MEDIA_STATE_FLUSH 0x70040000 0x00000000 0x00000004000000cc: MEDIA_CURBE_LOAD 0x70010002 0x00000000 0x00000020 0x40002680 0x00000004000000dc: GPGPU_WALKER 0x7105000d 0x00000000 0x00000000 0x00000000 0x40000003 0x00000000 0x00000000 0x00000001 0x00000000 0x00000000 0x00000001 0x00000000 0x000000d5 0x0000ffff 0xffffffff 0x0000000400000118: MEDIA_STATE_FLUSH 0x70040000 0x00000000 0x0000000400000120: MEDIA_CURBE_LOAD 0x70010002 0x00000000 0x00000020 0x40003900 0x0000000400000130: GPGPU_WALKER 0x7105000d 0x00000000 0x00000000 0x00000000 0x40000003 0x00000000 0x00000000 0x00000001 0x00000000 0x00000000 0x000000dc 0x00000000 0x00000001 0x0000ffff 0xffffffff Pass (Pass) Signed-off-by: Lionel Landwerlin Acked-by: Alyssa Rosenzweig Part-of: --- src/intel/shaders/dgc_debug.cl | 144 +++++++++++++++++++++++ src/intel/shaders/meson.build | 1 + src/intel/vulkan/anv_instance.c | 1 + src/intel/vulkan/anv_internal_kernels.c | 35 ++++-- src/intel/vulkan/anv_internal_kernels.h | 10 ++ src/intel/vulkan/anv_private.h | 8 +- src/intel/vulkan/anv_util.c | 63 ++++++++++ src/intel/vulkan/genX_cmd_dgc.c | 21 ++++ src/intel/vulkan/genX_internal_kernels.c | 9 ++ 9 files changed, 284 insertions(+), 8 deletions(-) create mode 100644 src/intel/shaders/dgc_debug.cl diff --git a/src/intel/shaders/dgc_debug.cl b/src/intel/shaders/dgc_debug.cl new file mode 100644 index 00000000000..d5b4b02b0f8 --- /dev/null +++ b/src/intel/shaders/dgc_debug.cl @@ -0,0 +1,144 @@ +/* + * Copyright 2026 Intel Corporation + * SPDX-License-Identifier: MIT + */ + +#include "libintel_shaders.h" + +#define CMD(cmd_type, cmd_subtype, _opcode, _subopcode) \ + (((cmd_type) << 29) | \ + ((cmd_subtype) << 27) | \ + ((_opcode) << 24) | \ + ((_subopcode) << 16)) +#define CMD5(cmd_type, cmd_subtype, _opcode, _subopcode, _variant) \ + (((cmd_type) << 29) | \ + ((cmd_subtype) << 27) | \ + ((_opcode) << 24) | \ + ((_subopcode) << 18) | \ + ((_variant) << 16)) + +void +genX(libanv_dgc_dump)(global uint32_t *cmd_base, + uint32_t n_dwords, + global void *call_addr) +{ + printf("call from 0x%016lx\n", call_addr); + uint32_t bbs_count = 0; + for (uint32_t i = 0; i < n_dwords && bbs_count < 2; ) { + uint32_t n_dwords = cmd_base[i] & 0xff; + uint32_t bias_dwords = 0; + printf("0x%016lx: ", cmd_base + i); + switch (cmd_base[i] & 0xffff0000) { + case CMD(3, 3, 3, 0): + printf("3DPRIMITIVE\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 3, 2): + printf("3DMESH_3D\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 21): + printf("3DSTATE_CONSTANT_VS\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 25): + printf("3DSTATE_CONSTANT_HS\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 26): + printf("3DSTATE_CONSTANT_DS\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 22): + printf("3DSTATE_CONSTANT_GS\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 23): + printf("3DSTATE_CONSTANT_PS\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 109): + printf("3DSTATE_CONSTANT_ALL\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 123): + printf("3DSTATE_MESH_SHADER_DATA\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 126): + printf("3DSTATE_TASK_SHADER_DATA\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 10): + printf("3DSTATE_INDEX_BUFFER\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 8): + printf("3DSTATE_VERTEX_BUFFERS\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 27): + printf("3DSTATE_HS\n"); + bias_dwords = 2; + break; + case CMD(3, 3, 0, 29): + printf("3DSTATE_DS\n"); + bias_dwords = 2; + break; + case CMD(3, 2, 0, 4): + printf("MEDIA_STATE_FLUSH\n"); + bias_dwords = 2; + break; + case CMD(3, 2, 0, 0): + printf("MEDIA_VFE_STATE\n"); + bias_dwords = 2; + break; + case CMD(3, 2, 0, 1): + printf("MEDIA_CURBE_LOAD\n"); + bias_dwords = 2; + break; + case CMD(3, 2, 0, 2): + printf("MEDIA_INTERFACE_DESCRIPTOR_LOAD\n"); + bias_dwords = 2; + break; + case CMD(3, 2, 1, 5): + printf("GPGPU_WALKER\n"); + bias_dwords = 2; + break; + case CMD5(3, 2, 2, 2, 0): + printf("COMPUTE_WALKER\n"); + bias_dwords = 2; + break; + case 0x10000000: + printf("MI_STORE_DATA_IMM\n"); + bias_dwords = 2; + break; + case 0x02800000: + printf("MI_ARB_CHECK\n"); + bias_dwords = 1; + break; + case 0x18800000: + printf("MI_BATCH_BUFFER_START\n"); + bias_dwords = 2; + bbs_count++; + break; + case 0x00000000: + printf("MI_NOOP\n"); + bias_dwords = 1; + break; + default: + printf("unknown : 0x%08x\n", cmd_base[i]); + return; + } + + printf(" "); + for (uint32_t j = 0; j < (bias_dwords + n_dwords); j++) { + if (j > 0 && (j % 8) == 0) + printf("\n "); + printf("0x%08x ", cmd_base[i + j]); + } + printf("\n"); + + i += bias_dwords + n_dwords; + } +} diff --git a/src/intel/shaders/meson.build b/src/intel/shaders/meson.build index 041d68af612..e20303ba7ac 100644 --- a/src/intel/shaders/meson.build +++ b/src/intel/shaders/meson.build @@ -21,6 +21,7 @@ endif intel_shader_files = files( 'libintel_shaders.h', 'dgc.cl', + 'dgc_debug.cl', 'generate.cl', 'generate_draws.cl', 'generate_draws_iris.cl', diff --git a/src/intel/vulkan/anv_instance.c b/src/intel/vulkan/anv_instance.c index ead9b4247d9..575f386aa45 100644 --- a/src/intel/vulkan/anv_instance.c +++ b/src/intel/vulkan/anv_instance.c @@ -119,6 +119,7 @@ static const driOptionDescription anv_dri_options[] = { static const struct debug_control debug_control[] = { { "bindless", ANV_DEBUG_BINDLESS}, { "desc-dirty", ANV_DEBUG_DESCRIPTOR_DIRTY}, + { "dgc-dump", ANV_DEBUG_DGC_DUMP}, { "experimental", ANV_DEBUG_EXPERIMENTAL}, { "no-gpl", ANV_DEBUG_NO_GPL}, { "no-slab", ANV_DEBUG_NO_SLAB}, diff --git a/src/intel/vulkan/anv_internal_kernels.c b/src/intel/vulkan/anv_internal_kernels.c index 086f6a6fad0..48d2fcd22b7 100644 --- a/src/intel/vulkan/anv_internal_kernels.c +++ b/src/intel/vulkan/anv_internal_kernels.c @@ -63,7 +63,8 @@ compile_shader(struct anv_device *device, const char *name, const void *hash_key, uint32_t hash_key_size, - uint32_t sends_count_expectation) + uint32_t sends_count_expectation, + const uint16_t local_size[3]) { const nir_shader_compiler_options *nir_options = &device->physical->compiler->nir_options[stage]; @@ -91,9 +92,13 @@ compile_shader(struct anv_device *device, NIR_PASS(_, nir, nir_split_per_member_structs); if (stage == MESA_SHADER_COMPUTE) { - nir->info.workgroup_size[0] = 16; - nir->info.workgroup_size[1] = 1; - nir->info.workgroup_size[2] = 1; + if (local_size[0] != 0) { + memcpy(nir->info.workgroup_size, local_size, sizeof(nir->info.workgroup_size)); + } else { + nir->info.workgroup_size[0] = 16; + nir->info.workgroup_size[1] = 1; + nir->info.workgroup_size[2] = 1; + } } struct brw_compiler *compiler = device->physical->compiler; @@ -259,8 +264,8 @@ anv_device_get_internal_shader(struct anv_device *device, } key; mesa_shader_stage stage; - - uint32_t send_count; + uint16_t local_size[3]; + uint32_t send_count; } internal_kernels[] = { [ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = { .key = { @@ -355,7 +360,22 @@ anv_device_get_internal_shader(struct anv_device *device, .key = { .name = "anv-dgc-rt-fragment", }, + .stage = MESA_SHADER_FRAGMENT, + .send_count = 0 /* too complex */, + }, + [ANV_INTERNAL_KERNEL_DGC_DUMP_COMPUTE] = { + .key = { + .name = "anv-dgc-dump-compute", + }, .stage = MESA_SHADER_COMPUTE, + .local_size = { 1, 1, 1 }, + .send_count = 0 /* too complex */, + }, + [ANV_INTERNAL_KERNEL_DGC_DUMP_FRAGMENT] = { + .key = { + .name = "anv-dgc-dump-fragment", + }, + .stage = MESA_SHADER_FRAGMENT, .send_count = 0 /* too complex */, }, }; @@ -385,7 +405,8 @@ anv_device_get_internal_shader(struct anv_device *device, internal_kernels[name].key.name, &internal_kernels[name].key, sizeof(internal_kernels[name].key), - internal_kernels[name].send_count); + internal_kernels[name].send_count, + internal_kernels[name].local_size); if (bin == NULL) return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY, "Unable to compiler internal kernel"); diff --git a/src/intel/vulkan/anv_internal_kernels.h b/src/intel/vulkan/anv_internal_kernels.h index 6166c165bd3..8461e59c8c3 100644 --- a/src/intel/vulkan/anv_internal_kernels.h +++ b/src/intel/vulkan/anv_internal_kernels.h @@ -300,4 +300,14 @@ struct PACKED anv_dgc_rt_params { uint32_t flags; }; +struct PACKED anv_dgc_dump_params { + /* Preprocess command address */ + uint64_t cmd_addr; + + /* Number of dwords */ + uint32_t n_dwords; + + uint64_t call_addr; +}; + #endif /* ANV_GENERATED_INDIRECT_DRAWS_H */ diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 4c787c060fb..4d484b007ca 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1725,6 +1725,7 @@ enum anv_debug { ANV_DEBUG_SHADER_PRINT = BITFIELD_BIT(10), ANV_DEBUG_SHADER_DUMP = BITFIELD_BIT(11), ANV_DEBUG_EXPERIMENTAL = BITFIELD_BIT(12), + ANV_DEBUG_DGC_DUMP = BITFIELD_BIT(13), }; extern enum anv_debug anv_debug; @@ -1733,7 +1734,7 @@ extern enum anv_debug anv_debug; static inline bool anv_needs_printf_buffer(void) { - return ANV_DEBUG(SHADER_PRINT); + return ANV_DEBUG(SHADER_PRINT) || ANV_DEBUG(DGC_DUMP); } struct anv_instance { @@ -2480,6 +2481,8 @@ enum anv_internal_kernel_name { ANV_INTERNAL_KERNEL_DGC_CS_POSTPROCESS_COMPUTE, ANV_INTERNAL_KERNEL_DGC_RT_COMPUTE, ANV_INTERNAL_KERNEL_DGC_RT_FRAGMENT, + ANV_INTERNAL_KERNEL_DGC_DUMP_COMPUTE, + ANV_INTERNAL_KERNEL_DGC_DUMP_FRAGMENT, ANV_INTERNAL_KERNEL_COUNT, }; @@ -6674,6 +6677,9 @@ uint32_t anv_dgc_fill_gfx_layout(struct anv_dgc_gfx_layout *layout, const struct anv_indirect_command_layout *layout_obj, struct anv_shader ** const shaders); +void anv_cmd_buffer_dump_commands(struct anv_cmd_buffer *cmd_buffer, + uint64_t preprocess_cmd_addr, + uint32_t n_dwords); struct anv_vid_mem { struct anv_device_memory *mem; diff --git a/src/intel/vulkan/anv_util.c b/src/intel/vulkan/anv_util.c index 52964667595..e7d83a17795 100644 --- a/src/intel/vulkan/anv_util.c +++ b/src/intel/vulkan/anv_util.c @@ -31,6 +31,7 @@ #include #include "anv_private.h" +#include "anv_internal_kernels.h" #include "vk_enum_to_str.h" #include "compiler/brw/brw_nir_rt.h" @@ -604,3 +605,65 @@ anv_pipeline_bind_map_clone(struct anv_device *device, return bind_map; } + +void +anv_cmd_buffer_dump_commands(struct anv_cmd_buffer *cmd_buffer, + uint64_t preprocess_cmd_addr, + uint32_t n_dwords) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_shader_internal *generate_kernel; + VkResult ret = + anv_device_get_internal_shader(device, + anv_internal_kernel_variant( + cmd_buffer, DGC_DUMP), + &generate_kernel); + if (ret != VK_SUCCESS) { + anv_batch_set_error(&cmd_buffer->batch, ret); + return; + } + + anv_add_pending_pipe_bits(cmd_buffer, + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR, + VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, + 0, + "pre gfx cmd dump"); + anv_genX(device->info, cmd_buffer_apply_pipe_flushes)(cmd_buffer); + + struct anv_simple_shader simple_state = { + .device = device, + .cmd_buffer = cmd_buffer, + .dynamic_state_stream = &cmd_buffer->dynamic_state_stream, + .general_state_stream = &cmd_buffer->general_state_stream, + .batch = &cmd_buffer->batch, + .kernel = generate_kernel, + }; + anv_genX(device->info, emit_simple_shader_init)(&simple_state); + + struct anv_dgc_dump_params *params; + struct anv_state push_data_state = + anv_genX(device->info, simple_shader_alloc_push)( + &simple_state, sizeof(*params)); + if (push_data_state.map == NULL) + return; + params = push_data_state.map; + + *params = (struct anv_dgc_dump_params) { + .cmd_addr = preprocess_cmd_addr, + .n_dwords = n_dwords, + .call_addr = anv_address_physical( + anv_batch_current_address(&cmd_buffer->batch)), + }; + + anv_genX(device->info, emit_simple_shader_dispatch)( + &simple_state, 1, push_data_state); + + anv_add_pending_pipe_bits(cmd_buffer, + VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR | + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR, + VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT, + 0, + "post gfx cmd dump"); + anv_genX(device->info, cmd_buffer_apply_pipe_flushes)(cmd_buffer); +} diff --git a/src/intel/vulkan/genX_cmd_dgc.c b/src/intel/vulkan/genX_cmd_dgc.c index 670ef0ae23a..cb9ea19af52 100644 --- a/src/intel/vulkan/genX_cmd_dgc.c +++ b/src/intel/vulkan/genX_cmd_dgc.c @@ -764,6 +764,13 @@ void genX(CmdExecuteGeneratedCommandsEXT)( GENX(MI_STORE_DATA_IMM_ImmediateData_start) / 8; } + if (ANV_DEBUG(DGC_DUMP)) { + anv_cmd_buffer_dump_commands(cmd_buffer, + pGeneratedCommandsInfo->preprocessAddress, + pGeneratedCommandsInfo->maxSequenceCount * + layout->cmd_size / 4); + } + genX(cmd_buffer_flush_gfx)(cmd_buffer); if (pGeneratedCommandsInfo->sequenceCountAddress != 0) { @@ -921,6 +928,13 @@ void genX(CmdExecuteGeneratedCommandsEXT)( genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); } + if (ANV_DEBUG(DGC_DUMP)) { + anv_cmd_buffer_dump_commands(cmd_buffer, + pGeneratedCommandsInfo->preprocessAddress, + pGeneratedCommandsInfo->maxSequenceCount * + layout->cmd_size / 4); + } + genX(cmd_buffer_flush_compute_state)(cmd_buffer, indirect_set); if (cmd_buffer->state.conditional_render_enabled) @@ -1036,6 +1050,13 @@ void genX(CmdExecuteGeneratedCommandsEXT)( genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); } + if (ANV_DEBUG(DGC_DUMP)) { + anv_cmd_buffer_dump_commands(cmd_buffer, + pGeneratedCommandsInfo->preprocessAddress, + pGeneratedCommandsInfo->maxSequenceCount * + layout->cmd_size / 4); + } + if (cmd_buffer->state.conditional_render_enabled) genX(cmd_emit_conditional_render_predicate)(cmd_buffer); diff --git a/src/intel/vulkan/genX_internal_kernels.c b/src/intel/vulkan/genX_internal_kernels.c index d0bbdf1d808..93ab74ec7bc 100644 --- a/src/intel/vulkan/genX_internal_kernels.c +++ b/src/intel/vulkan/genX_internal_kernels.c @@ -177,6 +177,15 @@ genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_ load_param(b, 64, struct anv_dgc_cs_params, return_addr), load_compute_index(b)); return sizeof(struct anv_dgc_cs_params); + + case ANV_INTERNAL_KERNEL_DGC_DUMP_COMPUTE: + case ANV_INTERNAL_KERNEL_DGC_DUMP_FRAGMENT: + genX(libanv_dgc_dump)( + b, + load_param(b, 64, struct anv_dgc_dump_params, cmd_addr), + load_param(b, 32, struct anv_dgc_dump_params, n_dwords), + load_param(b, 64, struct anv_dgc_dump_params, call_addr)); + return sizeof(struct anv_dgc_dump_params); #endif /* GFX_VER >= 11 */ #if GFX_VERx10 >= 125