anv: add a device generated command debug option

It prints out the constant of the generated commands.

$ ANV_DEBUG=dgc-dump ./deqp-vk -n dEQP-VK.dgc.ext.compute.smoke.4_sequences_device_local_from_host_preprocess_state_same_universal_queue

Test case 'dEQP-VK.dgc.ext.compute.smoke.4_sequences_device_local_from_host_preprocess_state_same_universal_queue'..
call from 0xffffeffeffe04694
0x0000000400000000: MI_STORE_DATA_IMM
   0x10000403 0x00000178 0x00000004 0xffe047b8 0xffffeffe
0x0000000400000014: MI_BATCH_BUFFER_START
   0x18800101 0x00000020 0x00000004
0x0000000400000020: MI_ARB_CHECK
   0x02800100
0x0000000400000024: MEDIA_CURBE_LOAD
   0x70010002 0x00000000 0x00000020 0x40000180
0x0000000400000034: GPGPU_WALKER
   0x7105000d 0x00000000 0x00000000 0x00000000 0x40000003 0x00000000 0x00000000 0x00000001
   0x00000000 0x00000000 0x0000004c 0x00000000 0x00000001 0x0000ffff 0xffffffff
0x0000000400000070: MEDIA_STATE_FLUSH
   0x70040000 0x00000000
0x0000000400000078: MEDIA_CURBE_LOAD
   0x70010002 0x00000000 0x00000020 0x40001400
0x0000000400000088: GPGPU_WALKER
   0x7105000d 0x00000000 0x00000000 0x00000000 0x40000003 0x00000000 0x00000000 0x00000017
   0x00000000 0x00000000 0x00000001 0x00000000 0x00000001 0x0000ffff 0xffffffff
0x00000004000000c4: MEDIA_STATE_FLUSH
   0x70040000 0x00000000
0x00000004000000cc: MEDIA_CURBE_LOAD
   0x70010002 0x00000000 0x00000020 0x40002680
0x00000004000000dc: GPGPU_WALKER
   0x7105000d 0x00000000 0x00000000 0x00000000 0x40000003 0x00000000 0x00000000 0x00000001
   0x00000000 0x00000000 0x00000001 0x00000000 0x000000d5 0x0000ffff 0xffffffff
0x0000000400000118: MEDIA_STATE_FLUSH
   0x70040000 0x00000000
0x0000000400000120: MEDIA_CURBE_LOAD
   0x70010002 0x00000000 0x00000020 0x40003900
0x0000000400000130: GPGPU_WALKER
   0x7105000d 0x00000000 0x00000000 0x00000000 0x40000003 0x00000000 0x00000000 0x00000001
   0x00000000 0x00000000 0x000000dc 0x00000000 0x00000001 0x0000ffff 0xffffffff
  Pass (Pass)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31384>
This commit is contained in:
Lionel Landwerlin 2024-05-30 11:34:23 +03:00 committed by Marge Bot
parent 50aee34651
commit afabf6e350
9 changed files with 284 additions and 8 deletions

View file

@ -0,0 +1,144 @@
/*
* Copyright 2026 Intel Corporation
* SPDX-License-Identifier: MIT
*/
#include "libintel_shaders.h"
#define CMD(cmd_type, cmd_subtype, _opcode, _subopcode) \
(((cmd_type) << 29) | \
((cmd_subtype) << 27) | \
((_opcode) << 24) | \
((_subopcode) << 16))
#define CMD5(cmd_type, cmd_subtype, _opcode, _subopcode, _variant) \
(((cmd_type) << 29) | \
((cmd_subtype) << 27) | \
((_opcode) << 24) | \
((_subopcode) << 18) | \
((_variant) << 16))
void
genX(libanv_dgc_dump)(global uint32_t *cmd_base,
uint32_t n_dwords,
global void *call_addr)
{
printf("call from 0x%016lx\n", call_addr);
uint32_t bbs_count = 0;
for (uint32_t i = 0; i < n_dwords && bbs_count < 2; ) {
uint32_t n_dwords = cmd_base[i] & 0xff;
uint32_t bias_dwords = 0;
printf("0x%016lx: ", cmd_base + i);
switch (cmd_base[i] & 0xffff0000) {
case CMD(3, 3, 3, 0):
printf("3DPRIMITIVE\n");
bias_dwords = 2;
break;
case CMD(3, 3, 3, 2):
printf("3DMESH_3D\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 21):
printf("3DSTATE_CONSTANT_VS\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 25):
printf("3DSTATE_CONSTANT_HS\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 26):
printf("3DSTATE_CONSTANT_DS\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 22):
printf("3DSTATE_CONSTANT_GS\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 23):
printf("3DSTATE_CONSTANT_PS\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 109):
printf("3DSTATE_CONSTANT_ALL\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 123):
printf("3DSTATE_MESH_SHADER_DATA\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 126):
printf("3DSTATE_TASK_SHADER_DATA\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 10):
printf("3DSTATE_INDEX_BUFFER\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 8):
printf("3DSTATE_VERTEX_BUFFERS\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 27):
printf("3DSTATE_HS\n");
bias_dwords = 2;
break;
case CMD(3, 3, 0, 29):
printf("3DSTATE_DS\n");
bias_dwords = 2;
break;
case CMD(3, 2, 0, 4):
printf("MEDIA_STATE_FLUSH\n");
bias_dwords = 2;
break;
case CMD(3, 2, 0, 0):
printf("MEDIA_VFE_STATE\n");
bias_dwords = 2;
break;
case CMD(3, 2, 0, 1):
printf("MEDIA_CURBE_LOAD\n");
bias_dwords = 2;
break;
case CMD(3, 2, 0, 2):
printf("MEDIA_INTERFACE_DESCRIPTOR_LOAD\n");
bias_dwords = 2;
break;
case CMD(3, 2, 1, 5):
printf("GPGPU_WALKER\n");
bias_dwords = 2;
break;
case CMD5(3, 2, 2, 2, 0):
printf("COMPUTE_WALKER\n");
bias_dwords = 2;
break;
case 0x10000000:
printf("MI_STORE_DATA_IMM\n");
bias_dwords = 2;
break;
case 0x02800000:
printf("MI_ARB_CHECK\n");
bias_dwords = 1;
break;
case 0x18800000:
printf("MI_BATCH_BUFFER_START\n");
bias_dwords = 2;
bbs_count++;
break;
case 0x00000000:
printf("MI_NOOP\n");
bias_dwords = 1;
break;
default:
printf("unknown : 0x%08x\n", cmd_base[i]);
return;
}
printf(" ");
for (uint32_t j = 0; j < (bias_dwords + n_dwords); j++) {
if (j > 0 && (j % 8) == 0)
printf("\n ");
printf("0x%08x ", cmd_base[i + j]);
}
printf("\n");
i += bias_dwords + n_dwords;
}
}

View file

@ -21,6 +21,7 @@ endif
intel_shader_files = files(
'libintel_shaders.h',
'dgc.cl',
'dgc_debug.cl',
'generate.cl',
'generate_draws.cl',
'generate_draws_iris.cl',

View file

@ -119,6 +119,7 @@ static const driOptionDescription anv_dri_options[] = {
static const struct debug_control debug_control[] = {
{ "bindless", ANV_DEBUG_BINDLESS},
{ "desc-dirty", ANV_DEBUG_DESCRIPTOR_DIRTY},
{ "dgc-dump", ANV_DEBUG_DGC_DUMP},
{ "experimental", ANV_DEBUG_EXPERIMENTAL},
{ "no-gpl", ANV_DEBUG_NO_GPL},
{ "no-slab", ANV_DEBUG_NO_SLAB},

View file

@ -63,7 +63,8 @@ compile_shader(struct anv_device *device,
const char *name,
const void *hash_key,
uint32_t hash_key_size,
uint32_t sends_count_expectation)
uint32_t sends_count_expectation,
const uint16_t local_size[3])
{
const nir_shader_compiler_options *nir_options =
&device->physical->compiler->nir_options[stage];
@ -91,9 +92,13 @@ compile_shader(struct anv_device *device,
NIR_PASS(_, nir, nir_split_per_member_structs);
if (stage == MESA_SHADER_COMPUTE) {
nir->info.workgroup_size[0] = 16;
nir->info.workgroup_size[1] = 1;
nir->info.workgroup_size[2] = 1;
if (local_size[0] != 0) {
memcpy(nir->info.workgroup_size, local_size, sizeof(nir->info.workgroup_size));
} else {
nir->info.workgroup_size[0] = 16;
nir->info.workgroup_size[1] = 1;
nir->info.workgroup_size[2] = 1;
}
}
struct brw_compiler *compiler = device->physical->compiler;
@ -259,8 +264,8 @@ anv_device_get_internal_shader(struct anv_device *device,
} key;
mesa_shader_stage stage;
uint32_t send_count;
uint16_t local_size[3];
uint32_t send_count;
} internal_kernels[] = {
[ANV_INTERNAL_KERNEL_GENERATED_DRAWS] = {
.key = {
@ -355,7 +360,22 @@ anv_device_get_internal_shader(struct anv_device *device,
.key = {
.name = "anv-dgc-rt-fragment",
},
.stage = MESA_SHADER_FRAGMENT,
.send_count = 0 /* too complex */,
},
[ANV_INTERNAL_KERNEL_DGC_DUMP_COMPUTE] = {
.key = {
.name = "anv-dgc-dump-compute",
},
.stage = MESA_SHADER_COMPUTE,
.local_size = { 1, 1, 1 },
.send_count = 0 /* too complex */,
},
[ANV_INTERNAL_KERNEL_DGC_DUMP_FRAGMENT] = {
.key = {
.name = "anv-dgc-dump-fragment",
},
.stage = MESA_SHADER_FRAGMENT,
.send_count = 0 /* too complex */,
},
};
@ -385,7 +405,8 @@ anv_device_get_internal_shader(struct anv_device *device,
internal_kernels[name].key.name,
&internal_kernels[name].key,
sizeof(internal_kernels[name].key),
internal_kernels[name].send_count);
internal_kernels[name].send_count,
internal_kernels[name].local_size);
if (bin == NULL)
return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
"Unable to compiler internal kernel");

View file

@ -300,4 +300,14 @@ struct PACKED anv_dgc_rt_params {
uint32_t flags;
};
struct PACKED anv_dgc_dump_params {
/* Preprocess command address */
uint64_t cmd_addr;
/* Number of dwords */
uint32_t n_dwords;
uint64_t call_addr;
};
#endif /* ANV_GENERATED_INDIRECT_DRAWS_H */

View file

@ -1725,6 +1725,7 @@ enum anv_debug {
ANV_DEBUG_SHADER_PRINT = BITFIELD_BIT(10),
ANV_DEBUG_SHADER_DUMP = BITFIELD_BIT(11),
ANV_DEBUG_EXPERIMENTAL = BITFIELD_BIT(12),
ANV_DEBUG_DGC_DUMP = BITFIELD_BIT(13),
};
extern enum anv_debug anv_debug;
@ -1733,7 +1734,7 @@ extern enum anv_debug anv_debug;
static inline bool anv_needs_printf_buffer(void)
{
return ANV_DEBUG(SHADER_PRINT);
return ANV_DEBUG(SHADER_PRINT) || ANV_DEBUG(DGC_DUMP);
}
struct anv_instance {
@ -2480,6 +2481,8 @@ enum anv_internal_kernel_name {
ANV_INTERNAL_KERNEL_DGC_CS_POSTPROCESS_COMPUTE,
ANV_INTERNAL_KERNEL_DGC_RT_COMPUTE,
ANV_INTERNAL_KERNEL_DGC_RT_FRAGMENT,
ANV_INTERNAL_KERNEL_DGC_DUMP_COMPUTE,
ANV_INTERNAL_KERNEL_DGC_DUMP_FRAGMENT,
ANV_INTERNAL_KERNEL_COUNT,
};
@ -6674,6 +6677,9 @@ uint32_t anv_dgc_fill_gfx_layout(struct anv_dgc_gfx_layout *layout,
const struct anv_indirect_command_layout *layout_obj,
struct anv_shader ** const shaders);
void anv_cmd_buffer_dump_commands(struct anv_cmd_buffer *cmd_buffer,
uint64_t preprocess_cmd_addr,
uint32_t n_dwords);
struct anv_vid_mem {
struct anv_device_memory *mem;

View file

@ -31,6 +31,7 @@
#include <sys/stat.h>
#include "anv_private.h"
#include "anv_internal_kernels.h"
#include "vk_enum_to_str.h"
#include "compiler/brw/brw_nir_rt.h"
@ -604,3 +605,65 @@ anv_pipeline_bind_map_clone(struct anv_device *device,
return bind_map;
}
void
anv_cmd_buffer_dump_commands(struct anv_cmd_buffer *cmd_buffer,
uint64_t preprocess_cmd_addr,
uint32_t n_dwords)
{
struct anv_device *device = cmd_buffer->device;
struct anv_shader_internal *generate_kernel;
VkResult ret =
anv_device_get_internal_shader(device,
anv_internal_kernel_variant(
cmd_buffer, DGC_DUMP),
&generate_kernel);
if (ret != VK_SUCCESS) {
anv_batch_set_error(&cmd_buffer->batch, ret);
return;
}
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
0,
"pre gfx cmd dump");
anv_genX(device->info, cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct anv_simple_shader simple_state = {
.device = device,
.cmd_buffer = cmd_buffer,
.dynamic_state_stream = &cmd_buffer->dynamic_state_stream,
.general_state_stream = &cmd_buffer->general_state_stream,
.batch = &cmd_buffer->batch,
.kernel = generate_kernel,
};
anv_genX(device->info, emit_simple_shader_init)(&simple_state);
struct anv_dgc_dump_params *params;
struct anv_state push_data_state =
anv_genX(device->info, simple_shader_alloc_push)(
&simple_state, sizeof(*params));
if (push_data_state.map == NULL)
return;
params = push_data_state.map;
*params = (struct anv_dgc_dump_params) {
.cmd_addr = preprocess_cmd_addr,
.n_dwords = n_dwords,
.call_addr = anv_address_physical(
anv_batch_current_address(&cmd_buffer->batch)),
};
anv_genX(device->info, emit_simple_shader_dispatch)(
&simple_state, 1, push_data_state);
anv_add_pending_pipe_bits(cmd_buffer,
VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR |
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR,
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
0,
"post gfx cmd dump");
anv_genX(device->info, cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}

View file

@ -764,6 +764,13 @@ void genX(CmdExecuteGeneratedCommandsEXT)(
GENX(MI_STORE_DATA_IMM_ImmediateData_start) / 8;
}
if (ANV_DEBUG(DGC_DUMP)) {
anv_cmd_buffer_dump_commands(cmd_buffer,
pGeneratedCommandsInfo->preprocessAddress,
pGeneratedCommandsInfo->maxSequenceCount *
layout->cmd_size / 4);
}
genX(cmd_buffer_flush_gfx)(cmd_buffer);
if (pGeneratedCommandsInfo->sequenceCountAddress != 0) {
@ -921,6 +928,13 @@ void genX(CmdExecuteGeneratedCommandsEXT)(
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
if (ANV_DEBUG(DGC_DUMP)) {
anv_cmd_buffer_dump_commands(cmd_buffer,
pGeneratedCommandsInfo->preprocessAddress,
pGeneratedCommandsInfo->maxSequenceCount *
layout->cmd_size / 4);
}
genX(cmd_buffer_flush_compute_state)(cmd_buffer, indirect_set);
if (cmd_buffer->state.conditional_render_enabled)
@ -1036,6 +1050,13 @@ void genX(CmdExecuteGeneratedCommandsEXT)(
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
}
if (ANV_DEBUG(DGC_DUMP)) {
anv_cmd_buffer_dump_commands(cmd_buffer,
pGeneratedCommandsInfo->preprocessAddress,
pGeneratedCommandsInfo->maxSequenceCount *
layout->cmd_size / 4);
}
if (cmd_buffer->state.conditional_render_enabled)
genX(cmd_emit_conditional_render_predicate)(cmd_buffer);

View file

@ -177,6 +177,15 @@ genX(call_internal_shader)(nir_builder *b, enum anv_internal_kernel_name shader_
load_param(b, 64, struct anv_dgc_cs_params, return_addr),
load_compute_index(b));
return sizeof(struct anv_dgc_cs_params);
case ANV_INTERNAL_KERNEL_DGC_DUMP_COMPUTE:
case ANV_INTERNAL_KERNEL_DGC_DUMP_FRAGMENT:
genX(libanv_dgc_dump)(
b,
load_param(b, 64, struct anv_dgc_dump_params, cmd_addr),
load_param(b, 32, struct anv_dgc_dump_params, n_dwords),
load_param(b, 64, struct anv_dgc_dump_params, call_addr));
return sizeof(struct anv_dgc_dump_params);
#endif /* GFX_VER >= 11 */
#if GFX_VERx10 >= 125