amd: improve the IB parser, parse more packets

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23687>
This commit is contained in:
Marek Olšák 2023-06-02 14:33:10 -04:00 committed by Marge Bot
parent 95a85d1b37
commit 789a3b24c1
4 changed files with 117 additions and 36 deletions

View file

@ -35,12 +35,14 @@ DEBUG_GET_ONCE_BOOL_OPTION(color, "AMD_COLOR", true);
/* Raw terminal escape sequences selecting a bold foreground color
 * ("\033[1;3Xm"). Use the O_COLOR_* wrappers below when printing. */
#define COLOR_GREEN "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN "\033[1;36m"
#define COLOR_PURPLE "\033[1;35m"
/* Option-aware variants: each expands to the matching escape sequence when
 * debug_get_option_color() returns true and to an empty string otherwise.
 * The option is queried on every expansion. */
#define O_COLOR_RESET (debug_get_option_color() ? COLOR_RESET : "")
#define O_COLOR_RED (debug_get_option_color() ? COLOR_RED : "")
#define O_COLOR_GREEN (debug_get_option_color() ? COLOR_GREEN : "")
#define O_COLOR_YELLOW (debug_get_option_color() ? COLOR_YELLOW : "")
#define O_COLOR_CYAN (debug_get_option_color() ? COLOR_CYAN : "")
#define O_COLOR_PURPLE (debug_get_option_color() ? COLOR_PURPLE : "")
/* Number of spaces passed to print_spaces() before each packet field line. */
#define INDENT_PKT 8
@ -84,6 +86,12 @@ static void print_value(FILE *file, uint32_t value, int bits)
}
}
/* Print an indented "(reserved)" line for one packet dword.
 *
 * file:  output stream.
 * value: the dword that was read; it is accepted so callers can pass the
 *        result of consuming the dword, but it does not appear in the output.
 */
static void print_reserved_dword(FILE *file, uint32_t value)
{
   print_spaces(file, INDENT_PKT);
   fputs("(reserved)\n", file);
}
static void print_named_value(FILE *file, const char *name, uint32_t value, int bits)
{
print_spaces(file, INDENT_PKT);
@ -93,6 +101,15 @@ static void print_named_value(FILE *file, const char *name, uint32_t value, int
print_value(file, value, bits);
}
/* Print one indented "NAME <- VALUE" line where VALUE is a plain string.
 *
 * file:  output stream.
 * name:  field name, wrapped in the yellow/reset color sequences (which are
 *        empty strings when the color option is off).
 * value: text printed after the arrow, followed by a newline.
 */
static void print_string_value(FILE *file, const char *name, const char *value)
{
   print_spaces(file, INDENT_PKT);

   const char *name_color = O_COLOR_YELLOW;
   const char *color_end = O_COLOR_RESET;

   fprintf(file, "%s%s%s <- ", name_color, name, color_end);
   fprintf(file, "%s\n", value);
}
static const struct si_reg *find_register(enum amd_gfx_level gfx_level, enum radeon_family family,
unsigned offset)
{
@ -242,39 +259,65 @@ static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset
unsigned index = reg_dw >> 28;
int i;
if (index != 0) {
print_spaces(f, INDENT_PKT);
fprintf(f, "INDEX = %u\n", index);
}
if (index != 0)
print_named_value(f, "INDEX", index, 32);
for (i = 0; i < count; i++)
ac_dump_reg(f, ib->gfx_level, ib->family, reg + i * 4, ac_ib_get(ib), ~0);
}
/* Decode the body of a "set register pairs, packed" packet.
 *
 * The first dword is printed as REG_COUNT. The following `count` dwords are
 * consumed in groups of three:
 *   - dword 0: two 16-bit register dword offsets (low half, then high half);
 *              each is shifted left by 2 and added to reg_base,
 *   - dword 1: value dumped for the first register,
 *   - dword 2: value dumped for the second register.
 *
 * f:        output stream.
 * count:    number of dwords to consume after REG_COUNT.
 * reg_base: byte offset added to every decoded register offset.
 * ib:       parser state the dwords are read from via ac_ib_get().
 */
static void ac_parse_set_reg_pairs_packed_packet(FILE *f, unsigned count, unsigned reg_base,
                                                 struct ac_ib_parser *ib)
{
   /* Register byte offsets decoded from the most recent offsets dword. */
   unsigned pair_regs[2] = {0, 0};

   print_named_value(f, "REG_COUNT", ac_ib_get(ib), 32);

   for (unsigned dw = 0; dw < count; dw++) {
      const unsigned slot = dw % 3;
      const unsigned data = ac_ib_get(ib);

      if (slot == 0) {
         /* Offsets dword: remember both registers for the next two values. */
         pair_regs[0] = ((data & 0xffff) << 2) + reg_base;
         pair_regs[1] = ((data >> 16) << 2) + reg_base;
         continue;
      }

      /* slot 1 -> first register, slot 2 -> second register. */
      ac_dump_reg(f, ib->gfx_level, ib->family, pair_regs[slot - 1], data, ~0);
   }
}
static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
int *current_trace_id)
{
unsigned first_dw = ib->cur_dw;
int count = PKT_COUNT_G(header);
unsigned op = PKT3_IT_OPCODE_G(header);
const char *predicate = PKT3_PREDICATE(header) ? "(predicate)" : "";
const char *shader_type = PKT3_SHADER_TYPE_G(header) ? "(shader_type=compute)" : "";
const char *predicated = PKT3_PREDICATE(header) ? "(predicated)" : "";
const char *reset_filter_cam = PKT3_RESET_FILTER_CAM_G(header) ? "(reset_filter_cam)" : "";
int i;
unsigned tmp;
/* Print the name first. */
for (i = 0; i < ARRAY_SIZE(packet3_table); i++)
if (packet3_table[i].op == op)
break;
if (i < ARRAY_SIZE(packet3_table)) {
const char *name = sid_strings + packet3_table[i].name_offset;
const char *pkt_name = i < ARRAY_SIZE(packet3_table) ? sid_strings + packet3_table[i].name_offset
: "UNKNOWN";
const char *color;
if (op == PKT3_SET_CONTEXT_REG || op == PKT3_SET_CONFIG_REG || op == PKT3_SET_UCONFIG_REG ||
op == PKT3_SET_UCONFIG_REG_INDEX || op == PKT3_SET_SH_REG || op == PKT3_SET_SH_REG_INDEX)
fprintf(f, "%s%s%s%s:\n", O_COLOR_CYAN, name, predicate, O_COLOR_RESET);
else
fprintf(f, "%s%s%s%s:\n", O_COLOR_GREEN, name, predicate, O_COLOR_RESET);
} else
fprintf(f, "%sPKT3_UNKNOWN 0x%x%s%s:\n", O_COLOR_RED, op, predicate, O_COLOR_RESET);
if (strstr(pkt_name, "DRAW") || strstr(pkt_name, "DISPATCH"))
color = O_COLOR_PURPLE;
else if (strstr(pkt_name, "SET") == pkt_name && strstr(pkt_name, "REG"))
color = O_COLOR_CYAN;
else if (i >= ARRAY_SIZE(packet3_table))
color = O_COLOR_RED;
else
color = O_COLOR_GREEN;
fprintf(f, "%s%s%s%s%s%s:\n", color, pkt_name, O_COLOR_RESET,
shader_type, predicated, reset_filter_cam);
/* Print the contents. */
switch (op) {
@ -292,25 +335,44 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
case PKT3_SET_SH_REG_INDEX:
ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib);
break;
case PKT3_SET_CONTEXT_REG_PAIRS_PACKED:
ac_parse_set_reg_pairs_packed_packet(f, count, SI_CONTEXT_REG_OFFSET, ib);
break;
case PKT3_SET_SH_REG_PAIRS_PACKED:
case PKT3_SET_SH_REG_PAIRS_PACKED_N:
ac_parse_set_reg_pairs_packed_packet(f, count, SI_SH_REG_OFFSET, ib);
break;
case PKT3_ACQUIRE_MEM:
if (ib->gfx_level >= GFX11 && G_585_PWS_ENA(ib->ib[ib->cur_dw + 5])) {
ac_dump_reg(f, ib->gfx_level, ib->family, R_580_ACQUIRE_MEM_PWS_2, ac_ib_get(ib), ~0);
print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
ac_dump_reg(f, ib->gfx_level, ib->family, R_585_ACQUIRE_MEM_PWS_7, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
break;
if (ib->gfx_level >= GFX11) {
if (G_585_PWS_ENA(ib->ib[ib->cur_dw + 5])) {
ac_dump_reg(f, ib->gfx_level, ib->family, R_580_ACQUIRE_MEM_PWS_2, ac_ib_get(ib), ~0);
print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
ac_dump_reg(f, ib->gfx_level, ib->family, R_585_ACQUIRE_MEM_PWS_7, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
} else {
print_string_value(f, "ENGINE_SEL", ac_ib_get(ib) & 0x80000000 ? "ME" : "PFP");
print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
}
} else {
tmp = ac_ib_get(ib);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, tmp, 0x7fffffff);
print_string_value(f, "ENGINE_SEL", tmp & 0x80000000 ? "ME" : "PFP");
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
if (ib->gfx_level >= GFX10)
ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
}
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
if (ib->gfx_level >= GFX10)
ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
break;
case PKT3_SURFACE_SYNC:
if (ib->gfx_level >= GFX7) {
@ -477,6 +539,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
case PKT3_CLEAR_STATE:
case PKT3_INCREMENT_DE_COUNTER:
case PKT3_PFP_SYNC_ME:
print_reserved_dword(f, ac_ib_get(ib));
break;
case PKT3_NOP:
if (header == PKT3_NOP_PAD) {
@ -512,6 +575,22 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
break;
}
break;
case PKT3_DISPATCH_DIRECT:
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B804_COMPUTE_DIM_X, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B808_COMPUTE_DIM_Y, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B80C_COMPUTE_DIM_Z, ac_ib_get(ib), ~0);
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR,
ac_ib_get(ib), ~0);
break;
case PKT3_DISPATCH_INDIRECT:
print_named_value(f, "DATA_OFFSET", ac_ib_get(ib), 32);
ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR,
ac_ib_get(ib), ~0);
break;
case PKT3_SET_BASE:
tmp = ac_ib_get(ib);
print_string_value(f, "BASE_INDEX", tmp == 1 ? "INDIRECT_BASE" : COLOR_RED "UNKNOWN" COLOR_RESET);
break;
}
/* print additional dwords */

View file

@ -293,8 +293,10 @@
/* PKT3 header field helpers. As encoded by the shifts and masks below:
 * bit 0 = predicate, bit 1 = shader type, bit 2 = reset filter CAM,
 * bits 15:8 = IT opcode. The _S forms place a value into its field, the _G
 * forms extract the field from a header dword, and _C is the mask that clears
 * the field. */
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C 0xFFFF00FF
/* The predicate field sits at shift 0, so this one expression is usable both
 * to build the header bit and to read it back. */
#define PKT3_PREDICATE(x) (((x) >> 0) & 0x1)
#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x)&0x1) << 1)
#define PKT3_RESET_FILTER_CAM(x) (((unsigned)(x)&0x1) << 2)
#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x) & 0x1) << 1)
#define PKT3_SHADER_TYPE_G(x) (((x) >> 1) & 0x1)
#define PKT3_RESET_FILTER_CAM_S(x) (((unsigned)(x) & 0x1) << 2)
#define PKT3_RESET_FILTER_CAM_G(x) (((unsigned)(x) >> 2) & 0x1)
/* Build a type-3 packet header from an opcode, a count field value and a
 * predicate bit. Shader type and reset-filter-CAM are not included; callers
 * OR them in separately. */
#define PKT3(op, count, predicate) \
(PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))

View file

@ -7936,7 +7936,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
uint32_t xyz_dim_enable = 1; /* TODO: disable XYZ_DIM when unneeded */
uint32_t mode1_enable = 1; /* legacy fast launch mode */
radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM(1));
radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM_S(1));
radeon_emit(cs, 0); /* data_offset */
radeon_emit(cs, S_4C1_XYZ_DIM_REG(xyz_dim_reg) | S_4C1_DRAW_INDEX_REG(draw_id_reg));
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11)
@ -8036,7 +8036,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer)
uint32_t mode1_en = 1; /* legacy fast launch mode */
uint32_t linear_dispatch_en = cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch;
radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating) | PKT3_RESET_FILTER_CAM(1));
radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating) | PKT3_RESET_FILTER_CAM_S(1));
radeon_emit(cs, S_4D0_RING_ENTRY_REG(ring_entry_reg) | S_4D0_XYZ_DIM_REG(xyz_dim_reg));
if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11)
radeon_emit(cs, S_4D1_XYZ_DIM_ENABLE(xyz_dim_en) | S_4D1_MODE1_ENABLE(mode1_en) |

View file

@ -179,7 +179,7 @@ radeon_set_perfctr_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, unsigne
bool filter_cam_workaround =
cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && cmd_buffer->qf == RADV_QUEUE_GENERAL;
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM(filter_cam_workaround));
radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM_S(filter_cam_workaround));
radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
radeon_emit(cs, value);
}