diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c
index b24572e2610..088ab09ac76 100644
--- a/src/amd/common/ac_debug.c
+++ b/src/amd/common/ac_debug.c
@@ -35,12 +35,14 @@ DEBUG_GET_ONCE_BOOL_OPTION(color, "AMD_COLOR", true);
 #define COLOR_GREEN  "\033[1;32m"
 #define COLOR_YELLOW "\033[1;33m"
 #define COLOR_CYAN   "\033[1;36m"
+#define COLOR_PURPLE "\033[1;35m"
 
 #define O_COLOR_RESET  (debug_get_option_color() ? COLOR_RESET : "")
 #define O_COLOR_RED    (debug_get_option_color() ? COLOR_RED : "")
 #define O_COLOR_GREEN  (debug_get_option_color() ? COLOR_GREEN : "")
 #define O_COLOR_YELLOW (debug_get_option_color() ? COLOR_YELLOW : "")
 #define O_COLOR_CYAN   (debug_get_option_color() ? COLOR_CYAN : "")
+#define O_COLOR_PURPLE (debug_get_option_color() ? COLOR_PURPLE : "")
 
 #define INDENT_PKT 8
 
@@ -84,6 +86,13 @@ static void print_value(FILE *file, uint32_t value, int bits)
    }
 }
 
+/* Print a "(reserved)" placeholder line for a dword with no decoded fields; value is unused. */
+static void print_reserved_dword(FILE *file, uint32_t value)
+{
+   print_spaces(file, INDENT_PKT);
+   fprintf(file, "(reserved)\n");
+}
+
 static void print_named_value(FILE *file, const char *name, uint32_t value, int bits)
 {
    print_spaces(file, INDENT_PKT);
@@ -93,6 +102,16 @@ static void print_named_value(FILE *file, const char *name, uint32_t value, int
    print_value(file, value, bits);
 }
 
+/* Print "name <- value" at packet indentation, with the name in yellow when color is enabled. */
+static void print_string_value(FILE *file, const char *name, const char *value)
+{
+   print_spaces(file, INDENT_PKT);
+   fprintf(file, "%s%s%s <- ",
+           O_COLOR_YELLOW, name,
+           O_COLOR_RESET);
+   fprintf(file, "%s\n", value);
+}
+
 static const struct si_reg *find_register(enum amd_gfx_level gfx_level, enum radeon_family family,
                                           unsigned offset)
 {
@@ -242,39 +261,71 @@ static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset
    unsigned index = reg_dw >> 28;
    int i;
 
-   if (index != 0) {
-      print_spaces(f, INDENT_PKT);
-      fprintf(f, "INDEX = %u\n", index);
-   }
+   if (index != 0)
+      print_named_value(f, "INDEX", index, 32);
 
    for (i = 0; i < count; i++)
       ac_dump_reg(f, ib->gfx_level, ib->family, reg + i * 4, ac_ib_get(ib), ~0);
 }
 
+/* Dump a SET_*_REG_PAIRS_PACKED body: a REG_COUNT dword, then "count" dwords cycling through
+ * one dword holding two 16-bit register dword offsets and one value dword for each of them.
+ */
+static void ac_parse_set_reg_pairs_packed_packet(FILE *f, unsigned count, unsigned reg_base,
+                                                 struct ac_ib_parser *ib)
+{
+   unsigned reg_offset0 = 0, reg_offset1 = 0;
+
+   print_named_value(f, "REG_COUNT", ac_ib_get(ib), 32);
+
+   for (unsigned i = 0; i < count; i++) {
+      if (i % 3 == 0) {
+         unsigned tmp = ac_ib_get(ib);
+         reg_offset0 = ((tmp & 0xffff) << 2) + reg_base;
+         reg_offset1 = ((tmp >> 16) << 2) + reg_base;
+      } else if (i % 3 == 1) {
+         ac_dump_reg(f, ib->gfx_level, ib->family, reg_offset0, ac_ib_get(ib), ~0);
+      } else {
+         ac_dump_reg(f, ib->gfx_level, ib->family, reg_offset1, ac_ib_get(ib), ~0);
+      }
+   }
+}
+
 static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
                              int *current_trace_id)
 {
    unsigned first_dw = ib->cur_dw;
    int count = PKT_COUNT_G(header);
    unsigned op = PKT3_IT_OPCODE_G(header);
-   const char *predicate = PKT3_PREDICATE(header) ? "(predicate)" : "";
+   const char *shader_type = PKT3_SHADER_TYPE_G(header) ? "(shader_type=compute)" : "";
+   const char *predicated = PKT3_PREDICATE(header) ? "(predicated)" : "";
+   const char *reset_filter_cam = PKT3_RESET_FILTER_CAM_G(header) ? "(reset_filter_cam)" : "";
    int i;
+   unsigned tmp;
+   char unknown_name[32];
 
    /* Print the name first. */
    for (i = 0; i < ARRAY_SIZE(packet3_table); i++)
       if (packet3_table[i].op == op)
          break;
 
-   if (i < ARRAY_SIZE(packet3_table)) {
-      const char *name = sid_strings + packet3_table[i].name_offset;
+   /* Fallback name that keeps the raw opcode visible when op is not in packet3_table. */
+   snprintf(unknown_name, sizeof(unknown_name), "PKT3_UNKNOWN 0x%x", op);
+   const char *pkt_name = i < ARRAY_SIZE(packet3_table) ? sid_strings + packet3_table[i].name_offset
+                                                        : unknown_name;
+   const char *color;
 
-      if (op == PKT3_SET_CONTEXT_REG || op == PKT3_SET_CONFIG_REG || op == PKT3_SET_UCONFIG_REG ||
-          op == PKT3_SET_UCONFIG_REG_INDEX || op == PKT3_SET_SH_REG || op == PKT3_SET_SH_REG_INDEX)
-         fprintf(f, "%s%s%s%s:\n", O_COLOR_CYAN, name, predicate, O_COLOR_RESET);
-      else
-         fprintf(f, "%s%s%s%s:\n", O_COLOR_GREEN, name, predicate, O_COLOR_RESET);
-   } else
-      fprintf(f, "%sPKT3_UNKNOWN 0x%x%s%s:\n", O_COLOR_RED, op, predicate, O_COLOR_RESET);
+   if (strstr(pkt_name, "DRAW") || strstr(pkt_name, "DISPATCH"))
+      color = O_COLOR_PURPLE;
+   else if (strstr(pkt_name, "SET") == pkt_name && strstr(pkt_name, "REG"))
+      color = O_COLOR_CYAN;
+   else if (i >= ARRAY_SIZE(packet3_table))
+      color = O_COLOR_RED;
+   else
+      color = O_COLOR_GREEN;
+
+   fprintf(f, "%s%s%s%s%s%s:\n", color, pkt_name, O_COLOR_RESET,
+           shader_type, predicated, reset_filter_cam);
 
    /* Print the contents. */
    switch (op) {
@@ -292,25 +343,44 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
    case PKT3_SET_SH_REG_INDEX:
       ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib);
       break;
+   case PKT3_SET_CONTEXT_REG_PAIRS_PACKED:
+      ac_parse_set_reg_pairs_packed_packet(f, count, SI_CONTEXT_REG_OFFSET, ib);
+      break;
+   case PKT3_SET_SH_REG_PAIRS_PACKED:
+   case PKT3_SET_SH_REG_PAIRS_PACKED_N:
+      ac_parse_set_reg_pairs_packed_packet(f, count, SI_SH_REG_OFFSET, ib);
+      break;
    case PKT3_ACQUIRE_MEM:
-      if (ib->gfx_level >= GFX11 && G_585_PWS_ENA(ib->ib[ib->cur_dw + 5])) {
-         ac_dump_reg(f, ib->gfx_level, ib->family, R_580_ACQUIRE_MEM_PWS_2, ac_ib_get(ib), ~0);
-         print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
-         print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
-         print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
-         print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
-         ac_dump_reg(f, ib->gfx_level, ib->family, R_585_ACQUIRE_MEM_PWS_7, ac_ib_get(ib), ~0);
-         ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
-         break;
+      if (ib->gfx_level >= GFX11) {
+         if (G_585_PWS_ENA(ib->ib[ib->cur_dw + 5])) {
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_580_ACQUIRE_MEM_PWS_2, ac_ib_get(ib), ~0);
+            print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
+            print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
+            print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
+            print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_585_ACQUIRE_MEM_PWS_7, ac_ib_get(ib), ~0);
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
+         } else {
+            print_string_value(f, "ENGINE_SEL", ac_ib_get(ib) & 0x80000000 ? "ME" : "PFP");
+            print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
+            print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
+            print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
+            print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
+            print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
+         }
+      } else {
+         tmp = ac_ib_get(ib);
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, tmp, 0x7fffffff);
+         print_string_value(f, "ENGINE_SEL", tmp & 0x80000000 ? "ME" : "PFP");
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
+         print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
+         if (ib->gfx_level >= GFX10)
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
       }
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
-      print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
-      if (ib->gfx_level >= GFX10)
-         ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
       break;
    case PKT3_SURFACE_SYNC:
       if (ib->gfx_level >= GFX7) {
@@ -477,6 +547,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
    case PKT3_CLEAR_STATE:
    case PKT3_INCREMENT_DE_COUNTER:
    case PKT3_PFP_SYNC_ME:
+      print_reserved_dword(f, ac_ib_get(ib));
       break;
    case PKT3_NOP:
       if (header == PKT3_NOP_PAD) {
@@ -512,6 +583,27 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
          break;
       }
       break;
+   case PKT3_DISPATCH_DIRECT:
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B804_COMPUTE_DIM_X, ac_ib_get(ib), ~0);
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B808_COMPUTE_DIM_Y, ac_ib_get(ib), ~0);
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B80C_COMPUTE_DIM_Z, ac_ib_get(ib), ~0);
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR,
+                  ac_ib_get(ib), ~0);
+      break;
+   case PKT3_DISPATCH_INDIRECT:
+      print_named_value(f, "DATA_OFFSET", ac_ib_get(ib), 32);
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR,
+                  ac_ib_get(ib), ~0);
+      break;
+   case PKT3_SET_BASE:
+      tmp = ac_ib_get(ib);
+      /* Honor AMD_COLOR like the O_COLOR_* macros: no escape codes when color is disabled. */
+      if (tmp == 1)
+         print_string_value(f, "BASE_INDEX", "INDIRECT_BASE");
+      else
+         print_string_value(f, "BASE_INDEX",
+                            debug_get_option_color() ? COLOR_RED "UNKNOWN" COLOR_RESET : "UNKNOWN");
+      break;
    }
 
    /* print additional dwords */
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h
index ac579c5eeb7..e2a82b13e78 100644
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -293,8 +293,10 @@
 #define PKT3_IT_OPCODE_G(x)         (((x) >> 8) & 0xFF)
 #define PKT3_IT_OPCODE_C            0xFFFF00FF
 #define PKT3_PREDICATE(x)           (((x) >> 0) & 0x1)
-#define PKT3_SHADER_TYPE_S(x)       (((unsigned)(x)&0x1) << 1)
-#define PKT3_RESET_FILTER_CAM(x)    (((unsigned)(x)&0x1) << 2)
+#define PKT3_SHADER_TYPE_S(x)       (((unsigned)(x) & 0x1) << 1)
+#define PKT3_SHADER_TYPE_G(x)       (((x) >> 1) & 0x1)
+#define PKT3_RESET_FILTER_CAM_S(x)  (((unsigned)(x) & 0x1) << 2)
+#define PKT3_RESET_FILTER_CAM_G(x)  (((unsigned)(x) >> 2) & 0x1)
 #define PKT3(op, count, predicate)                                                                 \
    (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
 
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 50ef321228e..1d1ad477c02 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -7936,7 +7936,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
    uint32_t xyz_dim_enable = 1; /* TODO: disable XYZ_DIM when unneeded */
    uint32_t mode1_enable = 1;   /* legacy fast launch mode */
 
-   radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM(1));
+   radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM_S(1));
    radeon_emit(cs, 0); /* data_offset */
    radeon_emit(cs, S_4C1_XYZ_DIM_REG(xyz_dim_reg) | S_4C1_DRAW_INDEX_REG(draw_id_reg));
    if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11)
@@ -8036,7 +8036,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer)
    uint32_t mode1_en = 1; /* legacy fast launch mode */
    uint32_t linear_dispatch_en = cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch;
 
-   radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating) | PKT3_RESET_FILTER_CAM(1));
+   radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating) | PKT3_RESET_FILTER_CAM_S(1));
    radeon_emit(cs, S_4D0_RING_ENTRY_REG(ring_entry_reg) | S_4D0_XYZ_DIM_REG(xyz_dim_reg));
    if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11)
       radeon_emit(cs, S_4D1_XYZ_DIM_ENABLE(xyz_dim_en) | S_4D1_MODE1_ENABLE(mode1_en) |
diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h
index afa7157edd7..f8c69f5ff3b 100644
--- a/src/amd/vulkan/radv_cs.h
+++ b/src/amd/vulkan/radv_cs.h
@@ -179,7 +179,7 @@ radeon_set_perfctr_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, unsigne
    bool filter_cam_workaround =
       cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && cmd_buffer->qf == RADV_QUEUE_GENERAL;
 
-   radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM(filter_cam_workaround));
+   radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM_S(filter_cam_workaround));
    radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
    radeon_emit(cs, value);
 }