radeonsi: add IB tracing support for debug contexts

This adds trace points to all IBs. The IB parser prints each trace point
and also reports which trace points were reached (executed) by the CP.
This can help pinpoint a problematic packet, draw call, etc.

Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Marek Olšák 2015-08-19 11:53:25 +02:00
parent 189953ee13
commit 2c14a6d3b1
5 changed files with 105 additions and 16 deletions

View file

@ -142,7 +142,8 @@ static void si_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count,
si_dump_reg(f, reg + i*4, ib[2+i], ~0);
}
static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
int trace_id)
{
unsigned count = PKT_COUNT_G(ib[0]);
unsigned op = PKT3_IT_OPCODE_G(ib[0]);
@ -232,6 +233,36 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
if (ib[0] == 0xffff1000) {
count = -1; /* One dword NOP. */
break;
} else if (count == 0 && SI_IS_TRACE_POINT(ib[1])) {
unsigned packet_id = SI_GET_TRACE_POINT_ID(ib[1]);
print_spaces(f, INDENT_PKT);
fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id);
if (trace_id == -1)
break; /* tracing was disabled */
print_spaces(f, INDENT_PKT);
if (packet_id < trace_id)
fprintf(f, COLOR_RED
"This trace point was reached by the CP."
COLOR_RESET "\n");
else if (packet_id == trace_id)
fprintf(f, COLOR_RED
"!!!!! This is the last trace point that "
"was reached by the CP !!!!!"
COLOR_RESET "\n");
else if (packet_id+1 == trace_id)
fprintf(f, COLOR_RED
"!!!!! This is the first trace point that "
"was NOT been reached by the CP !!!!!"
COLOR_RESET "\n");
else
fprintf(f, COLOR_RED
"!!!!! This trace point was NOT reached "
"by the CP !!!!!"
COLOR_RESET "\n");
break;
}
/* fall through, print all dwords */
default:
@ -246,7 +277,17 @@ static uint32_t *si_parse_packet3(FILE *f, uint32_t *ib, int *num_dw)
return ib;
}
static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw)
/**
* Parse and print an IB into a file.
*
* \param f file
* \param ib IB
* \param num_dw size of the IB in dwords
* \param trace_id the last trace ID that is known to have been reached
* and executed by the CP, typically read from a buffer
*/
static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id)
{
fprintf(f, "------------------ IB begin ------------------\n");
@ -255,7 +296,7 @@ static void si_parse_ib(FILE *f, uint32_t *ib, int num_dw)
switch (type) {
case 3:
ib = si_parse_packet3(f, ib, &num_dw);
ib = si_parse_packet3(f, ib, &num_dw, trace_id);
break;
case 2:
/* type-2 nop */
@ -342,9 +383,27 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
si_dump_shader(sctx->ps_shader, "Fragment", f);
if (sctx->last_ib) {
si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size);
int last_trace_id = -1;
if (sctx->last_trace_buf) {
/* We are expecting that the ddebug pipe has already
* waited for the context, so this buffer should be idle.
* If the GPU is hung, there is no point in waiting for it.
*/
uint32_t *map =
sctx->b.ws->buffer_map(sctx->last_trace_buf->cs_buf,
NULL,
PIPE_TRANSFER_UNSYNCHRONIZED |
PIPE_TRANSFER_READ);
if (map)
last_trace_id = *map;
}
si_parse_ib(f, sctx->last_ib, sctx->last_ib_dw_size,
last_trace_id);
free(sctx->last_ib); /* dump only once */
sctx->last_ib = NULL;
r600_resource_reference(&sctx->last_trace_buf, NULL);
}
fprintf(f, "Done.\n");

View file

@ -89,7 +89,7 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
num_dw += ctx->atoms.s.cache_flush->num_dw;
if (ctx->screen->b.trace_bo)
num_dw += SI_TRACE_CS_DWORDS;
num_dw += SI_TRACE_CS_DWORDS * 2;
/* Flush if there's not enough space. */
if (num_dw > cs->max_dw) {
@ -127,12 +127,17 @@ void si_context_gfx_flush(void *context, unsigned flags,
/* force to keep tiling flags */
flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;
if (ctx->trace_buf)
si_trace_emit(ctx);
/* Save the IB for debug contexts. */
if (ctx->is_debug) {
free(ctx->last_ib);
ctx->last_ib_dw_size = cs->cdw;
ctx->last_ib = malloc(cs->cdw * 4);
memcpy(ctx->last_ib, cs->buf, cs->cdw * 4);
r600_resource_reference(&ctx->last_trace_buf, ctx->trace_buf);
r600_resource_reference(&ctx->trace_buf, NULL);
}
/* Flush the CS. */
@ -148,6 +153,23 @@ void si_context_gfx_flush(void *context, unsigned flags,
void si_begin_new_cs(struct si_context *ctx)
{
if (ctx->is_debug) {
uint32_t zero = 0;
/* Create a buffer used for writing trace IDs and initialize it to 0. */
assert(!ctx->trace_buf);
ctx->trace_buf = (struct r600_resource*)
pipe_buffer_create(ctx->b.b.screen, PIPE_BIND_CUSTOM,
PIPE_USAGE_STAGING, 4);
if (ctx->trace_buf)
pipe_buffer_write_nooverlap(&ctx->b.b, &ctx->trace_buf->b.b,
0, sizeof(zero), &zero);
ctx->trace_id = 0;
}
if (ctx->trace_buf)
si_trace_emit(ctx);
/* Flush read caches at the beginning of CS. */
ctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
SI_CONTEXT_INV_TC_L1 |

View file

@ -81,6 +81,8 @@ static void si_destroy_context(struct pipe_context *context)
LLVMDisposeTargetMachine(sctx->tm);
#endif
r600_resource_reference(&sctx->trace_buf, NULL);
r600_resource_reference(&sctx->last_trace_buf, NULL);
free(sctx->last_ib);
FREE(sctx);
}

View file

@ -43,7 +43,7 @@
#define SI_RESTART_INDEX_UNKNOWN INT_MIN
#define SI_NUM_SMOOTH_AA_SAMPLES 8
#define SI_TRACE_CS_DWORDS 6
#define SI_TRACE_CS_DWORDS 7
#define SI_MAX_DRAW_CS_DWORDS \
(/*scratch:*/ 3 + /*derived prim state:*/ 3 + \
@ -81,6 +81,10 @@
SI_CONTEXT_FLUSH_AND_INV_DB | \
SI_CONTEXT_FLUSH_AND_INV_DB_META)
#define SI_ENCODE_TRACE_POINT(id) (0xcafe0000 | ((id) & 0xffff))
#define SI_IS_TRACE_POINT(x) (((x) & 0xcafe0000) == 0xcafe0000)
#define SI_GET_TRACE_POINT_ID(x) ((x) & 0xffff)
struct si_compute;
struct si_screen {
@ -247,6 +251,9 @@ struct si_context {
bool is_debug;
uint32_t *last_ib;
unsigned last_ib_dw_size;
struct r600_resource *last_trace_buf;
struct r600_resource *trace_buf;
unsigned trace_id;
};
/* cik_sdma.c */

View file

@ -835,7 +835,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
si_emit_draw_registers(sctx, info);
si_emit_draw_packets(sctx, info, &ib);
if (sctx->screen->b.trace_bo)
if (sctx->trace_buf)
si_trace_emit(sctx);
/* Workaround for a VGT hang when streamout is enabled.
@ -873,19 +873,18 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
void si_trace_emit(struct si_context *sctx)
{
struct si_screen *sscreen = sctx->screen;
struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs;
uint64_t va;
va = sscreen->b.trace_bo->gpu_address;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sscreen->b.trace_bo,
sctx->trace_id++;
r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, sctx->trace_buf,
RADEON_USAGE_READWRITE, RADEON_PRIO_MIN);
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
PKT3_WRITE_DATA_WR_CONFIRM |
PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME));
radeon_emit(cs, va & 0xFFFFFFFFUL);
radeon_emit(cs, (va >> 32UL) & 0xFFFFFFFFUL);
radeon_emit(cs, cs->cdw);
radeon_emit(cs, sscreen->b.cs_count);
radeon_emit(cs, sctx->trace_buf->gpu_address);
radeon_emit(cs, sctx->trace_buf->gpu_address >> 32);
radeon_emit(cs, sctx->trace_id);
radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
radeon_emit(cs, SI_ENCODE_TRACE_POINT(sctx->trace_id));
}