mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 11:48:06 +02:00
radeonsi: write shader asm annotated with wave info into GPU hang reports
Note that the disassembly is written twice - first the unmodified compiler
output and then the wave-annotated output only if there are waves executing
the shader.
Sample output from a real GPU hang most likely caused by image_sample:
The number of active waves = 28
Pixel Shader - annotated disassembly:
s_mov_b64 s[6:7], exec ; BE86017E [PC=0x10f3e3800, off=0, size=4]
s_wqm_b64 exec, exec ; BEFE077E [PC=0x10f3e3804, off=4, size=4]
...
image_sample v[7:9], v[0:1], s[12:19], s[20:23] dmask:0x7 ; F0800700 00A30700 [PC=0x10f3e3a94, off=660, size=8]
s_buffer_load_dword s20, s[0:3], 0x50 ; C0220500 00000050 [PC=0x10f3e3a9c, off=668, size=8]
s_load_dwordx4 s[24:27], s[4:5], 0x170 ; C00A0602 00000170 [PC=0x10f3e3aa4, off=676, size=8]
s_load_dwordx8 s[12:19], s[4:5], 0x140 ; C00E0302 00000140 [PC=0x10f3e3aac, off=684, size=8]
s_buffer_load_dword s11, s[0:3], 0x5c ; C02202C0 0000005C [PC=0x10f3e3ab4, off=692, size=8]
s_buffer_load_dword s21, s[0:3], 0x54 ; C0220540 00000054 [PC=0x10f3e3abc, off=700, size=8]
s_buffer_load_dword s22, s[0:3], 0x58 ; C0220580 00000058 [PC=0x10f3e3ac4, off=708, size=8]
s_waitcnt vmcnt(0) ; BF8C0F70 [PC=0x10f3e3acc, off=716, size=4]
^ SE0 SH0 CU1 SIMD1 WAVE0 EXEC=aaaaaaa555aaaaaa INST32=BF8C0F70
^ SE0 SH0 CU1 SIMD2 WAVE0 EXEC=aaaa85555555552a INST32=BF8C0F70
^ SE0 SH0 CU1 SIMD3 WAVE0 EXEC=000000000000000a INST32=BF8C0F70
^ SE0 SH0 CU6 SIMD1 WAVE0 EXEC=25a5a5aa82aaaaaa INST32=BF8C0F70
^ SE0 SH0 CU6 SIMD3 WAVE0 EXEC=50aaaa8fffa55555 INST32=BF8C0F70
^ SE0 SH0 CU7 SIMD0 WAVE0 EXEC=5554aaaaaaa1a555 INST32=BF8C0F70
^ SE0 SH0 CU7 SIMD0 WAVE1 EXEC=aaaa5555ffffffff INST32=BF8C0F70
^ SE0 SH0 CU7 SIMD1 WAVE0 EXEC=555557aaaaaaaaa5 INST32=BF8C0F70
^ SE0 SH0 CU7 SIMD3 WAVE0 EXEC=5555aaaaaaaaaa85 INST32=BF8C0F70
^ SE1 SH0 CU3 SIMD1 WAVE0 EXEC=aaaaaaaaaaaaaaaa INST32=BF8C0F70
^ SE1 SH0 CU4 SIMD0 WAVE0 EXEC=aaaaaaaa5a5a5a5a INST32=BF8C0F70
^ SE1 SH0 CU4 SIMD1 WAVE0 EXEC=aaaaaaa5a5a5a4a5 INST32=BF8C0F70
^ SE1 SH0 CU4 SIMD2 WAVE0 EXEC=5555555000000000 INST32=BF8C0F70
^ SE1 SH0 CU4 SIMD3 WAVE0 EXEC=aa555554155aaaaa INST32=BF8C0F70
^ SE1 SH0 CU5 SIMD0 WAVE0 EXEC=55ffff55555555aa INST32=BF8C0F70
^ SE1 SH0 CU5 SIMD1 WAVE0 EXEC=555555555aaaaaaa INST32=BF8C0F70
^ SE1 SH0 CU5 SIMD2 WAVE0 EXEC=a0aaaaaaa8555555 INST32=BF8C0F70
^ SE1 SH0 CU5 SIMD3 WAVE0 EXEC=8aaaaaaaaaaaa555 INST32=BF8C0F70
^ SE1 SH0 CU6 SIMD0 WAVE0 EXEC=000000002aaaaaaa INST32=BF8C0F70
^ SE2 SH0 CU1 SIMD0 WAVE0 EXEC=5aaaa5400aaaa15a INST32=BF8C0F70
^ SE2 SH0 CU1 SIMD1 WAVE0 EXEC=00aaaaaaaa5555aa INST32=BF8C0F70
^ SE2 SH0 CU1 SIMD2 WAVE0 EXEC=aa00005555554555 INST32=BF8C0F70
^ SE2 SH0 CU1 SIMD3 WAVE0 EXEC=aaaaaaa000000000 INST32=BF8C0F70
^ SE3 SH0 CU4 SIMD0 WAVE0 EXEC=5555aaaaaaaaaaaa INST32=BF8C0F70
^ SE3 SH0 CU4 SIMD2 WAVE0 EXEC=ffaaaaaaaaaa5555 INST32=BF8C0F70
^ SE3 SH0 CU4 SIMD3 WAVE0 EXEC=aaaa55555555aa00 INST32=BF8C0F70
^ SE3 SH0 CU5 SIMD0 WAVE0 EXEC=00aaaaaaaaaaaa5a INST32=BF8C0F70
^ SE3 SH0 CU5 SIMD1 WAVE0 EXEC=5a555555005555ff INST32=BF8C0F70
v_mul_f32_e32 v7, s6, v7 ; 0A0E0E06 [PC=0x10f3e3ad0, off=720, size=4]
...
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
This commit is contained in:
parent
3de8c5a3c5
commit
28c06b3ceb
3 changed files with 252 additions and 3 deletions
|
|
@ -468,6 +468,254 @@ static void si_dump_descriptors(struct si_context *sctx,
|
|||
num_elements[i], f);
|
||||
}
|
||||
|
||||
struct si_shader_inst {
|
||||
char text[160]; /* one disasm line */
|
||||
unsigned offset; /* instruction offset */
|
||||
unsigned size; /* instruction size = 4 or 8 */
|
||||
};
|
||||
|
||||
/* Split a disassembly string into lines and add them to the array pointed
|
||||
* to by "instructions". */
|
||||
static void si_add_split_disasm(const char *disasm,
|
||||
uint64_t start_addr,
|
||||
unsigned *num,
|
||||
struct si_shader_inst *instructions)
|
||||
{
|
||||
struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
|
||||
char *next;
|
||||
|
||||
while ((next = strchr(disasm, '\n'))) {
|
||||
struct si_shader_inst *inst = &instructions[*num];
|
||||
unsigned len = next - disasm;
|
||||
|
||||
assert(len < ARRAY_SIZE(inst->text));
|
||||
memcpy(inst->text, disasm, len);
|
||||
inst->text[len] = 0;
|
||||
inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
|
||||
|
||||
const char *semicolon = strchr(disasm, ';');
|
||||
assert(semicolon);
|
||||
/* More than 16 chars after ";" means the instruction is 8 bytes long. */
|
||||
inst->size = next - semicolon > 16 ? 8 : 4;
|
||||
|
||||
snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
|
||||
" [PC=0x%"PRIx64", off=%u, size=%u]",
|
||||
start_addr + inst->offset, inst->offset, inst->size);
|
||||
|
||||
last_inst = inst;
|
||||
(*num)++;
|
||||
disasm = next + 1;
|
||||
}
|
||||
}
|
||||
|
||||
#define MAX_WAVES_PER_CHIP (64 * 40)
|
||||
|
||||
struct si_wave_info {
|
||||
unsigned se; /* shader engine */
|
||||
unsigned sh; /* shader array */
|
||||
unsigned cu; /* compute unit */
|
||||
unsigned simd;
|
||||
unsigned wave;
|
||||
uint32_t status;
|
||||
uint64_t pc; /* program counter */
|
||||
uint32_t inst_dw0;
|
||||
uint32_t inst_dw1;
|
||||
uint64_t exec;
|
||||
bool matched; /* whether the wave is used by a currently-bound shader */
|
||||
};
|
||||
|
||||
static int compare_wave(const void *p1, const void *p2)
|
||||
{
|
||||
struct si_wave_info *w1 = (struct si_wave_info *)p1;
|
||||
struct si_wave_info *w2 = (struct si_wave_info *)p2;
|
||||
|
||||
/* Sort waves according to PC and then SE, SH, CU, etc. */
|
||||
if (w1->pc < w2->pc)
|
||||
return -1;
|
||||
if (w1->pc > w2->pc)
|
||||
return 1;
|
||||
if (w1->se < w2->se)
|
||||
return -1;
|
||||
if (w1->se > w2->se)
|
||||
return 1;
|
||||
if (w1->sh < w2->sh)
|
||||
return -1;
|
||||
if (w1->sh > w2->sh)
|
||||
return 1;
|
||||
if (w1->cu < w2->cu)
|
||||
return -1;
|
||||
if (w1->cu > w2->cu)
|
||||
return 1;
|
||||
if (w1->simd < w2->simd)
|
||||
return -1;
|
||||
if (w1->simd > w2->simd)
|
||||
return 1;
|
||||
if (w1->wave < w2->wave)
|
||||
return -1;
|
||||
if (w1->wave > w2->wave)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Return wave information. "waves" should be a large enough array. */
|
||||
static unsigned si_get_wave_info(struct si_wave_info waves[MAX_WAVES_PER_CHIP])
|
||||
{
|
||||
char line[2000];
|
||||
unsigned num_waves = 0;
|
||||
|
||||
FILE *p = popen("umr -wa", "r");
|
||||
if (!p)
|
||||
return 0;
|
||||
|
||||
if (!fgets(line, sizeof(line), p) ||
|
||||
strncmp(line, "SE", 2) != 0) {
|
||||
pclose(p);
|
||||
return 0;
|
||||
}
|
||||
|
||||
while (fgets(line, sizeof(line), p)) {
|
||||
struct si_wave_info *w;
|
||||
uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
|
||||
|
||||
assert(num_waves < MAX_WAVES_PER_CHIP);
|
||||
w = &waves[num_waves];
|
||||
|
||||
if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
|
||||
&w->se, &w->sh, &w->cu, &w->simd, &w->wave,
|
||||
&w->status, &pc_hi, &pc_lo, &w->inst_dw0,
|
||||
&w->inst_dw1, &exec_hi, &exec_lo) == 12) {
|
||||
w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
|
||||
w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
|
||||
w->matched = false;
|
||||
num_waves++;
|
||||
}
|
||||
}
|
||||
|
||||
qsort(waves, num_waves, sizeof(struct si_wave_info), compare_wave);
|
||||
|
||||
pclose(p);
|
||||
return num_waves;
|
||||
}
|
||||
|
||||
/* If the shader is being executed, print its asm instructions, and annotate
|
||||
* those that are being executed right now with information about waves that
|
||||
* execute them. This is most useful during a GPU hang.
|
||||
*/
|
||||
static void si_print_annotated_shader(struct si_shader *shader,
|
||||
struct si_wave_info *waves,
|
||||
unsigned num_waves,
|
||||
FILE *f)
|
||||
{
|
||||
if (!shader || !shader->binary.disasm_string)
|
||||
return;
|
||||
|
||||
uint64_t start_addr = shader->bo->gpu_address;
|
||||
uint64_t end_addr = start_addr + shader->bo->b.b.width0;
|
||||
unsigned i;
|
||||
|
||||
/* See if any wave executes the shader. */
|
||||
for (i = 0; i < num_waves; i++) {
|
||||
if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
|
||||
break;
|
||||
}
|
||||
if (i == num_waves)
|
||||
return; /* the shader is not being executed */
|
||||
|
||||
/* Remember the first found wave. The waves are sorted according to PC. */
|
||||
waves = &waves[i];
|
||||
num_waves -= i;
|
||||
|
||||
/* Get the list of instructions.
|
||||
* Buffer size / 4 is the upper bound of the instruction count.
|
||||
*/
|
||||
unsigned num_inst = 0;
|
||||
struct si_shader_inst *instructions =
|
||||
calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
|
||||
|
||||
if (shader->prolog) {
|
||||
si_add_split_disasm(shader->prolog->binary.disasm_string,
|
||||
start_addr, &num_inst, instructions);
|
||||
}
|
||||
si_add_split_disasm(shader->binary.disasm_string,
|
||||
start_addr, &num_inst, instructions);
|
||||
if (shader->epilog) {
|
||||
si_add_split_disasm(shader->epilog->binary.disasm_string,
|
||||
start_addr, &num_inst, instructions);
|
||||
}
|
||||
|
||||
fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
|
||||
si_get_shader_name(shader, shader->selector->type));
|
||||
|
||||
/* Print instructions with annotations. */
|
||||
for (i = 0; i < num_inst; i++) {
|
||||
struct si_shader_inst *inst = &instructions[i];
|
||||
|
||||
fprintf(f, "%s\n", inst->text);
|
||||
|
||||
/* Print which waves execute the instruction right now. */
|
||||
while (num_waves && start_addr + inst->offset == waves->pc) {
|
||||
fprintf(f,
|
||||
" " COLOR_GREEN "^ SE%u SH%u CU%u "
|
||||
"SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
|
||||
waves->se, waves->sh, waves->cu, waves->simd,
|
||||
waves->wave, waves->exec);
|
||||
|
||||
if (inst->size == 4) {
|
||||
fprintf(f, "INST32=%08X" COLOR_RESET "\n",
|
||||
waves->inst_dw0);
|
||||
} else {
|
||||
fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
|
||||
waves->inst_dw0, waves->inst_dw1);
|
||||
}
|
||||
|
||||
waves->matched = true;
|
||||
waves = &waves[1];
|
||||
num_waves--;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(f, "\n\n");
|
||||
free(instructions);
|
||||
}
|
||||
|
||||
static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
|
||||
{
|
||||
struct si_wave_info waves[MAX_WAVES_PER_CHIP];
|
||||
unsigned num_waves = si_get_wave_info(waves);
|
||||
|
||||
fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
|
||||
"\n\n", num_waves);
|
||||
|
||||
si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
|
||||
si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
|
||||
si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
|
||||
si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
|
||||
si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
|
||||
|
||||
/* Print waves executing shaders that are not currently bound. */
|
||||
unsigned i;
|
||||
bool found = false;
|
||||
for (i = 0; i < num_waves; i++) {
|
||||
if (waves[i].matched)
|
||||
continue;
|
||||
|
||||
if (!found) {
|
||||
fprintf(f, COLOR_CYAN
|
||||
"Waves not executing currently-bound shaders:"
|
||||
COLOR_RESET "\n");
|
||||
found = true;
|
||||
}
|
||||
fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
|
||||
" INST=%08X %08X PC=%"PRIx64"\n",
|
||||
waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
|
||||
waves[i].wave, waves[i].exec, waves[i].inst_dw0,
|
||||
waves[i].inst_dw1, waves[i].pc);
|
||||
}
|
||||
if (found)
|
||||
fprintf(f, "\n\n");
|
||||
}
|
||||
|
||||
static void si_dump_command(const char *title, const char *command, FILE *f)
|
||||
{
|
||||
char line[2000];
|
||||
|
|
@ -502,7 +750,8 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
|
|||
si_dump_shader(sctx->screen, &sctx->ps_shader, f);
|
||||
|
||||
if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
|
||||
si_dump_command("Active waves", "umr -wa | column -t", f);
|
||||
si_dump_annotated_shaders(sctx, f);
|
||||
si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
|
||||
si_dump_command("Wave information", "umr -O bits -wa", f);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5918,8 +5918,7 @@ static void si_shader_dump_stats(struct si_screen *sscreen,
|
|||
conf->spilled_vgprs, conf->private_mem_vgprs);
|
||||
}
|
||||
|
||||
static const char *si_get_shader_name(struct si_shader *shader,
|
||||
unsigned processor)
|
||||
const char *si_get_shader_name(struct si_shader *shader, unsigned processor)
|
||||
{
|
||||
switch (processor) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
|
|
|
|||
|
|
@ -576,5 +576,6 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
|
|||
unsigned symbol_offset);
|
||||
unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
|
||||
bool writes_samplemask);
|
||||
const char *si_get_shader_name(struct si_shader *shader, unsigned processor);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue