From b97a6ae5efd6310c4c83f6360c83b2bc123c4dff Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Wed, 21 Jan 2026 15:46:12 +0100 Subject: [PATCH] radeonsi: split shaders/draw code from si_debug to a new file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anything that depends on shaders is now part of si_debug_gfx_compute.c Reviewed-by: Marek Olšák Part-of: --- src/gallium/drivers/radeonsi/meson.build | 1 + src/gallium/drivers/radeonsi/si_debug.c | 658 ----------------- .../drivers/radeonsi/si_debug_gfx_compute.c | 685 ++++++++++++++++++ src/gallium/drivers/radeonsi/si_pipe.h | 11 +- 4 files changed, 693 insertions(+), 662 deletions(-) create mode 100644 src/gallium/drivers/radeonsi/si_debug_gfx_compute.c diff --git a/src/gallium/drivers/radeonsi/meson.build b/src/gallium/drivers/radeonsi/meson.build index 780f870c7ee..8eb6c011d88 100644 --- a/src/gallium/drivers/radeonsi/meson.build +++ b/src/gallium/drivers/radeonsi/meson.build @@ -33,6 +33,7 @@ files_libradeonsi = files( 'si_cp_reg_shadowing.c', 'si_cp_utils.c', 'si_debug.c', + 'si_debug_gfx_compute.c', 'si_descriptors.c', 'si_fence.c', 'si_get.c', diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index 4d44b701279..20cadcd0b5a 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -19,8 +19,6 @@ static void si_dump_bo_list(struct si_context *sctx, const struct radeon_saved_cs *saved, FILE *f); -DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL) - static enum amd_ip_type si_get_context_ip_type(struct si_context *sctx) { return sctx->is_gfx_queue ? AMD_IP_GFX : AMD_IP_COMPUTE; @@ -83,193 +81,6 @@ void si_destroy_saved_cs(struct si_saved_cs *scs) free(scs); } -static void si_dump_shader(struct si_screen *sscreen, struct si_shader *shader, FILE *f) -{ - if (shader->shader_log) - fwrite(shader->shader_log, shader->shader_log_size, 1, f); - else - si_shader_dump(sscreen, shader, NULL, f, false); - - if (shader->bo && sscreen->options.dump_shader_binary) { - unsigned size = shader->bo->b.b.width0; - fprintf(f, "BO: VA=%" PRIx64 " Size=%u\n", shader->bo->gpu_address, size); - - const char *mapped = sscreen->ws->buffer_map(sscreen->ws, - shader->bo->buf, NULL, - PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_READ | RADEON_MAP_TEMPORARY); - - for (unsigned i = 0; i < size; i += 4) { - fprintf(f, " %4x: %08x\n", i, *(uint32_t *)(mapped + i)); - } - - sscreen->ws->buffer_unmap(sscreen->ws, shader->bo->buf); - - fprintf(f, "\n"); - } -} - -struct si_log_chunk_shader { - /* The shader destroy code assumes a current context for unlinking of - * PM4 packets etc. - * - * While we should be able to destroy shaders without a context, doing - * so would happen only very rarely and be therefore likely to fail - * just when you're trying to debug something. Let's just remember the - * current context in the chunk. - */ - struct si_context *ctx; - struct si_shader *shader; - - /* For keep-alive reference counts */ - struct si_shader_selector *sel; - struct si_compute *program; -}; - -static void si_log_chunk_shader_destroy(void *data) -{ - struct si_log_chunk_shader *chunk = data; - si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL); - si_compute_reference(&chunk->program, NULL); - FREE(chunk); -} - -static void si_log_chunk_shader_print(void *data, FILE *f) -{ - struct si_log_chunk_shader *chunk = data; - struct si_screen *sscreen = chunk->ctx->screen; - si_dump_shader(sscreen, chunk->shader, f); -} - -static struct u_log_chunk_type si_log_chunk_type_shader = { - .destroy = si_log_chunk_shader_destroy, - .print = si_log_chunk_shader_print, -}; - -static void si_dump_gfx_shader(struct si_context *ctx, const struct si_shader_ctx_state *state, - struct u_log_context *log) -{ - struct si_shader *current = state->current; - - if (!state->cso || !current) - return; - - struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); - chunk->ctx = ctx; - chunk->shader = current; - si_shader_selector_reference(ctx, &chunk->sel, current->selector); - u_log_chunk(log, &si_log_chunk_type_shader, chunk); -} - -static void si_dump_compute_shader(struct si_context *ctx, - struct si_compute *program, - struct u_log_context *log) -{ - if (!program) - return; - - struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); - chunk->ctx = ctx; - chunk->shader = &program->shader; - si_compute_reference(&chunk->program, program); - u_log_chunk(log, &si_log_chunk_type_shader, chunk); -} - -/** - * Shader compiles can be overridden with arbitrary ELF objects by setting - * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2] - * - * TODO: key this off some hash - */ -bool si_replace_shader(unsigned num, struct si_shader_binary *binary) -{ - const char *p = debug_get_option_replace_shaders(); - const char *semicolon; - char *copy = NULL; - FILE *f; - long filesize, nread; - bool replaced = false; - - if (!p) - return false; - - while (*p) { - unsigned long i; - char *endp; - i = strtoul(p, &endp, 0); - - p = endp; - if (*p != ':') { - mesa_loge("RADEON_REPLACE_SHADERS formatted badly."); - exit(1); - } - ++p; - - if (i == num) - break; - - p = strchr(p, ';'); - if (!p) - return false; - ++p; - } - if (!*p) - return false; - - semicolon = strchr(p, ';'); - if (semicolon) { - p = copy = strndup(p, semicolon - p); - if (!copy) { - mesa_loge("out of memory"); - return false; - } - } - - mesa_logi("replace shader %u by %s", num, p); - - f = fopen(p, "r"); - if (!f) { - perror("radeonsi: failed to open file"); - goto out_free; - } - - if (fseek(f, 0, SEEK_END) != 0) - goto file_error; - - filesize = ftell(f); - if (filesize < 0) - goto file_error; - - if (fseek(f, 0, SEEK_SET) != 0) - goto file_error; - - binary->code_buffer = MALLOC(filesize); - if (!binary->code_buffer) { - mesa_loge("out of memory"); - goto out_close; - } - - nread = fread((void *)binary->code_buffer, 1, filesize, f); - if (nread != filesize) { - FREE((void *)binary->code_buffer); - binary->code_buffer = NULL; - goto file_error; - } - - binary->type = SI_SHADER_BINARY_ELF; - binary->code_size = nread; - replaced = true; - -out_close: - fclose(f); -out_free: - free(copy); - return replaced; - -file_error: - perror("radeonsi: reading shader"); - goto out_close; -} - /* Parsed IBs are difficult to read without colors. Use "less -R file" to * read them, or use "aha -b -f file" to convert them to html. */ @@ -607,418 +418,6 @@ static void si_dump_bo_list(struct si_context *sctx, const struct radeon_saved_c " Other buffers can still be allocated there.\n\n"); } -static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log) -{ - struct pipe_framebuffer_state *state = &sctx->framebuffer.state; - struct si_texture *tex; - int i; - - for (i = 0; i < state->nr_cbufs; i++) { - if (!state->cbufs[i].texture) - continue; - - tex = (struct si_texture *)state->cbufs[i].texture; - u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i); - si_print_texture_info(sctx->screen, tex, log); - u_log_printf(log, "\n"); - } - - if (state->zsbuf.texture) { - tex = (struct si_texture *)state->zsbuf.texture; - u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n"); - si_print_texture_info(sctx->screen, tex, log); - u_log_printf(log, "\n"); - } -} - -typedef unsigned (*slot_remap_func)(unsigned); - -struct si_log_chunk_desc_list { - /** Pointer to memory map of buffer where the list is uploader */ - uint32_t *gpu_list; - /** Reference of buffer where the list is uploaded, so that gpu_list - * is kept live. */ - struct si_resource *buf; - - const char *shader_name; - const char *elem_name; - slot_remap_func slot_remap; - enum amd_gfx_level gfx_level; - enum radeon_family family; - unsigned element_dw_size; - unsigned num_elements; - - uint32_t list[0]; -}; - -static void si_log_chunk_desc_list_destroy(void *data) -{ - struct si_log_chunk_desc_list *chunk = data; - si_resource_reference(&chunk->buf, NULL); - FREE(chunk); -} - -static void si_log_chunk_desc_list_print(void *data, FILE *f) -{ - struct si_log_chunk_desc_list *chunk = data; - unsigned sq_img_rsrc_word0 = - chunk->gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0; - - for (unsigned i = 0; i < chunk->num_elements; i++) { - unsigned cpu_dw_offset = i * chunk->element_dw_size; - unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size; - const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list"; - uint32_t *cpu_list = chunk->list + cpu_dw_offset; - uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list; - - fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n", chunk->shader_name, - chunk->elem_name, i, list_note); - - switch (chunk->element_dw_size) { - case 4: - for (unsigned j = 0; j < 4; j++) - ac_dump_reg(f, chunk->gfx_level, chunk->family, - R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[j], 0xffffffff); - break; - case 8: - for (unsigned j = 0; j < 8; j++) - ac_dump_reg(f, chunk->gfx_level, chunk->family, - sq_img_rsrc_word0 + j * 4, gpu_list[j], 0xffffffff); - - fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); - for (unsigned j = 0; j < 4; j++) - ac_dump_reg(f, chunk->gfx_level, chunk->family, - R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[4 + j], 0xffffffff); - break; - case 16: - for (unsigned j = 0; j < 8; j++) - ac_dump_reg(f, chunk->gfx_level, chunk->family, - sq_img_rsrc_word0 + j * 4, gpu_list[j], 0xffffffff); - - fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); - for (unsigned j = 0; j < 4; j++) - ac_dump_reg(f, chunk->gfx_level, chunk->family, - R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[4 + j], 0xffffffff); - - fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); - for (unsigned j = 0; j < 8; j++) - ac_dump_reg(f, chunk->gfx_level, chunk->family, - sq_img_rsrc_word0 + j * 4, gpu_list[8 + j], 0xffffffff); - - fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); - for (unsigned j = 0; j < 4; j++) - ac_dump_reg(f, chunk->gfx_level, chunk->family, - R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, gpu_list[12 + j], 0xffffffff); - break; - } - - if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) { - fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!" COLOR_RESET "\n"); - } - - fprintf(f, "\n"); - } -} - -static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = { - .destroy = si_log_chunk_desc_list_destroy, - .print = si_log_chunk_desc_list_print, -}; - -static void si_dump_descriptor_list(struct si_screen *screen, struct si_descriptors *desc, - const char *shader_name, const char *elem_name, - unsigned element_dw_size, unsigned num_elements, - slot_remap_func slot_remap, struct u_log_context *log) -{ - if (!desc->list) - return; - - /* In some cases, the caller doesn't know how many elements are really - * uploaded. Reduce num_elements to fit in the range of active slots. */ - unsigned active_range_dw_begin = desc->first_active_slot * desc->element_dw_size; - unsigned active_range_dw_end = - active_range_dw_begin + desc->num_active_slots * desc->element_dw_size; - - while (num_elements > 0) { - int i = slot_remap(num_elements - 1); - unsigned dw_begin = i * element_dw_size; - unsigned dw_end = dw_begin + element_dw_size; - - if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end) - break; - - num_elements--; - } - - struct si_log_chunk_desc_list *chunk = - CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list, 4 * (size_t)element_dw_size * num_elements); - chunk->shader_name = shader_name; - chunk->elem_name = elem_name; - chunk->element_dw_size = element_dw_size; - chunk->num_elements = num_elements; - chunk->slot_remap = slot_remap; - chunk->gfx_level = screen->info.gfx_level; - chunk->family = screen->info.family; - - si_resource_reference(&chunk->buf, desc->buffer); - chunk->gpu_list = desc->gpu_list; - - for (unsigned i = 0; i < num_elements; ++i) { - memcpy(&chunk->list[i * element_dw_size], &desc->list[slot_remap(i) * element_dw_size], - 4 * element_dw_size); - } - - u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk); -} - -static unsigned si_identity(unsigned slot) -{ - return slot; -} - -static void si_dump_descriptors(struct si_context *sctx, mesa_shader_stage stage, - const struct si_shader_info *info, struct u_log_context *log) -{ - struct si_descriptors *descs = - &sctx->descriptors[SI_DESCS_FIRST_SHADER + stage * SI_NUM_SHADER_DESCS]; - static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"}; - const char *name = shader_name[stage]; - unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers; - unsigned enabled_images; - - if (info) { - enabled_constbuf = BITFIELD_MASK(info->base.num_ubos); - enabled_shaderbuf = BITFIELD_MASK(info->base.num_ssbos); - enabled_samplers = info->base.textures_used; - enabled_images = BITFIELD_MASK(info->base.num_images); - } else { - enabled_constbuf = - sctx->const_and_shader_buffers[stage].enabled_mask >> SI_NUM_SHADER_BUFFERS; - enabled_shaderbuf = 0; - for (int i = 0; i < SI_NUM_SHADER_BUFFERS; i++) { - enabled_shaderbuf |= - (sctx->const_and_shader_buffers[stage].enabled_mask & - 1llu << (SI_NUM_SHADER_BUFFERS - i - 1)) << i; - } - enabled_samplers = sctx->samplers[stage].enabled_mask; - enabled_images = sctx->images[stage].enabled_mask; - } - - si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], name, - " - Constant buffer", 4, util_last_bit(enabled_constbuf), - si_get_constbuf_slot, log); - si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], name, - " - Shader buffer", 4, util_last_bit(enabled_shaderbuf), - si_get_shaderbuf_slot, log); - si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], name, - " - Sampler", 16, util_last_bit(enabled_samplers), si_get_sampler_slot, - log); - si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], name, - " - Image", 8, util_last_bit(enabled_images), si_get_image_slot, log); -} - -static void si_dump_gfx_descriptors(struct si_context *sctx, - const struct si_shader_ctx_state *state, - struct u_log_context *log) -{ - if (!state->cso || !state->current) - return; - - si_dump_descriptors(sctx, state->cso->stage, &state->cso->info, log); -} - -static void si_dump_compute_descriptors(struct si_context *sctx, - const struct si_compute *program, - struct u_log_context *log) -{ - if (!program) - return; - - si_dump_descriptors(sctx, program->sel.stage, NULL, log); -} - -struct si_shader_inst { - const char *text; /* start of disassembly for this instruction */ - unsigned textlen; - unsigned size; /* instruction size = 4 or 8 */ - uint64_t addr; /* instruction address */ -}; - -/** - * Open the given \p binary as \p rtld_binary and split the contained - * disassembly string into instructions and add them to the array - * pointed to by \p instructions, which must be sufficiently large. - * - * Labels are considered to be part of the following instruction. - * - * The caller must keep \p rtld_binary alive as long as \p instructions are - * used and then close it afterwards. - */ -static void si_add_split_disasm(struct si_screen *screen, struct ac_rtld_binary *rtld_binary, - struct si_shader_binary *binary, uint64_t *addr, unsigned *num, - struct si_shader_inst *instructions, - mesa_shader_stage stage, unsigned wave_size) -{ - if (!ac_rtld_open(rtld_binary, (struct ac_rtld_open_info){ - .info = &screen->info, - .shader_type = stage, - .wave_size = wave_size, - .num_parts = 1, - .elf_ptrs = &binary->code_buffer, - .elf_sizes = &binary->code_size})) - return; - - const char *disasm; - size_t nbytes; - if (!ac_rtld_get_section_by_name(rtld_binary, ".AMDGPU.disasm", &disasm, &nbytes)) - return; - - const char *end = disasm + nbytes; - while (disasm < end) { - const char *semicolon = memchr(disasm, ';', end - disasm); - if (!semicolon) - break; - - struct si_shader_inst *inst = &instructions[(*num)++]; - const char *inst_end = memchr(semicolon + 1, '\n', end - semicolon - 1); - if (!inst_end) - inst_end = end; - - inst->text = disasm; - inst->textlen = inst_end - disasm; - - inst->addr = *addr; - /* More than 16 chars after ";" means the instruction is 8 bytes long. */ - inst->size = inst_end - semicolon > 16 ? 8 : 4; - *addr += inst->size; - - if (inst_end == end) - break; - disasm = inst_end + 1; - } -} - -/* If the shader is being executed, print its asm instructions, and annotate - * those that are being executed right now with information about waves that - * execute them. This is most useful during a GPU hang. - */ -static void si_print_annotated_shader(struct si_shader *shader, struct ac_wave_info *waves, - unsigned num_waves, FILE *f) -{ - if (!shader) - return; - - struct si_screen *screen = shader->selector->screen; - mesa_shader_stage stage = shader->selector->stage; - uint64_t start_addr = shader->bo->gpu_address; - uint64_t end_addr = start_addr + shader->bo->b.b.width0; - unsigned i; - - /* See if any wave executes the shader. */ - for (i = 0; i < num_waves; i++) { - if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) - break; - } - if (i == num_waves) - return; /* the shader is not being executed */ - - /* Remember the first found wave. The waves are sorted according to PC. */ - waves = &waves[i]; - num_waves -= i; - - /* Get the list of instructions. - * Buffer size / 4 is the upper bound of the instruction count. - */ - unsigned num_inst = 0; - uint64_t inst_addr = start_addr; - struct ac_rtld_binary rtld_binaries[5] = {}; - struct si_shader_inst *instructions = - calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst)); - - if (shader->prolog) { - si_add_split_disasm(screen, &rtld_binaries[0], &shader->prolog->binary, &inst_addr, &num_inst, - instructions, stage, shader->wave_size); - } - if (shader->previous_stage) { - si_add_split_disasm(screen, &rtld_binaries[1], &shader->previous_stage->binary, &inst_addr, - &num_inst, instructions, stage, shader->wave_size); - } - si_add_split_disasm(screen, &rtld_binaries[3], &shader->binary, &inst_addr, &num_inst, - instructions, stage, shader->wave_size); - if (shader->epilog) { - si_add_split_disasm(screen, &rtld_binaries[4], &shader->epilog->binary, &inst_addr, &num_inst, - instructions, stage, shader->wave_size); - } - - fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", - si_get_shader_name(shader)); - - /* Print instructions with annotations. */ - for (i = 0; i < num_inst; i++) { - struct si_shader_inst *inst = &instructions[i]; - - fprintf(f, "%.*s [PC=0x%" PRIx64 ", size=%u]\n", inst->textlen, inst->text, inst->addr, - inst->size); - - /* Print which waves execute the instruction right now. */ - while (num_waves && inst->addr == waves->pc) { - fprintf(f, - " " COLOR_GREEN "^ SE%u SH%u CU%u " - "SIMD%u WAVE%u EXEC=%016" PRIx64 " ", - waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec); - - if (inst->size == 4) { - fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0); - } else { - fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1); - } - - waves->matched = true; - waves = &waves[1]; - num_waves--; - } - } - - fprintf(f, "\n\n"); - free(instructions); - for (unsigned i = 0; i < ARRAY_SIZE(rtld_binaries); ++i) - ac_rtld_close(&rtld_binaries[i]); -} - -static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f) -{ - struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; - unsigned num_waves = ac_get_wave_info(sctx->gfx_level, &sctx->screen->info, NULL, waves); - - fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); - - si_print_annotated_shader(sctx->shader.vs.current, waves, num_waves, f); - si_print_annotated_shader(sctx->shader.tcs.current, waves, num_waves, f); - si_print_annotated_shader(sctx->shader.tes.current, waves, num_waves, f); - si_print_annotated_shader(sctx->shader.gs.current, waves, num_waves, f); - si_print_annotated_shader(sctx->shader.ps.current, waves, num_waves, f); - - /* Print waves executing shaders that are not currently bound. */ - unsigned i; - bool found = false; - for (i = 0; i < num_waves; i++) { - if (waves[i].matched) - continue; - - if (!found) { - fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n"); - found = true; - } - fprintf(f, - " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 - "\n", - waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec, - waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc); - } - if (found) - fprintf(f, "\n\n"); -} - static void si_dump_command(const char *title, const char *command, FILE *f) { char line[2000]; @@ -1050,42 +449,6 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flag } } -void si_log_draw_state(struct si_context *sctx, struct u_log_context *log) -{ - if (!log) - return; - - si_dump_framebuffer(sctx, log); - - si_dump_compute_shader(sctx, sctx->ts_shader_state.program, log); - si_dump_gfx_shader(sctx, &sctx->ms_shader_state, log); - si_dump_gfx_shader(sctx, &sctx->shader.vs, log); - si_dump_gfx_shader(sctx, &sctx->shader.tcs, log); - si_dump_gfx_shader(sctx, &sctx->shader.tes, log); - si_dump_gfx_shader(sctx, &sctx->shader.gs, log); - si_dump_gfx_shader(sctx, &sctx->shader.ps, log); - - si_dump_descriptor_list(sctx->screen, &sctx->descriptors[SI_DESCS_INTERNAL], "", "RW buffers", - 4, sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots, si_identity, - log); - si_dump_compute_descriptors(sctx, sctx->ts_shader_state.program, log); - si_dump_gfx_descriptors(sctx, &sctx->ms_shader_state, log); - si_dump_gfx_descriptors(sctx, &sctx->shader.vs, log); - si_dump_gfx_descriptors(sctx, &sctx->shader.tcs, log); - si_dump_gfx_descriptors(sctx, &sctx->shader.tes, log); - si_dump_gfx_descriptors(sctx, &sctx->shader.gs, log); - si_dump_gfx_descriptors(sctx, &sctx->shader.ps, log); -} - -void si_log_compute_state(struct si_context *sctx, struct u_log_context *log) -{ - if (!log) - return; - - si_dump_compute_shader(sctx, sctx->cs_shader_state.program, log); - si_dump_compute_descriptors(sctx, sctx->cs_shader_state.program, log); -} - void si_check_vm_faults(struct si_context *sctx, struct radeon_saved_cs *saved) { struct pipe_screen *screen = sctx->b.screen; @@ -1136,27 +499,6 @@ void si_check_vm_faults(struct si_context *sctx, struct radeon_saved_cs *saved) exit(0); } -void si_gather_context_rolls(struct si_context *sctx) -{ - struct radeon_cmdbuf *cs = &sctx->gfx_cs; - uint32_t **ibs = alloca(sizeof(ibs[0]) * (cs->num_prev + 1)); - uint32_t *ib_dw_sizes = alloca(sizeof(ib_dw_sizes[0]) * (cs->num_prev + 1)); - - for (unsigned i = 0; i < cs->num_prev; i++) { - struct ac_cmdbuf *chunk = &cs->prev[i]; - - ibs[i] = chunk->buf; - ib_dw_sizes[i] = chunk->cdw; - } - - ibs[cs->num_prev] = cs->current.buf; - ib_dw_sizes[cs->num_prev] = cs->current.cdw; - - FILE *f = fopen(sctx->screen->context_roll_log_filename, "a"); - ac_gather_context_rolls(f, ibs, ib_dw_sizes, cs->num_prev + 1, NULL, &sctx->screen->info); - fclose(f); -} - void si_init_debug_functions(struct si_context *sctx) { sctx->b.dump_debug_state = si_dump_debug_state; diff --git a/src/gallium/drivers/radeonsi/si_debug_gfx_compute.c b/src/gallium/drivers/radeonsi/si_debug_gfx_compute.c new file mode 100644 index 00000000000..347aa4b85a9 --- /dev/null +++ b/src/gallium/drivers/radeonsi/si_debug_gfx_compute.c @@ -0,0 +1,685 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: MIT + */ + +#include "ac_debug.h" +#include "ac_rtld.h" +#include "driver_ddebug/dd_util.h" +#include "si_pipe.h" +#include "sid.h" +#include "sid_tables.h" +#include "tgsi/tgsi_from_mesa.h" +#include "util/u_dump.h" +#include "util/u_log.h" +#include "util/u_memory.h" +#include "util/u_process.h" +#include "util/u_string.h" + +DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL) + +static void si_dump_shader(struct si_screen *sscreen, struct si_shader *shader, FILE *f) +{ + if (shader->shader_log) + fwrite(shader->shader_log, shader->shader_log_size, 1, f); + else + si_shader_dump(sscreen, shader, NULL, f, false); + + if (shader->bo && sscreen->options.dump_shader_binary) { + unsigned size = shader->bo->b.b.width0; + fprintf(f, "BO: VA=%" PRIx64 " Size=%u\n", shader->bo->gpu_address, size); + + const char *mapped = sscreen->ws->buffer_map(sscreen->ws, + shader->bo->buf, NULL, + PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_READ | RADEON_MAP_TEMPORARY); + + for (unsigned i = 0; i < size; i += 4) { + fprintf(f, " %4x: %08x\n", i, *(uint32_t *)(mapped + i)); + } + + sscreen->ws->buffer_unmap(sscreen->ws, shader->bo->buf); + + fprintf(f, "\n"); + } +} + +struct si_log_chunk_shader { + /* The shader destroy code assumes a current context for unlinking of + * PM4 packets etc. + * + * While we should be able to destroy shaders without a context, doing + * so would happen only very rarely and be therefore likely to fail + * just when you're trying to debug something. Let's just remember the + * current context in the chunk. + */ + struct si_context *ctx; + struct si_shader *shader; + + /* For keep-alive reference counts */ + struct si_shader_selector *sel; + struct si_compute *program; +}; + +static void si_log_chunk_shader_destroy(void *data) +{ + struct si_log_chunk_shader *chunk = data; + si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL); + si_compute_reference(&chunk->program, NULL); + FREE(chunk); +} + +static void si_log_chunk_shader_print(void *data, FILE *f) +{ + struct si_log_chunk_shader *chunk = data; + struct si_screen *sscreen = chunk->ctx->screen; + si_dump_shader(sscreen, chunk->shader, f); +} + +static struct u_log_chunk_type si_log_chunk_type_shader = { + .destroy = si_log_chunk_shader_destroy, + .print = si_log_chunk_shader_print, +}; + +static void si_dump_gfx_shader(struct si_context *ctx, const struct si_shader_ctx_state *state, + struct u_log_context *log) +{ + struct si_shader *current = state->current; + + if (!state->cso || !current) + return; + + struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); + chunk->ctx = ctx; + chunk->shader = current; + si_shader_selector_reference(ctx, &chunk->sel, current->selector); + u_log_chunk(log, &si_log_chunk_type_shader, chunk); +} + +static void si_dump_compute_shader(struct si_context *ctx, + struct si_compute *program, + struct u_log_context *log) +{ + if (!program) + return; + + struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); + chunk->ctx = ctx; + chunk->shader = &program->shader; + si_compute_reference(&chunk->program, program); + u_log_chunk(log, &si_log_chunk_type_shader, chunk); +} + +/** + * Shader compiles can be overridden with arbitrary ELF objects by setting + * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2] + * + * TODO: key this off some hash + */ +bool si_replace_shader(unsigned num, struct si_shader_binary *binary) +{ + const char *p = debug_get_option_replace_shaders(); + const char *semicolon; + char *copy = NULL; + FILE *f; + long filesize, nread; + bool replaced = false; + + if (!p) + return false; + + while (*p) { + unsigned long i; + char *endp; + i = strtoul(p, &endp, 0); + + p = endp; + if (*p != ':') { + mesa_loge("RADEON_REPLACE_SHADERS formatted badly."); + exit(1); + } + ++p; + + if (i == num) + break; + + p = strchr(p, ';'); + if (!p) + return false; + ++p; + } + if (!*p) + return false; + + semicolon = strchr(p, ';'); + if (semicolon) { + p = copy = strndup(p, semicolon - p); + if (!copy) { + mesa_loge("out of memory"); + return false; + } + } + + mesa_logi("replace shader %u by %s", num, p); + + f = fopen(p, "r"); + if (!f) { + perror("radeonsi: failed to open file"); + goto out_free; + } + + if (fseek(f, 0, SEEK_END) != 0) + goto file_error; + + filesize = ftell(f); + if (filesize < 0) + goto file_error; + + if (fseek(f, 0, SEEK_SET) != 0) + goto file_error; + + binary->code_buffer = MALLOC(filesize); + if (!binary->code_buffer) { + mesa_loge("out of memory"); + goto out_close; + } + + nread = fread((void *)binary->code_buffer, 1, filesize, f); + if (nread != filesize) { + FREE((void *)binary->code_buffer); + binary->code_buffer = NULL; + goto file_error; + } + + binary->type = SI_SHADER_BINARY_ELF; + binary->code_size = nread; + replaced = true; + +out_close: + fclose(f); +out_free: + free(copy); + return replaced; + +file_error: + perror("radeonsi: reading shader"); + goto out_close; +} + +/* Parsed IBs are difficult to read without colors. Use "less -R file" to + * read them, or use "aha -b -f file" to convert them to html. + */ +#define COLOR_RESET "\033[0m" +#define COLOR_RED "\033[31m" +#define COLOR_GREEN "\033[1;32m" +#define COLOR_YELLOW "\033[1;33m" +#define COLOR_CYAN "\033[1;36m" + +static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log) +{ + struct pipe_framebuffer_state *state = &sctx->framebuffer.state; + struct si_texture *tex; + int i; + + for (i = 0; i < state->nr_cbufs; i++) { + if (!state->cbufs[i].texture) + continue; + + tex = (struct si_texture *)state->cbufs[i].texture; + u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i); + si_print_texture_info(sctx->screen, tex, log); + u_log_printf(log, "\n"); + } + + if (state->zsbuf.texture) { + tex = (struct si_texture *)state->zsbuf.texture; + u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n"); + si_print_texture_info(sctx->screen, tex, log); + u_log_printf(log, "\n"); + } +} + +typedef unsigned (*slot_remap_func)(unsigned); + +struct si_log_chunk_desc_list { + /** Pointer to memory map of buffer where the list is uploader */ + uint32_t *gpu_list; + /** Reference of buffer where the list is uploaded, so that gpu_list + * is kept live. */ + struct si_resource *buf; + + const char *shader_name; + const char *elem_name; + slot_remap_func slot_remap; + enum amd_gfx_level gfx_level; + enum radeon_family family; + unsigned element_dw_size; + unsigned num_elements; + + uint32_t list[0]; +}; + +static void si_log_chunk_desc_list_destroy(void *data) +{ + struct si_log_chunk_desc_list *chunk = data; + si_resource_reference(&chunk->buf, NULL); + FREE(chunk); +} + +static void si_log_chunk_desc_list_print(void *data, FILE *f) +{ + struct si_log_chunk_desc_list *chunk = data; + unsigned sq_img_rsrc_word0 = + chunk->gfx_level >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0; + + for (unsigned i = 0; i < chunk->num_elements; i++) { + unsigned cpu_dw_offset = i * chunk->element_dw_size; + unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size; + const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list"; + uint32_t *cpu_list = chunk->list + cpu_dw_offset; + uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list; + + fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n", chunk->shader_name, + chunk->elem_name, i, list_note); + + switch (chunk->element_dw_size) { + case 4: + for (unsigned j = 0; j < 4; j++) + ac_dump_reg(f, chunk->gfx_level, chunk->family, + R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[j], 0xffffffff); + break; + case 8: + for (unsigned j = 0; j < 8; j++) + ac_dump_reg(f, chunk->gfx_level, chunk->family, + sq_img_rsrc_word0 + j * 4, gpu_list[j], 0xffffffff); + + fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 4; j++) + ac_dump_reg(f, chunk->gfx_level, chunk->family, + R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[4 + j], 0xffffffff); + break; + case 16: + for (unsigned j = 0; j < 8; j++) + ac_dump_reg(f, chunk->gfx_level, chunk->family, + sq_img_rsrc_word0 + j * 4, gpu_list[j], 0xffffffff); + + fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 4; j++) + ac_dump_reg(f, chunk->gfx_level, chunk->family, + R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[4 + j], 0xffffffff); + + fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 8; j++) + ac_dump_reg(f, chunk->gfx_level, chunk->family, + sq_img_rsrc_word0 + j * 4, gpu_list[8 + j], 0xffffffff); + + fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); + for (unsigned j = 0; j < 4; j++) + ac_dump_reg(f, chunk->gfx_level, chunk->family, + R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, gpu_list[12 + j], 0xffffffff); + break; + } + + if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) { + fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!" COLOR_RESET "\n"); + } + + fprintf(f, "\n"); + } +} + +static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = { + .destroy = si_log_chunk_desc_list_destroy, + .print = si_log_chunk_desc_list_print, +}; + +static void si_dump_descriptor_list(struct si_screen *screen, struct si_descriptors *desc, + const char *shader_name, const char *elem_name, + unsigned element_dw_size, unsigned num_elements, + slot_remap_func slot_remap, struct u_log_context *log) +{ + if (!desc->list) + return; + + /* In some cases, the caller doesn't know how many elements are really + * uploaded. Reduce num_elements to fit in the range of active slots. */ + unsigned active_range_dw_begin = desc->first_active_slot * desc->element_dw_size; + unsigned active_range_dw_end = + active_range_dw_begin + desc->num_active_slots * desc->element_dw_size; + + while (num_elements > 0) { + int i = slot_remap(num_elements - 1); + unsigned dw_begin = i * element_dw_size; + unsigned dw_end = dw_begin + element_dw_size; + + if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end) + break; + + num_elements--; + } + + struct si_log_chunk_desc_list *chunk = + CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list, 4 * (size_t)element_dw_size * num_elements); + chunk->shader_name = shader_name; + chunk->elem_name = elem_name; + chunk->element_dw_size = element_dw_size; + chunk->num_elements = num_elements; + chunk->slot_remap = slot_remap; + chunk->gfx_level = screen->info.gfx_level; + chunk->family = screen->info.family; + + si_resource_reference(&chunk->buf, desc->buffer); + chunk->gpu_list = desc->gpu_list; + + for (unsigned i = 0; i < num_elements; ++i) { + memcpy(&chunk->list[i * element_dw_size], &desc->list[slot_remap(i) * element_dw_size], + 4 * element_dw_size); + } + + u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk); +} + +static unsigned si_identity(unsigned slot) +{ + return slot; +} + +static void si_dump_descriptors(struct si_context *sctx, mesa_shader_stage stage, + const struct si_shader_info *info, struct u_log_context *log) +{ + struct si_descriptors *descs = + &sctx->descriptors[SI_DESCS_FIRST_SHADER + stage * SI_NUM_SHADER_DESCS]; + static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"}; + const char *name = shader_name[stage]; + unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers; + unsigned enabled_images; + + if (info) { + enabled_constbuf = BITFIELD_MASK(info->base.num_ubos); + enabled_shaderbuf = BITFIELD_MASK(info->base.num_ssbos); + enabled_samplers = info->base.textures_used; + enabled_images = BITFIELD_MASK(info->base.num_images); + } else { + enabled_constbuf = + sctx->const_and_shader_buffers[stage].enabled_mask >> SI_NUM_SHADER_BUFFERS; + enabled_shaderbuf = 0; + for (int i = 0; i < SI_NUM_SHADER_BUFFERS; i++) { + enabled_shaderbuf |= + (sctx->const_and_shader_buffers[stage].enabled_mask & + 1llu << (SI_NUM_SHADER_BUFFERS - i - 1)) << i; + } + enabled_samplers = sctx->samplers[stage].enabled_mask; + enabled_images = sctx->images[stage].enabled_mask; + } + + si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], name, + " - Constant buffer", 4, util_last_bit(enabled_constbuf), + si_get_constbuf_slot, log); + si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], name, + " - Shader buffer", 4, util_last_bit(enabled_shaderbuf), + si_get_shaderbuf_slot, log); + si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], name, + " - Sampler", 16, util_last_bit(enabled_samplers), si_get_sampler_slot, + log); + si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], name, + " - Image", 8, util_last_bit(enabled_images), si_get_image_slot, log); +} + +static void si_dump_gfx_descriptors(struct si_context *sctx, + const struct si_shader_ctx_state *state, + struct u_log_context *log) +{ + if (!state->cso || !state->current) + return; + + si_dump_descriptors(sctx, state->cso->stage, &state->cso->info, log); +} + +static void si_dump_compute_descriptors(struct si_context *sctx, + const struct si_compute *program, + struct u_log_context *log) +{ + if (!program) + return; + + si_dump_descriptors(sctx, program->sel.stage, NULL, log); +} + +struct si_shader_inst { + const char *text; /* start of disassembly for this instruction */ + unsigned textlen; + unsigned size; /* instruction size = 4 or 8 */ + uint64_t addr; /* instruction address */ +}; + +/** + * Open the given \p binary as \p rtld_binary and split the contained + * disassembly string into instructions and add them to the array + * pointed to by \p instructions, which must be sufficiently large. + * + * Labels are considered to be part of the following instruction. + * + * The caller must keep \p rtld_binary alive as long as \p instructions are + * used and then close it afterwards. + */ +static void si_add_split_disasm(struct si_screen *screen, struct ac_rtld_binary *rtld_binary, + struct si_shader_binary *binary, uint64_t *addr, unsigned *num, + struct si_shader_inst *instructions, + mesa_shader_stage stage, unsigned wave_size) +{ + if (!ac_rtld_open(rtld_binary, (struct ac_rtld_open_info){ + .info = &screen->info, + .shader_type = stage, + .wave_size = wave_size, + .num_parts = 1, + .elf_ptrs = &binary->code_buffer, + .elf_sizes = &binary->code_size})) + return; + + const char *disasm; + size_t nbytes; + if (!ac_rtld_get_section_by_name(rtld_binary, ".AMDGPU.disasm", &disasm, &nbytes)) + return; + + const char *end = disasm + nbytes; + while (disasm < end) { + const char *semicolon = memchr(disasm, ';', end - disasm); + if (!semicolon) + break; + + struct si_shader_inst *inst = &instructions[(*num)++]; + const char *inst_end = memchr(semicolon + 1, '\n', end - semicolon - 1); + if (!inst_end) + inst_end = end; + + inst->text = disasm; + inst->textlen = inst_end - disasm; + + inst->addr = *addr; + /* More than 16 chars after ";" means the instruction is 8 bytes long. */ + inst->size = inst_end - semicolon > 16 ? 8 : 4; + *addr += inst->size; + + if (inst_end == end) + break; + disasm = inst_end + 1; + } +} + +/* If the shader is being executed, print its asm instructions, and annotate + * those that are being executed right now with information about waves that + * execute them. This is most useful during a GPU hang. + */ +static void si_print_annotated_shader(struct si_shader *shader, struct ac_wave_info *waves, + unsigned num_waves, FILE *f) +{ + if (!shader) + return; + + struct si_screen *screen = shader->selector->screen; + mesa_shader_stage stage = shader->selector->stage; + uint64_t start_addr = shader->bo->gpu_address; + uint64_t end_addr = start_addr + shader->bo->b.b.width0; + unsigned i; + + /* See if any wave executes the shader. */ + for (i = 0; i < num_waves; i++) { + if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) + break; + } + if (i == num_waves) + return; /* the shader is not being executed */ + + /* Remember the first found wave. The waves are sorted according to PC. */ + waves = &waves[i]; + num_waves -= i; + + /* Get the list of instructions. + * Buffer size / 4 is the upper bound of the instruction count. + */ + unsigned num_inst = 0; + uint64_t inst_addr = start_addr; + struct ac_rtld_binary rtld_binaries[5] = {}; + struct si_shader_inst *instructions = + calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst)); + + if (shader->prolog) { + si_add_split_disasm(screen, &rtld_binaries[0], &shader->prolog->binary, &inst_addr, &num_inst, + instructions, stage, shader->wave_size); + } + if (shader->previous_stage) { + si_add_split_disasm(screen, &rtld_binaries[1], &shader->previous_stage->binary, &inst_addr, + &num_inst, instructions, stage, shader->wave_size); + } + si_add_split_disasm(screen, &rtld_binaries[3], &shader->binary, &inst_addr, &num_inst, + instructions, stage, shader->wave_size); + if (shader->epilog) { + si_add_split_disasm(screen, &rtld_binaries[4], &shader->epilog->binary, &inst_addr, &num_inst, + instructions, stage, shader->wave_size); + } + + fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", + si_get_shader_name(shader)); + + /* Print instructions with annotations. */ + for (i = 0; i < num_inst; i++) { + struct si_shader_inst *inst = &instructions[i]; + + fprintf(f, "%.*s [PC=0x%" PRIx64 ", size=%u]\n", inst->textlen, inst->text, inst->addr, + inst->size); + + /* Print which waves execute the instruction right now. */ + while (num_waves && inst->addr == waves->pc) { + fprintf(f, + " " COLOR_GREEN "^ SE%u SH%u CU%u " + "SIMD%u WAVE%u EXEC=%016" PRIx64 " ", + waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec); + + if (inst->size == 4) { + fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0); + } else { + fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1); + } + + waves->matched = true; + waves = &waves[1]; + num_waves--; + } + } + + fprintf(f, "\n\n"); + free(instructions); + for (unsigned i = 0; i < ARRAY_SIZE(rtld_binaries); ++i) + ac_rtld_close(&rtld_binaries[i]); +} + +void si_dump_annotated_shaders(struct si_context *sctx, FILE *f) +{ + struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; + unsigned num_waves = ac_get_wave_info(sctx->gfx_level, &sctx->screen->info, NULL, waves); + + fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); + + si_print_annotated_shader(sctx->shader.vs.current, waves, num_waves, f); + si_print_annotated_shader(sctx->shader.tcs.current, waves, num_waves, f); + si_print_annotated_shader(sctx->shader.tes.current, waves, num_waves, f); + si_print_annotated_shader(sctx->shader.gs.current, waves, num_waves, f); + si_print_annotated_shader(sctx->shader.ps.current, waves, num_waves, f); + + /* Print waves executing shaders that are not currently bound. */ + unsigned i; + bool found = false; + for (i = 0; i < num_waves; i++) { + if (waves[i].matched) + continue; + + if (!found) { + fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n"); + found = true; + } + fprintf(f, + " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 + "\n", + waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec, + waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc); + } + if (found) + fprintf(f, "\n\n"); +} + +void si_log_draw_state(struct si_context *sctx, struct u_log_context *log) +{ + if (!log) + return; + + si_dump_framebuffer(sctx, log); + + si_dump_compute_shader(sctx, sctx->ts_shader_state.program, log); + si_dump_gfx_shader(sctx, &sctx->ms_shader_state, log); + si_dump_gfx_shader(sctx, &sctx->shader.vs, log); + si_dump_gfx_shader(sctx, &sctx->shader.tcs, log); + si_dump_gfx_shader(sctx, &sctx->shader.tes, log); + si_dump_gfx_shader(sctx, &sctx->shader.gs, log); + si_dump_gfx_shader(sctx, &sctx->shader.ps, log); + + si_dump_descriptor_list(sctx->screen, &sctx->descriptors[SI_DESCS_INTERNAL], "", "RW buffers", + 4, sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots, si_identity, + log); + si_dump_compute_descriptors(sctx, sctx->ts_shader_state.program, log); + si_dump_gfx_descriptors(sctx, &sctx->ms_shader_state, log); + si_dump_gfx_descriptors(sctx, &sctx->shader.vs, log); + si_dump_gfx_descriptors(sctx, &sctx->shader.tcs, log); + si_dump_gfx_descriptors(sctx, &sctx->shader.tes, log); + si_dump_gfx_descriptors(sctx, &sctx->shader.gs, log); + si_dump_gfx_descriptors(sctx, &sctx->shader.ps, log); +} + +void si_log_compute_state(struct si_context *sctx, struct u_log_context *log) +{ + if (!log) + return; + + si_dump_compute_shader(sctx, sctx->cs_shader_state.program, log); + si_dump_compute_descriptors(sctx, sctx->cs_shader_state.program, log); +} + +void si_gather_context_rolls(struct si_context *sctx) +{ + struct radeon_cmdbuf *cs = &sctx->gfx_cs; + uint32_t **ibs = alloca(sizeof(ibs[0]) * (cs->num_prev + 1)); + uint32_t *ib_dw_sizes = alloca(sizeof(ib_dw_sizes[0]) * (cs->num_prev + 1)); + + for (unsigned i = 0; i < cs->num_prev; i++) { + struct ac_cmdbuf *chunk = &cs->prev[i]; + + ibs[i] = chunk->buf; + ib_dw_sizes[i] = chunk->cdw; + } + + ibs[cs->num_prev] = cs->current.buf; + ib_dw_sizes[cs->num_prev] = cs->current.cdw; + + FILE *f = fopen(sctx->screen->context_roll_log_filename, "a"); + ac_gather_context_rolls(f, ibs, ib_dw_sizes, cs->num_prev + 1, NULL, &sctx->screen->info); + fclose(f); +} diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 4010b5fc584..ce671b71da3 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1560,19 +1560,22 @@ void si_cp_acquire_mem(struct si_context *sctx, struct radeon_cmdbuf *cs, unsign void si_cp_pfp_sync_me(struct radeon_cmdbuf *cs); /* si_debug.c */ -void si_gather_context_rolls(struct si_context *sctx); void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_saved_cs *saved, bool get_buffer_list); void si_destroy_saved_cs(struct si_saved_cs *scs); void si_auto_log_cs(void *data, struct u_log_context *log); void si_log_hw_flush(struct si_context *sctx); -void si_log_draw_state(struct si_context *sctx, struct u_log_context *log); -void si_log_compute_state(struct si_context *sctx, struct u_log_context *log); void si_init_debug_functions(struct si_context *sctx); void si_check_vm_faults(struct si_context *sctx, struct radeon_saved_cs *saved); -bool si_replace_shader(unsigned num, struct si_shader_binary *binary); void si_print_current_ib(struct si_context *sctx, FILE *f); +/* si_debug_gfx_compute.c */ +bool si_replace_shader(unsigned num, struct si_shader_binary *binary); +void si_dump_annotated_shaders(struct si_context *sctx, FILE *f); +void si_log_draw_state(struct si_context *sctx, struct u_log_context *log); +void si_gather_context_rolls(struct si_context *sctx); +void si_log_compute_state(struct si_context *sctx, struct u_log_context *log); + /* si_fence.c */ void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigned event, unsigned event_flags, unsigned dst_sel, unsigned int_sel, unsigned data_sel,