diff --git a/src/freedreno/decode/rdcompiler-meson.build b/src/freedreno/decode/rdcompiler-meson.build index 730b28f05f9..020db1585ed 100644 --- a/src/freedreno/decode/rdcompiler-meson.build +++ b/src/freedreno/decode/rdcompiler-meson.build @@ -30,4 +30,7 @@ generate_rd = executable( link_with: [ mesa_proj.get_variable('libfreedreno_cffdec'), ], + dependencies : [ + mesa_proj.get_variable('idep_nir'), + ], ) \ No newline at end of file diff --git a/src/freedreno/decode/rdcompiler-utils.h b/src/freedreno/decode/rdcompiler-utils.h index 4a63e706a7f..fadaa541f5f 100644 --- a/src/freedreno/decode/rdcompiler-utils.h +++ b/src/freedreno/decode/rdcompiler-utils.h @@ -22,6 +22,10 @@ #include "a6xx.xml.h" +#include "ir3/ir3_assembler.h" +#include "ir3/ir3_compiler.h" +#include "ir3/ir3_shader.h" + #include "util/list.h" #include "util/vma.h" @@ -35,14 +39,27 @@ struct cmdstream { uint64_t iova; }; +static uint64_t +cs_get_cur_iova(struct cmdstream *cs) +{ + return cs->iova + cs->cur * sizeof(uint32_t); +} + struct replay_context { + void *mem_ctx; + struct util_vma_heap vma; struct cmdstream *submit_cs; struct cmdstream *state_cs; + struct cmdstream *shader_cs; struct list_head cs_list; + struct ir3_compiler *compiler; + + struct hash_table_u64 *compiled_shaders; + const char *output_name; }; @@ -60,6 +77,18 @@ pkt_qw(struct cmdstream *cs, uint64_t payload) pkt(cs, payload >> 32); } +static uint64_t +pkt_blob(struct cmdstream *cs, void *payload, uint32_t size, uint32_t alignment) +{ + cs->cur = align(cs->cur, alignment / sizeof(uint32_t)); + uint64_t start_iova = cs_get_cur_iova(cs); + + memcpy(cs->mem + cs->cur, payload, size); + cs->cur += size; + + return start_iova; +} + static void pkt4(struct cmdstream *cs, uint16_t regindx, uint16_t cnt, uint32_t payload) { @@ -94,12 +123,6 @@ cs_alloc(struct replay_context *ctx, uint32_t size) return cs; } -static uint64_t -cs_get_cur_iova(struct cmdstream *cs) -{ - return cs->iova + cs->cur * sizeof(uint32_t); -} - static void rd_write_cs_buffer(FILE *out, struct cmdstream *cs) { @@ -159,7 +182,8 @@ static const struct option opts[] = { /* clang-format on */ static void -replay_context_init(struct replay_context *ctx, int argc, char **argv) +replay_context_init(struct replay_context *ctx, struct fd_dev_id *dev_id, + int argc, char **argv) { uint64_t va_start = 0; uint64_t va_size = 0; @@ -189,12 +213,18 @@ replay_context_init(struct replay_context *ctx, int argc, char **argv) exit(1); } + ctx->mem_ctx = ralloc_context(NULL); list_inithead(&ctx->cs_list); util_vma_heap_init(&ctx->vma, va_start, ROUND_DOWN_TO(va_size, 4096)); ctx->submit_cs = cs_alloc(ctx, 1024 * 1024); ctx->state_cs = cs_alloc(ctx, 2 * 1024 * 1024); + ctx->shader_cs = cs_alloc(ctx, 8 * 1024 * 1024); + + ctx->compiler = + ir3_compiler_create(NULL, dev_id, &(struct ir3_compiler_options){}); + ctx->compiled_shaders = _mesa_hash_table_u64_create(ctx->mem_ctx); } static void @@ -219,6 +249,33 @@ replay_context_finish(struct replay_context *ctx) fclose(out); } +static void +upload_shader(struct replay_context *ctx, uint64_t id, const char *source) +{ + FILE *in = fmemopen((void *)source, strlen(source), "r"); + + struct ir3_kernel_info info = {}; + struct ir3_shader *shader = ir3_parse_asm(ctx->compiler, &info, in); + assert(shader); + + fclose(in); + + uint64_t *shader_iova = ralloc(ctx->mem_ctx, uint64_t); + *shader_iova = pkt_blob(ctx->shader_cs, shader->variants->bin, + shader->variants->info.size, 128); + ralloc_free(shader); + + _mesa_hash_table_u64_insert(ctx->compiled_shaders, id, shader_iova); +} + +static void +emit_shader_iova(struct replay_context *ctx, struct cmdstream *cs, uint64_t id) +{ + uint64_t *shader_iova = + _mesa_hash_table_u64_search(ctx->compiled_shaders, id); + pkt_qw(cs, *shader_iova); +} + #define begin_draw_state() \ uint64_t subcs_iova_start = cs_get_cur_iova(ctx.state_cs); \ struct cmdstream *prev_cs = cs; \ diff --git a/src/freedreno/decode/rddecompiler.c b/src/freedreno/decode/rddecompiler.c index 1d19be6da58..092691c37c3 100644 --- a/src/freedreno/decode/rddecompiler.c +++ b/src/freedreno/decode/rddecompiler.c @@ -29,12 +29,17 @@ #include "freedreno_pm4.h" #include "a6xx.xml.h" +#include "common/freedreno_dev_info.h" +#include "util/hash_table.h" #include "util/os_time.h" +#include "util/ralloc.h" #include "util/rb_tree.h" +#include "util/set.h" #include "util/u_vector.h" #include "buffers.h" #include "cffdec.h" +#include "disasm.h" #include "io.h" #include "rdutil.h" #include "redump.h" @@ -148,6 +153,10 @@ main(int argc, char **argv) } static struct rnn *rnn; +static struct fd_dev_id dev_id; +static void *mem_ctx; + +static struct set decompiled_shaders; static void init_rnn(const char *gpuname) @@ -162,11 +171,71 @@ pktname(unsigned opc) return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc); } -static void -decompile_register(uint32_t regbase, uint32_t dword, uint16_t cnt, int level) +static uint32_t +decompile_shader(const char *name, uint32_t regbase, uint32_t *dwords, int level) +{ + uint64_t gpuaddr = ((uint64_t)dwords[1] << 32) | dwords[0]; + gpuaddr &= 0xfffffffffffffff0; + + /* Shader's iova is referenced in two places, so we have to remember it. */ + if (_mesa_set_search(&decompiled_shaders, &gpuaddr)) { + printlvl(level, "emit_shader_iova(&ctx, cs, 0x%" PRIx64 ");\n", gpuaddr); + } else { + uint64_t *key = ralloc(mem_ctx, uint64_t); + *key = gpuaddr; + _mesa_set_add(&decompiled_shaders, key); + + void *buf = hostptr(gpuaddr); + assert(buf); + + uint32_t sizedwords = hostlen(gpuaddr) / 4; + + char *stream_data = NULL; + size_t stream_size = 0; + FILE *stream = open_memstream(&stream_data, &stream_size); + + try_disasm_a3xx(buf, sizedwords, 0, stream, dev_id.gpu_id); + fclose(stream); + + printlvl(level, "{\n"); + printlvl(level + 1, "const char *source = R\"(\n"); + printf("%s", stream_data); + printlvl(level + 1, ")\";\n"); + printlvl(level + 1, "upload_shader(&ctx, 0x%" PRIx64 ", source);\n", gpuaddr); + printlvl(level + 1, "emit_shader_iova(&ctx, cs, 0x%" PRIx64 ");\n", gpuaddr); + printlvl(level, "}\n"); + } + + return 2; +} + +static struct { + uint32_t regbase; + uint32_t (*fxn)(const char *name, uint32_t regbase, uint32_t *dwords, int level); +} reg_a6xx[] = { + {REG_A6XX_SP_VS_OBJ_START, decompile_shader}, + {REG_A6XX_SP_HS_OBJ_START, decompile_shader}, + {REG_A6XX_SP_DS_OBJ_START, decompile_shader}, + {REG_A6XX_SP_GS_OBJ_START, decompile_shader}, + {REG_A6XX_SP_FS_OBJ_START, decompile_shader}, + {REG_A6XX_SP_CS_OBJ_START, decompile_shader}, + + {0, NULL}, +}, *type0_reg; + +static uint32_t +decompile_register(uint32_t regbase, uint32_t *dwords, uint16_t cnt, int level) { struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase); + for (unsigned idx = 0; type0_reg[idx].regbase; idx++) { + if (type0_reg[idx].regbase == regbase) { + return type0_reg[idx].fxn(info->name, regbase, dwords, level); + } + } + + const uint32_t dword = *dwords; + if (info && info->typeinfo) { char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword); printlvl(level, "/* pkt4: %s = %s */\n", info->name, decoded); @@ -195,6 +264,8 @@ decompile_register(uint32_t regbase, uint32_t dword, uint16_t cnt, int level) printlvl(level, "/* unknown pkt4 */\n"); printlvl(level, "pkt4(cs, %u, (%u), %u);\n", regbase, cnt, dword); } + + return 1; } static void @@ -203,17 +274,19 @@ decompile_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords, { if (!sizedwords) return; - decompile_register(regbase, *dwords, sizedwords--, level); - while (sizedwords--) { - regbase++; - dwords++; - decompile_register(regbase, *dwords, 0, level); + uint32_t consumed = decompile_register(regbase, dwords, sizedwords, level); + sizedwords -= consumed; + while (sizedwords > 0) { + regbase += consumed; + dwords += consumed; + consumed = decompile_register(regbase, dwords, 0, level); + sizedwords -= consumed; } } static void -decompile_domain(uint32_t *dwords, uint32_t sizedwords, const char *dom_name, - const char *packet_name, int level) +decompile_domain(uint32_t pkt, uint32_t *dwords, uint32_t sizedwords, + const char *dom_name, const char *packet_name, int level) { struct rnndomain *dom; int i; @@ -222,6 +295,22 @@ decompile_domain(uint32_t *dwords, uint32_t sizedwords, const char *dom_name, printlvl(level, "pkt7(cs, %s, %u);\n", packet_name, sizedwords); + if (pkt == CP_LOAD_STATE6_FRAG || pkt == CP_LOAD_STATE6_GEOM) { + enum a6xx_state_type state_type = + (dwords[0] & CP_LOAD_STATE6_0_STATE_TYPE__MASK) >> + CP_LOAD_STATE6_0_STATE_TYPE__SHIFT; + enum a6xx_state_src state_src = + (dwords[0] & CP_LOAD_STATE6_0_STATE_SRC__MASK) >> + CP_LOAD_STATE6_0_STATE_SRC__SHIFT; + + /* TODO: decompile all other state */ + if (state_type == ST6_SHADER && state_src == SS6_INDIRECT) { + printlvl(level, "pkt(cs, %u);\n", dwords[0]); + decompile_shader(NULL, 0, dwords + 1, level); + return; + } + } + for (i = 0; i < sizedwords; i++) { struct rnndecaddrinfo *info = NULL; if (dom) { @@ -300,7 +389,7 @@ decompile_commands(uint32_t *dwords, uint32_t sizedwords, int level) printlvl(level + 1, "end_draw_state(%u);\n", unchanged); printlvl(level, "}\n"); } else { - decompile_domain(dwords + i, 3, "CP_SET_DRAW_STATE", + decompile_domain(val, dwords + i, 3, "CP_SET_DRAW_STATE", "CP_SET_DRAW_STATE", level); } } @@ -314,7 +403,7 @@ decompile_commands(uint32_t *dwords, uint32_t sizedwords, int level) if (!strcmp(packet_name, "CP_LOAD_STATE6_FRAG") || !strcmp(packet_name, "CP_LOAD_STATE6_GEOM")) dom_name = "CP_LOAD_STATE6"; - decompile_domain(dwords + 1, count - 1, dom_name, packet_name, + decompile_domain(val, dwords + 1, count - 1, dom_name, packet_name, level); } else { errx(1, "unknown pkt7 %u", val); @@ -332,6 +421,39 @@ decompile_commands(uint32_t *dwords, uint32_t sizedwords, int level) fprintf(stderr, "**** this ain't right!! dwords_left=%d\n", dwords_left); } +static void +emit_header() +{ + if (!dev_id.gpu_id || !dev_id.chip_id) + return; + + static bool emitted = false; + if (emitted) + return; + emitted = true; + + printf("#include \"decode/rdcompiler-utils.h\"\n" + "int main(int argc, char **argv)\n" + "{\n" + "\tstruct replay_context ctx;\n" + "\tstruct fd_dev_id dev_id = {%u, %" PRIu64 "};\n" + "\treplay_context_init(&ctx, &dev_id, argc, argv);\n" + "\tstruct cmdstream *cs = ctx.submit_cs;\n\n", + dev_id.gpu_id, dev_id.chip_id); +} + +static inline uint32_t +u64_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(uint64_t)); +} + +static inline bool +u64_compare(const void *key1, const void *key2) +{ + return memcmp(key1, key2, sizeof(uint64_t)) == 0; +} + static int handle_file(const char *filename, uint32_t submit_to_decompile) { @@ -351,13 +473,9 @@ handle_file(const char *filename, uint32_t submit_to_decompile) } init_rnn("a6xx"); - - printf("#include \"decode/rdcompiler-utils.h\"\n" - "int main(int argc, char **argv)\n" - "{\n" - "\tstruct replay_context ctx;\n" - "\treplay_context_init(&ctx, argc, argv);\n" - "\tstruct cmdstream *cs = ctx.submit_cs;\n\n"); + type0_reg = reg_a6xx; + mem_ctx = ralloc_context(NULL); + _mesa_set_init(&decompiled_shaders, mem_ctx, u64_hash, u64_compare); struct { unsigned int len; @@ -369,8 +487,6 @@ handle_file(const char *filename, uint32_t submit_to_decompile) case RD_TEST: case RD_VERT_SHADER: case RD_FRAG_SHADER: - case RD_GPU_ID: - case RD_CHIP_ID: case RD_CMD: /* no-op */ break; @@ -398,6 +514,16 @@ handle_file(const char *filename, uint32_t submit_to_decompile) submit++; break; } + case RD_GPU_ID: { + dev_id.gpu_id = parse_gpu_id(ps.buf); + emit_header(); + break; + } + case RD_CHIP_ID: { + dev_id.chip_id = *(uint64_t *)ps.buf; + emit_header(); + break; + } default: break; }