diff --git a/src/freedreno/decode/meson.build b/src/freedreno/decode/meson.build index a34ca892b53..fe7143d1cae 100644 --- a/src/freedreno/decode/meson.build +++ b/src/freedreno/decode/meson.build @@ -92,6 +92,30 @@ if dep_libarchive.found() build_by_default: with_tools.contains('freedreno'), install: install_fd_decode_tools, ) + + rddecompiler = executable( + 'rddecompiler', + [ + 'rddecompiler.c' + ], + include_directories: [ + inc_freedreno, + inc_freedreno_rnn, + inc_include, + inc_src, + ], + c_args : [no_override_init_args], + gnu_symbol_visibility: 'hidden', + dependencies: [ + dep_libdrm, + ], + link_with: [ + libfreedreno_cffdec, + libfreedreno_io, + ], + build_by_default: with_tools.contains('freedreno'), + install: install_fd_decode_tools, + ) endif if dep_lua.found() and dep_libarchive.found() diff --git a/src/freedreno/decode/rdcompiler-meson.build b/src/freedreno/decode/rdcompiler-meson.build new file mode 100644 index 00000000000..730b28f05f9 --- /dev/null +++ b/src/freedreno/decode/rdcompiler-meson.build @@ -0,0 +1,33 @@ +# An example meson.build for .rd decompiled into C source via rddecompiler. +# It uses Mesa as a subproject since the generated sources depend on +# generated headers with registers info and on some utils. +# Before compiling, create "subprojects" directory and create a link +# to mesa there. 
+ +project('generate_rd', ['c'], default_options : ['c_std=c11']) + +mesa_proj = subproject('mesa', + default_options : ['gallium-drivers=', + 'dri-drivers=', + 'glx=disabled', + 'vulkan-drivers=', + 'tools=freedreno']) + +add_project_arguments(mesa_proj.get_variable('pre_args'), language : ['c']) +add_project_arguments(mesa_proj.get_variable('c_args'), language : ['c']) + +generate_rd = executable( + 'generate_rd', + [ + 'generate_rd.c' + ], + include_directories: [ + mesa_proj.get_variable('inc_freedreno'), + mesa_proj.get_variable('inc_freedreno_rnn'), + mesa_proj.get_variable('inc_include'), + mesa_proj.get_variable('inc_src'), + ], + link_with: [ + mesa_proj.get_variable('libfreedreno_cffdec'), + ], +) \ No newline at end of file diff --git a/src/freedreno/decode/rdcompiler-utils.h b/src/freedreno/decode/rdcompiler-utils.h new file mode 100644 index 00000000000..4a63e706a7f --- /dev/null +++ b/src/freedreno/decode/rdcompiler-utils.h @@ -0,0 +1,243 @@ +/* + * Copyright © 2022 Igalia S.L. 
+ * SPDX-License-Identifier: MIT + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "freedreno/decode/redump.h" + +#include "util/u_math.h" + +#include "adreno_common.xml.h" +#include "adreno_pm4.xml.h" +#include "freedreno_pm4.h" + +#include "a6xx.xml.h" + +#include "util/list.h" +#include "util/vma.h" + +struct cmdstream { + struct list_head link; + + uint32_t *mem; + uint32_t total_size; + uint32_t cur; + + uint64_t iova; +}; + +struct replay_context { + struct util_vma_heap vma; + + struct cmdstream *submit_cs; + struct cmdstream *state_cs; + + struct list_head cs_list; + + const char *output_name; +}; + +static void +pkt(struct cmdstream *cs, uint32_t payload) +{ + assert(cs->cur <= cs->total_size); + cs->mem[cs->cur++] = payload; +} + +static void +pkt_qw(struct cmdstream *cs, uint64_t payload) +{ + pkt(cs, payload); + pkt(cs, payload >> 32); +} + +static void +pkt4(struct cmdstream *cs, uint16_t regindx, uint16_t cnt, uint32_t payload) +{ + pkt(cs, pm4_pkt4_hdr(regindx, cnt)); + pkt(cs, payload); +} + +static void +pkt7(struct cmdstream *cs, uint8_t opcode, uint16_t cnt) +{ + pkt(cs, pm4_pkt7_hdr(opcode, cnt)); +} + +struct rd_section { + uint32_t type; + uint32_t size; +}; + +static struct cmdstream * +cs_alloc(struct replay_context *ctx, uint32_t size) +{ + struct cmdstream *cs = calloc(1, sizeof(struct cmdstream)); + cs->mem = (uint32_t *)calloc(1, size); + cs->total_size = size / sizeof(uint32_t); + cs->cur = 0; + cs->iova = util_vma_heap_alloc(&ctx->vma, size, 4096); + + assert(cs->iova != 0); + + list_addtail(&cs->link, &ctx->cs_list); + + return cs; +} + +static uint64_t +cs_get_cur_iova(struct cmdstream *cs) +{ + return cs->iova + cs->cur * sizeof(uint32_t); +} + +static void +rd_write_cs_buffer(FILE *out, struct cmdstream *cs) +{ + if (cs->cur == 0) + return; + + const uint32_t packet[] = {(uint32_t)cs->iova, cs->cur * sizeof(uint32_t), + (uint32_t)(cs->iova >> 32)}; + struct rd_section section_address 
= {.type = RD_GPUADDR, + .size = sizeof(packet)}; + fwrite(§ion_address, sizeof(section_address), 1, out); + fwrite(packet, sizeof(packet), 1, out); + + struct rd_section section_contents = {.type = RD_BUFFER_CONTENTS, + .size = cs->cur * sizeof(uint32_t)}; + + fwrite(§ion_contents, sizeof(section_contents), 1, out); + fwrite(cs->mem, sizeof(uint32_t), cs->cur, out); +} + +static void +rd_write_cs_submit(FILE *out, struct cmdstream *cs) +{ + const uint32_t packet[] = {(uint32_t)cs->iova, cs->cur, + (uint32_t)(cs->iova >> 32)}; + struct rd_section section_cmdstream = {.type = RD_CMDSTREAM_ADDR, + .size = sizeof(packet)}; + + fwrite(§ion_cmdstream, sizeof(section_cmdstream), 1, out); + fwrite(packet, sizeof(packet), 1, out); +} + +static void +print_usage(const char *name) +{ + /* clang-format off */ + fprintf(stderr, "Usage:\n\n" + "\t%s [OPTSIONS]... FILE...\n\n" + "Options:\n" + "\t --vastart=offset\n" + "\t --vasize=size\n" + "\t-h, --help - show this message\n" + , name); + /* clang-format on */ + exit(2); +} + +#define OPT_VA_START 1000 +#define OPT_VA_SIZE 1001 + +/* clang-format off */ +static const struct option opts[] = { + { "vastart", required_argument, 0, OPT_VA_START }, + { "vasize", required_argument, 0, OPT_VA_SIZE }, + { "help", no_argument, 0, 'h' }, +}; +/* clang-format on */ + +static void +replay_context_init(struct replay_context *ctx, int argc, char **argv) +{ + uint64_t va_start = 0; + uint64_t va_size = 0; + + int c; + while ((c = getopt_long(argc, argv, "h", opts, NULL)) != -1) { + switch (c) { + case OPT_VA_START: + va_start = strtoull(optarg, NULL, 0); + break; + case OPT_VA_SIZE: + va_size = strtoull(optarg, NULL, 0); + break; + case 'h': + default: + print_usage(argv[0]); + } + } + + if (optind < argc) { + ctx->output_name = argv[optind]; + } else { + } + + if (!va_start || !va_size || !ctx->output_name) { + print_usage(argv[0]); + exit(1); + } + + list_inithead(&ctx->cs_list); + + util_vma_heap_init(&ctx->vma, va_start, 
ROUND_DOWN_TO(va_size, 4096)); + + ctx->submit_cs = cs_alloc(ctx, 1024 * 1024); + ctx->state_cs = cs_alloc(ctx, 2 * 1024 * 1024); +} + +static void +replay_context_finish(struct replay_context *ctx) +{ + FILE *out = fopen(ctx->output_name, "w"); + if (!out) { + errx(1, "Cannot open '%s' for writing\n", ctx->output_name); + } + + static const uint32_t gpu_id = 660; + struct rd_section section_gpu_id = {.type = RD_GPU_ID, + .size = 1 * sizeof(uint32_t)}; + fwrite(§ion_gpu_id, sizeof(section_gpu_id), 1, out); + fwrite(&gpu_id, sizeof(uint32_t), 1, out); + + list_for_each_entry (struct cmdstream, cs, &ctx->cs_list, link) { + rd_write_cs_buffer(out, cs); + } + rd_write_cs_submit(out, ctx->submit_cs); + + fclose(out); +} + +#define begin_draw_state() \ + uint64_t subcs_iova_start = cs_get_cur_iova(ctx.state_cs); \ + struct cmdstream *prev_cs = cs; \ + struct cmdstream *cs = ctx.state_cs; + +#define end_draw_state(params) \ + uint64_t subcs_iova_end = cs_get_cur_iova(ctx.state_cs); \ + uint32_t subcs_size = \ + (subcs_iova_end - subcs_iova_start) / sizeof(uint32_t); \ + pkt7(prev_cs, CP_SET_DRAW_STATE, 3); \ + pkt(prev_cs, (params) | subcs_size); \ + pkt_qw(prev_cs, subcs_iova_start); + +#define begin_ib() \ + struct cmdstream *prev_cs = cs; \ + struct cmdstream *cs = cs_alloc(&ctx, 1024 * 1024); + +#define end_ib() \ + uint64_t ibcs_size = cs->cur; \ + pkt7(prev_cs, CP_INDIRECT_BUFFER, 3); \ + pkt_qw(prev_cs, cs->iova); \ + pkt(prev_cs, ibcs_size); diff --git a/src/freedreno/decode/rddecompiler.c b/src/freedreno/decode/rddecompiler.c new file mode 100644 index 00000000000..1d19be6da58 --- /dev/null +++ b/src/freedreno/decode/rddecompiler.c @@ -0,0 +1,415 @@ +/* + * Copyright © 2022 Igalia S.L. 
+ * SPDX-License-Identifier: MIT
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "util/u_math.h"
+
+#include "adreno_common.xml.h"
+#include "adreno_pm4.xml.h"
+#include "freedreno_pm4.h"
+
+#include "a6xx.xml.h"
+
+#include "util/os_time.h"
+#include "util/rb_tree.h"
+#include "util/u_vector.h"
+#include "buffers.h"
+#include "cffdec.h"
+#include "io.h"
+#include "rdutil.h"
+#include "redump.h"
+#include "rnnutil.h"
+
+/* Decompiles a single cmdstream from .rd into compilable C source.
+ * Given the address space bounds the generated program creates
+ * a new .rd which could be used to override cmdstream with 'replay'.
+ * Generated .rd is not replayable on its own and depends on buffers
+ * provided by the source .rd.
+ *
+ * C source could be compiled using rdcompiler-meson.build as an example.
+ *
+ * The workflow would look like this:
+ * 1) Find the cmdstream № you want to edit;
+ * 2) Decompile it:
+ *     rddecompiler -s %cmd_stream_№% example.rd > generate_rd.c
+ * 3) Edit the command stream;
+ * 4) Compile it back, see rdcompiler-meson.build for the instructions;
+ * 5) Plug the generator into cmdstream replay:
+ *     replay --override=%cmd_stream_№% --generator=~/generate_rd
+ * 6) Repeat 3-5.
+ */
+
+static int handle_file(const char *filename, uint32_t submit_to_decompile);
+
+/* printf() to stdout, prefixed with lvl + 1 tabs.  Level 0 is the body of
+ * the generated main(); every nested IB / draw-state scope adds one level.
+ */
+static void
+printlvl(int lvl, const char *fmt, ...)
+{
+   assert(lvl >= 0);
+
+   for (int i = 0; i <= lvl; i++)
+      fputc('\t', stdout);
+
+   va_list args;
+   va_start(args, fmt);
+   vprintf(fmt, args);
+   va_end(args);
+}
+
+/* Print the command line help to stderr and exit with status 2. */
+static void
+print_usage(const char *name)
+{
+   /* clang-format off */
+   fprintf(stderr, "Usage:\n\n"
+           "\t%s [OPTIONS]... FILE...\n\n"
+           "Options:\n"
+           "\t-s, --submit=№   - № of the submit to decompile\n"
+           "\t-h, --help       - show this message\n"
+           , name);
+   /* clang-format on */
+   exit(2);
+}
+
+/* clang-format off */
+static const struct option opts[] = {
+      { "submit",    required_argument, 0, 's' },
+      { "help",      no_argument,       0, 'h' },
+      /* getopt_long() requires an all-zero element to end the array */
+      { 0, 0, 0, 0 },
+};
+/* clang-format on */
+
+/* Sentinel meaning "no --submit given". */
+#define SUBMIT_UNSET UINT32_MAX
+
+int
+main(int argc, char **argv)
+{
+   int ret = -1;
+   int c;
+   uint32_t submit_to_decompile = SUBMIT_UNSET;
+
+   while ((c = getopt_long(argc, argv, "s:h", opts, NULL)) != -1) {
+      switch (c) {
+      case 0:
+         /* option that set a flag, nothing to do */
+         break;
+      case 's': {
+         char *end = NULL;
+         errno = 0;
+         unsigned long parsed = strtoul(optarg, &end, 0);
+         /* reject empty input, trailing garbage and out-of-range values
+          * (including the sentinel itself)
+          */
+         if (errno || end == optarg || *end != '\0' ||
+             parsed >= SUBMIT_UNSET) {
+            fprintf(stderr, "Invalid submit number: %s\n", optarg);
+            print_usage(argv[0]);
+         }
+         submit_to_decompile = (uint32_t)parsed;
+         break;
+      }
+      case 'h':
+      default:
+         print_usage(argv[0]);
+      }
+   }
+
+   if (submit_to_decompile == SUBMIT_UNSET) {
+      fprintf(stderr, "Submit to decompile has to be specified\n");
+      print_usage(argv[0]);
+   }
+
+   while (optind < argc) {
+      ret = handle_file(argv[optind], submit_to_decompile);
+      if (ret) {
+         fprintf(stderr, "error reading: %s\n", argv[optind]);
+         break;
+      }
+      optind++;
+   }
+
+   /* ret is still -1 when no FILE was given */
+   if (ret)
+      print_usage(argv[0]);
+
+   return ret;
+}
+
+static struct rnn *rnn;
+
+/* Create the register database and load the definitions for gpuname. */
+static void
+init_rnn(const char *gpuname)
+{
+   rnn = rnn_new(true);
+   rnn_load(rnn, gpuname);
+}
+
+/* Name of the type-7 opcode opc, as looked up in the
+ * "adreno_pm4_type3_packets" enum.  Callers in this file treat a NULL
+ * result as "unknown opcode".
+ */
+const char *
+pktname(unsigned opc)
+{
+   return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
+}
+
+/* Emit the C statement(s) for one register of a type-4 packet.
+ *
+ * cnt != 0 marks the first register of the packet: a pkt4() call carrying
+ * the header (with cnt registers) and the first value is emitted.
+ * cnt == 0 marks a follow-up register: only its value is emitted.
+ */
+static void
+decompile_register(uint32_t regbase, uint32_t dword, uint16_t cnt, int level)
+{
+   struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
+
+   if (info && info->typeinfo) {
+      char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
+      printlvl(level, "/* pkt4: %s = %s */\n", info->name, decoded);
+      free(decoded);
+
+      if (cnt == 0) {
+         printlvl(level, "pkt(cs, %u);\n", dword);
+      } else {
+         /* One byte more than the %32 field widths below: the width does
+          * not count the terminating NUL that sscanf() appends.
+          */
+         char reg_name[33];
+         char field_name[33];
+         char reg_idx[33];
+
+         /* reginfo doesn't return reg name in a compilable format, for now just
+          * parse it into a compilable reg name.
+          * TODO: Make RNN optionally return compilable reg name.
+          *
+          * Array registers look like NAME[idx].FIELD and are turned into
+          * REG_<variant>_NAME_FIELD(idx); any other name is used as is.
+          */
+         if (sscanf(info->name, "%32[A-Z0-9_][%32[x0-9]].%32s", reg_name,
+                    reg_idx, field_name) != 3) {
+            printlvl(level, "pkt4(cs, REG_%s_%s, (%u), %u);\n", rnn->variant,
+                     info->name, cnt, dword);
+         } else {
+            printlvl(level, "pkt4(cs, REG_%s_%s_%s(%s), (%u), %u);\n",
+                     rnn->variant, reg_name, field_name, reg_idx, cnt, dword);
+         }
+      }
+   } else {
+      printlvl(level, "/* unknown pkt4 */\n");
+      printlvl(level, "pkt4(cs, %u, (%u), %u);\n", regbase, cnt, dword);
+   }
+
+   /* NOTE(review): assumes the rnn_reginfo() result is owned by the caller,
+    * like the rnndec_decodeaddr() result released in decompile_domain().
+    */
+   if (info) {
+      free(info->name);
+      free(info);
+   }
+}
+
+/* Emit a type-4 packet writing sizedwords consecutive registers starting at
+ * regbase.  Only the first register carries the packet header.
+ */
+static void
+decompile_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
+                    int level)
+{
+   if (!sizedwords)
+      return;
+   decompile_register(regbase, *dwords, sizedwords--, level);
+   while (sizedwords--) {
+      regbase++;
+      dwords++;
+      decompile_register(regbase, *dwords, 0, level);
+   }
+}
+
+/* Emit a type-7 packet: the pkt7() header followed by one pkt() per payload
+ * dword.  When the payload layout is described by domain dom_name, each
+ * value is preceded by a comment with its decoded form.
+ */
+static void
+decompile_domain(uint32_t *dwords, uint32_t sizedwords, const char *dom_name,
+                 const char *packet_name, int level)
+{
+   struct rnndomain *dom = rnn_finddomain(rnn->db, dom_name);
+
+   printlvl(level, "pkt7(cs, %s, %u);\n", packet_name, sizedwords);
+
+   for (uint32_t i = 0; i < sizedwords; i++) {
+      struct rnndecaddrinfo *info = NULL;
+      if (dom) {
+         info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
+      }
+
+      if (info && info->typeinfo) {
+         /* A field wider than 32 bits also covers the next dword. */
+         const bool is_64bit =
+            info->typeinfo->high >= 32 && i < sizedwords - 1;
+
+         uint64_t value = dwords[i];
+         if (is_64bit)
+            value |= (uint64_t)dwords[i + 1] << 32;
+
+         char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
+         printlvl(level, "/* %s */\n", decoded);
+         free(decoded);
+
+         /* Both halves have to be emitted, otherwise the generated packet
+          * is shorter than the size announced in its header.
+          */
+         printlvl(level, "pkt(cs, %u);\n", dwords[i]);
+         if (is_64bit) {
+            i++; /* the high dword is consumed here, not by the loop */
+            printlvl(level, "pkt(cs, %u);\n", dwords[i]);
+         }
+      } else {
+         printlvl(level, "pkt(cs, %u);\n", dwords[i]);
+      }
+
+      if (info) {
+         free(info->name);
+         free(info);
+      }
+   }
+}
+
+/* Walk sizedwords dwords of command stream and emit the C statements that
+ * rebuild it.  Indirect buffers and non-empty draw-state groups are followed
+ * and emitted as nested begin_*()/end_*() scopes; everything else is emitted
+ * packet by packet.  Exits on a packet that cannot be identified.
+ */
+static void
+decompile_commands(uint32_t *dwords, uint32_t sizedwords, int level)
+{
+   int dwords_left = sizedwords;
+   uint32_t count = 0; /* dword count including packet header */
+   uint32_t val;
+
+   if (!dwords) {
+      fprintf(stderr, "NULL cmd buffer!\n");
+      return;
+   }
+
+   while (dwords_left > 0) {
+      if (pkt_is_regwrite(dwords[0], &val, &count)) {
+         assert(val < 0xffff);
+         decompile_registers(val, dwords + 1, count - 1, level);
+      } else if (pkt_is_opcode(dwords[0], &val, &count)) {
+         if (val == CP_INDIRECT_BUFFER) {
+            /* payload: address low, address high, size in dwords */
+            uint64_t ibaddr;
+            uint32_t ibsize;
+            ibaddr = dwords[1];
+            ibaddr |= ((uint64_t)dwords[2]) << 32;
+            ibsize = dwords[3];
+
+            printlvl(level, "{\n");
+            printlvl(level + 1, "begin_ib();\n");
+
+            if (!has_dumped(ibaddr, 0x7)) {
+               uint32_t *ptr = hostptr(ibaddr);
+               decompile_commands(ptr, ibsize, level + 1);
+            }
+
+            printlvl(level + 1, "end_ib();\n");
+            printlvl(level, "}\n");
+         } else if (val == CP_SET_DRAW_STATE) {
+            /* payload: groups of (flags | dword count, address low,
+             * address high)
+             */
+            for (uint32_t i = 1; i < count; i += 3) {
+               uint32_t state_count = dwords[i] & 0xffff;
+               if (state_count != 0) {
+                  /* bits above the count are passed through verbatim */
+                  uint32_t unchanged = dwords[i] & (~0xffff);
+                  uint64_t ibaddr = dwords[i + 1];
+                  ibaddr |= ((uint64_t)dwords[i + 2]) << 32;
+
+                  printlvl(level, "{\n");
+                  printlvl(level + 1, "begin_draw_state();\n");
+
+                  uint32_t *ptr = hostptr(ibaddr);
+                  decompile_commands(ptr, state_count, level + 1);
+
+                  printlvl(level + 1, "end_draw_state(%u);\n", unchanged);
+                  printlvl(level, "}\n");
+               } else {
+                  /* group without contents: emit it as a plain packet */
+                  decompile_domain(dwords + i, 3, "CP_SET_DRAW_STATE",
+                                   "CP_SET_DRAW_STATE", level);
+               }
+            }
+         } else {
+            const char *packet_name = pktname(val);
+            const char *dom_name = packet_name;
+            if (packet_name) {
+               /* special hack for two packets that decode the same way
+                * on a6xx:
+                */
+               if (!strcmp(packet_name, "CP_LOAD_STATE6_FRAG") ||
+                   !strcmp(packet_name, "CP_LOAD_STATE6_GEOM"))
+                  dom_name = "CP_LOAD_STATE6";
+               decompile_domain(dwords + 1, count - 1, dom_name, packet_name,
+                                level);
+            } else {
+               errx(1, "unknown pkt7 %u", val);
+            }
+         }
+      } else {
+         errx(1, "unknown packet %u", dwords[0]);
+      }
+
+      dwords += count;
+      dwords_left -= count;
+   }
+
+   /* the last packet claimed more dwords than the buffer had left */
+   if (dwords_left < 0)
+      fprintf(stderr, "**** this ain't right!! dwords_left=%d\n", dwords_left);
+}
+
+/* Read the .rd file filename ("-" for stdin) and print to stdout a C program
+ * that regenerates submit number submit_to_decompile (counted from 0).
+ *
+ * Returns -1 if the file cannot be opened and 0 otherwise; a corrupt file or
+ * a submit number that does not occur is only reported on stderr.
+ */
+static int
+handle_file(const char *filename, uint32_t submit_to_decompile)
+{
+   struct io *io;
+   uint32_t submit = 0;
+   bool found = false;
+   struct rd_parsed_section ps = {0};
+
+   if (!strcmp(filename, "-"))
+      io = io_openfd(0);
+   else
+      io = io_open(filename);
+
+   if (!io) {
+      fprintf(stderr, "could not open: %s\n", filename);
+      return -1;
+   }
+
+   init_rnn("a6xx");
+
+   printf("#include \"decode/rdcompiler-utils.h\"\n"
+          "int main(int argc, char **argv)\n"
+          "{\n"
+          "\tstruct replay_context ctx;\n"
+          "\treplay_context_init(&ctx, argc, argv);\n"
+          "\tstruct cmdstream *cs = ctx.submit_cs;\n\n");
+
+   /* Address and length announced by the most recent RD_GPUADDR section;
+    * they describe the RD_BUFFER_CONTENTS section that follows it.
+    */
+   struct {
+      unsigned int len;
+      uint64_t gpuaddr;
+   } gpuaddr = {0};
+
+   while (parse_rd_section(io, &ps)) {
+      switch (ps.type) {
+      case RD_TEST:
+      case RD_VERT_SHADER:
+      case RD_FRAG_SHADER:
+      case RD_GPU_ID:
+      case RD_CHIP_ID:
+      case RD_CMD:
+         /* no-op */
+         break;
+      case RD_GPUADDR:
+         parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
+         break;
+      case RD_BUFFER_CONTENTS:
+         add_buffer(gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
+         /* cleared after handing the data to add_buffer() */
+         ps.buf = NULL;
+         break;
+      case RD_CMDSTREAM_ADDR: {
+         unsigned int sizedwords;
+         uint64_t cs_iova;
+         parse_addr(ps.buf, ps.sz, &sizedwords, &cs_iova);
+
+         if (submit == submit_to_decompile) {
+            decompile_commands(hostptr(cs_iova), sizedwords, 0);
+            found = true;
+         }
+
+         submit++;
+         break;
+      }
+      default:
+         break;
+      }
+   }
+
+   printf("\treplay_context_finish(&ctx);\n}\n");
+
+   io_close(io);
+   fflush(stdout);
+
+   if (ps.ret < 0) {
+      fprintf(stderr, "corrupt file\n");
+   }
+   if (!found) {
+      fprintf(stderr, "submit %u not found, file has %u submit(s)\n",
+              submit_to_decompile, submit);
+   }
+   return 0;
+}