freedreno/rddecompiler: Add shader disasm/asm support

Now rddecompiler outputs shaders in editable assembly form.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/19444>
This commit is contained in:
Danylo Piliaiev 2022-11-03 19:18:21 +01:00 committed by Marge Bot
parent 03d80e0a6d
commit a7773c3c4c
3 changed files with 213 additions and 27 deletions

View file

@ -30,4 +30,7 @@ generate_rd = executable(
link_with: [
mesa_proj.get_variable('libfreedreno_cffdec'),
],
dependencies : [
mesa_proj.get_variable('idep_nir'),
],
)

View file

@ -22,6 +22,10 @@
#include "a6xx.xml.h"
#include "ir3/ir3_assembler.h"
#include "ir3/ir3_compiler.h"
#include "ir3/ir3_shader.h"
#include "util/list.h"
#include "util/vma.h"
@ -35,14 +39,27 @@ struct cmdstream {
uint64_t iova;
};
/*
 * GPU address of the current write position of a command stream: the
 * stream's base iova plus the write cursor, where the cursor is scaled
 * from 32-bit words to bytes.
 */
static uint64_t
cs_get_cur_iova(struct cmdstream *cs)
{
   const uint64_t cursor_bytes = cs->cur * sizeof(uint32_t);

   return cs->iova + cursor_bytes;
}
/*
 * State shared by the helpers a generated replay program calls. It is set up
 * by replay_context_init() and passed (as "ctx") to upload_shader() and
 * emit_shader_iova().
 */
struct replay_context {
/* ralloc context; parent of the compiled_shaders table and of the iova
 * slots allocated by upload_shader(). */
void *mem_ctx;
/* Address-space heap, initialized in replay_context_init() from the
 * va_start/va_size values. */
struct util_vma_heap vma;
/* Command streams created with cs_alloc() in replay_context_init().
 * submit_cs is the stream the generated main() starts emitting into,
 * state_cs is the one the begin_draw_state() macro reads the cursor of,
 * and shader_cs receives assembled shader binaries (see upload_shader()). */
struct cmdstream *submit_cs;
struct cmdstream *state_cs;
struct cmdstream *shader_cs;
/* List head initialized with list_inithead(); presumably links every
 * cmdstream made by cs_alloc() -- TODO confirm against cs_alloc(). */
struct list_head cs_list;
/* ir3 compiler instance used to assemble shader source text. */
struct ir3_compiler *compiler;
/* Maps a shader id (uint64_t) to a pointer to the uint64_t iova where the
 * assembled shader was placed in shader_cs. */
struct hash_table_u64 *compiled_shaders;
/* NOTE(review): not referenced in the visible code; presumably the path of
 * the output file written at the end of replay -- confirm. */
const char *output_name;
};
@ -60,6 +77,18 @@ pkt_qw(struct cmdstream *cs, uint64_t payload)
pkt(cs, payload >> 32);
}
/*
 * Copy an opaque blob into a command stream and return the GPU address it
 * was placed at.
 *
 * cs        - destination command stream.
 * payload   - data to copy.
 * size      - payload size in bytes.
 * alignment - required alignment of the blob's start, in bytes.
 *
 * Returns the iova of the first byte of the copied blob.
 */
static uint64_t
pkt_blob(struct cmdstream *cs, void *payload, uint32_t size, uint32_t alignment)
{
   /* The cursor is a dword index, so convert the byte alignment to dwords. */
   cs->cur = align(cs->cur, alignment / sizeof(uint32_t));
   uint64_t start_iova = cs_get_cur_iova(cs);

   memcpy(cs->mem + cs->cur, payload, size);

   /* cs->cur counts 32-bit words (cs_get_cur_iova() scales it by
    * sizeof(uint32_t)), while size is in bytes. Advance by the number of
    * dwords the blob occupies, rounded up, instead of by the raw byte count,
    * which would skip four times the space actually written. */
   cs->cur += (size + sizeof(uint32_t) - 1) / sizeof(uint32_t);

   return start_iova;
}
static void
pkt4(struct cmdstream *cs, uint16_t regindx, uint16_t cnt, uint32_t payload)
{
@ -94,12 +123,6 @@ cs_alloc(struct replay_context *ctx, uint32_t size)
return cs;
}
/*
 * GPU address of the current write position of a command stream: the
 * stream's base iova plus the write cursor, where the cursor is scaled
 * from 32-bit words to bytes.
 */
static uint64_t
cs_get_cur_iova(struct cmdstream *cs)
{
   const uint64_t cursor_bytes = cs->cur * sizeof(uint32_t);

   return cs->iova + cursor_bytes;
}
static void
rd_write_cs_buffer(FILE *out, struct cmdstream *cs)
{
@ -159,7 +182,8 @@ static const struct option opts[] = {
/* clang-format on */
static void
replay_context_init(struct replay_context *ctx, int argc, char **argv)
replay_context_init(struct replay_context *ctx, struct fd_dev_id *dev_id,
int argc, char **argv)
{
uint64_t va_start = 0;
uint64_t va_size = 0;
@ -189,12 +213,18 @@ replay_context_init(struct replay_context *ctx, int argc, char **argv)
exit(1);
}
ctx->mem_ctx = ralloc_context(NULL);
list_inithead(&ctx->cs_list);
util_vma_heap_init(&ctx->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
ctx->submit_cs = cs_alloc(ctx, 1024 * 1024);
ctx->state_cs = cs_alloc(ctx, 2 * 1024 * 1024);
ctx->shader_cs = cs_alloc(ctx, 8 * 1024 * 1024);
ctx->compiler =
ir3_compiler_create(NULL, dev_id, &(struct ir3_compiler_options){});
ctx->compiled_shaders = _mesa_hash_table_u64_create(ctx->mem_ctx);
}
static void
@ -219,6 +249,33 @@ replay_context_finish(struct replay_context *ctx)
fclose(out);
}
/*
 * Assemble a shader from its textual ir3 assembly and place the resulting
 * binary into ctx->shader_cs.
 *
 * ctx    - replay context; supplies the compiler, the shader command stream
 *          and the id -> iova table.
 * id     - key under which the shader's iova is stored; emit_shader_iova()
 *          looks it up with the same id.
 * source - NUL-terminated assembly text.
 *
 * On failure to open the source as a stream or to assemble it, a message is
 * printed to stderr and the process exits with status 1, matching the error
 * handling in replay_context_init().
 */
static void
upload_shader(struct replay_context *ctx, uint64_t id, const char *source)
{
   FILE *in = fmemopen((void *)source, strlen(source), "r");
   if (!in) {
      perror("upload_shader: fmemopen");
      exit(1);
   }

   struct ir3_kernel_info info = {};
   struct ir3_shader *shader = ir3_parse_asm(ctx->compiler, &info, in);
   fclose(in);

   /* Checked explicitly rather than with assert(): with NDEBUG the assert
    * disappears and a parse failure would become a NULL dereference. */
   if (!shader) {
      fprintf(stderr, "upload_shader: failed to assemble shader 0x%llx\n",
              (unsigned long long)id);
      exit(1);
   }

   /* The iova slot is owned by ctx->mem_ctx so it outlives this call; the
    * table stores a pointer to it. */
   uint64_t *shader_iova = ralloc(ctx->mem_ctx, uint64_t);
   *shader_iova = pkt_blob(ctx->shader_cs, shader->variants->bin,
                           shader->variants->info.size, 128);
   ralloc_free(shader);

   _mesa_hash_table_u64_insert(ctx->compiled_shaders, id, shader_iova);
}
/*
 * Emit the 64-bit GPU address of a previously uploaded shader into a
 * command stream.
 *
 * ctx - replay context holding the id -> iova table.
 * cs  - command stream that receives the address (via pkt_qw()).
 * id  - the id that was passed to upload_shader().
 *
 * If no shader was uploaded under this id, a message is printed to stderr
 * and the process exits with status 1 instead of dereferencing NULL.
 */
static void
emit_shader_iova(struct replay_context *ctx, struct cmdstream *cs, uint64_t id)
{
   uint64_t *shader_iova =
      _mesa_hash_table_u64_search(ctx->compiled_shaders, id);
   if (!shader_iova) {
      fprintf(stderr, "emit_shader_iova: shader 0x%llx was never uploaded\n",
              (unsigned long long)id);
      exit(1);
   }

   pkt_qw(cs, *shader_iova);
}
#define begin_draw_state() \
uint64_t subcs_iova_start = cs_get_cur_iova(ctx.state_cs); \
struct cmdstream *prev_cs = cs; \

View file

@ -29,12 +29,17 @@
#include "freedreno_pm4.h"
#include "a6xx.xml.h"
#include "common/freedreno_dev_info.h"
#include "util/hash_table.h"
#include "util/os_time.h"
#include "util/ralloc.h"
#include "util/rb_tree.h"
#include "util/set.h"
#include "util/u_vector.h"
#include "buffers.h"
#include "cffdec.h"
#include "disasm.h"
#include "io.h"
#include "rdutil.h"
#include "redump.h"
@ -148,6 +153,10 @@ main(int argc, char **argv)
}
/* Register database handle, used for packet-name and register-info lookups
 * (rnn_enumname(), rnn_reginfo()). */
static struct rnn *rnn;
/* Device identity read from the RD_GPU_ID and RD_CHIP_ID sections of the
 * input; written into the generated program by emit_header() and passed to
 * the disassembler by decompile_shader(). */
static struct fd_dev_id dev_id;
/* ralloc context created in handle_file(); parent of the decompiled_shaders
 * set and of the keys stored in it. */
static void *mem_ctx;
/* GPU addresses of shaders already emitted as assembly. Keys are pointers
 * to uint64_t, hashed and compared by value (u64_hash()/u64_compare()). */
static struct set decompiled_shaders;
static void
init_rnn(const char *gpuname)
@ -162,11 +171,71 @@ pktname(unsigned opc)
return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
}
static void
decompile_register(uint32_t regbase, uint32_t dword, uint16_t cnt, int level)
/*
 * Decompile a 64-bit shader address (two consecutive dwords) into generated
 * replay code.
 *
 * name    - register name; unused here, present to match the reg_a6xx
 *           handler signature.
 * regbase - register offset; unused here for the same reason.
 * dwords  - dwords[0] is the low and dwords[1] the high half of the address.
 * level   - indentation level passed to printlvl().
 *
 * The first time an address is seen, the shader is disassembled and the
 * generated code uploads it and emits its iova; later references to the same
 * address only emit the iova.
 *
 * Returns the number of dwords consumed, which is always 2.
 */
static uint32_t
decompile_shader(const char *name, uint32_t regbase, uint32_t *dwords, int level)
{
   uint64_t gpuaddr = ((uint64_t)dwords[1] << 32) | dwords[0];
   /* Drop the low four bits of the address before using it as a key. */
   gpuaddr &= 0xfffffffffffffff0;

   /* Shader's iova is referenced in two places, so we have to remember it. */
   if (_mesa_set_search(&decompiled_shaders, &gpuaddr)) {
      printlvl(level, "emit_shader_iova(&ctx, cs, 0x%" PRIx64 ");\n", gpuaddr);
      return 2;
   }

   /* The set stores pointers, so the key needs storage that outlives this
    * call. */
   uint64_t *key = ralloc(mem_ctx, uint64_t);
   *key = gpuaddr;
   _mesa_set_add(&decompiled_shaders, key);

   void *buf = hostptr(gpuaddr);
   assert(buf);

   uint32_t sizedwords = hostlen(gpuaddr) / 4;

   /* Disassemble into an in-memory stream so the text can be embedded in
    * the generated source. */
   char *stream_data = NULL;
   size_t stream_size = 0;
   FILE *stream = open_memstream(&stream_data, &stream_size);
   if (!stream)
      errx(1, "open_memstream failed");

   try_disasm_a3xx(buf, sizedwords, 0, stream, dev_id.gpu_id);
   fclose(stream);

   printlvl(level, "{\n");
   printlvl(level + 1, "const char *source = R\"(\n");
   printf("%s", stream_data);
   printlvl(level + 1, ")\";\n");
   printlvl(level + 1, "upload_shader(&ctx, 0x%" PRIx64 ", source);\n", gpuaddr);
   printlvl(level + 1, "emit_shader_iova(&ctx, cs, 0x%" PRIx64 ");\n", gpuaddr);
   printlvl(level, "}\n");

   /* The buffer produced by open_memstream() belongs to the caller once the
    * stream is closed; release it so each decompiled shader does not leak. */
   free(stream_data);

   return 2;
}
/*
 * Registers that need custom decompilation instead of the generic pkt4
 * output. Each handler returns how many dwords it consumed. The table ends
 * with a zero regbase entry, which is what lookups stop on. type0_reg points
 * at the table in use.
 */
static struct {
   uint32_t regbase;
   uint32_t (*fxn)(const char *name, uint32_t regbase, uint32_t *dwords, int level);
} reg_a6xx[] = {
   { .regbase = REG_A6XX_SP_VS_OBJ_START, .fxn = decompile_shader },
   { .regbase = REG_A6XX_SP_HS_OBJ_START, .fxn = decompile_shader },
   { .regbase = REG_A6XX_SP_DS_OBJ_START, .fxn = decompile_shader },
   { .regbase = REG_A6XX_SP_GS_OBJ_START, .fxn = decompile_shader },
   { .regbase = REG_A6XX_SP_FS_OBJ_START, .fxn = decompile_shader },
   { .regbase = REG_A6XX_SP_CS_OBJ_START, .fxn = decompile_shader },
   /* sentinel */
   { .regbase = 0, .fxn = NULL },
}, *type0_reg;
static uint32_t
decompile_register(uint32_t regbase, uint32_t *dwords, uint16_t cnt, int level)
{
struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
for (unsigned idx = 0; type0_reg[idx].regbase; idx++) {
if (type0_reg[idx].regbase == regbase) {
return type0_reg[idx].fxn(info->name, regbase, dwords, level);
}
}
const uint32_t dword = *dwords;
if (info && info->typeinfo) {
char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
printlvl(level, "/* pkt4: %s = %s */\n", info->name, decoded);
@ -195,6 +264,8 @@ decompile_register(uint32_t regbase, uint32_t dword, uint16_t cnt, int level)
printlvl(level, "/* unknown pkt4 */\n");
printlvl(level, "pkt4(cs, %u, (%u), %u);\n", regbase, cnt, dword);
}
return 1;
}
static void
@ -203,17 +274,19 @@ decompile_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
{
if (!sizedwords)
return;
decompile_register(regbase, *dwords, sizedwords--, level);
while (sizedwords--) {
regbase++;
dwords++;
decompile_register(regbase, *dwords, 0, level);
uint32_t consumed = decompile_register(regbase, dwords, sizedwords, level);
sizedwords -= consumed;
while (sizedwords > 0) {
regbase += consumed;
dwords += consumed;
consumed = decompile_register(regbase, dwords, 0, level);
sizedwords -= consumed;
}
}
static void
decompile_domain(uint32_t *dwords, uint32_t sizedwords, const char *dom_name,
const char *packet_name, int level)
decompile_domain(uint32_t pkt, uint32_t *dwords, uint32_t sizedwords,
const char *dom_name, const char *packet_name, int level)
{
struct rnndomain *dom;
int i;
@ -222,6 +295,22 @@ decompile_domain(uint32_t *dwords, uint32_t sizedwords, const char *dom_name,
printlvl(level, "pkt7(cs, %s, %u);\n", packet_name, sizedwords);
if (pkt == CP_LOAD_STATE6_FRAG || pkt == CP_LOAD_STATE6_GEOM) {
enum a6xx_state_type state_type =
(dwords[0] & CP_LOAD_STATE6_0_STATE_TYPE__MASK) >>
CP_LOAD_STATE6_0_STATE_TYPE__SHIFT;
enum a6xx_state_src state_src =
(dwords[0] & CP_LOAD_STATE6_0_STATE_SRC__MASK) >>
CP_LOAD_STATE6_0_STATE_SRC__SHIFT;
/* TODO: decompile all other state */
if (state_type == ST6_SHADER && state_src == SS6_INDIRECT) {
printlvl(level, "pkt(cs, %u);\n", dwords[0]);
decompile_shader(NULL, 0, dwords + 1, level);
return;
}
}
for (i = 0; i < sizedwords; i++) {
struct rnndecaddrinfo *info = NULL;
if (dom) {
@ -300,7 +389,7 @@ decompile_commands(uint32_t *dwords, uint32_t sizedwords, int level)
printlvl(level + 1, "end_draw_state(%u);\n", unchanged);
printlvl(level, "}\n");
} else {
decompile_domain(dwords + i, 3, "CP_SET_DRAW_STATE",
decompile_domain(val, dwords + i, 3, "CP_SET_DRAW_STATE",
"CP_SET_DRAW_STATE", level);
}
}
@ -314,7 +403,7 @@ decompile_commands(uint32_t *dwords, uint32_t sizedwords, int level)
if (!strcmp(packet_name, "CP_LOAD_STATE6_FRAG") ||
!strcmp(packet_name, "CP_LOAD_STATE6_GEOM"))
dom_name = "CP_LOAD_STATE6";
decompile_domain(dwords + 1, count - 1, dom_name, packet_name,
decompile_domain(val, dwords + 1, count - 1, dom_name, packet_name,
level);
} else {
errx(1, "unknown pkt7 %u", val);
@ -332,6 +421,39 @@ decompile_commands(uint32_t *dwords, uint32_t sizedwords, int level)
fprintf(stderr, "**** this ain't right!! dwords_left=%d\n", dwords_left);
}
/*
 * Print the preamble of the generated replay program: the include line, the
 * start of main(), and the replay_context initialization with the device id.
 *
 * Nothing is printed until both dev_id.gpu_id and dev_id.chip_id are
 * non-zero, and the preamble is printed at most once per process, so this
 * can be called after each of the two ids is read.
 */
static void
emit_header(void)
{
   static bool emitted = false;

   if (!dev_id.gpu_id || !dev_id.chip_id)
      return;

   if (emitted)
      return;
   emitted = true;

   printf("#include \"decode/rdcompiler-utils.h\"\n"
          "int main(int argc, char **argv)\n"
          "{\n"
          "\tstruct replay_context ctx;\n"
          "\tstruct fd_dev_id dev_id = {%u, %" PRIu64 "};\n"
          "\treplay_context_init(&ctx, &dev_id, argc, argv);\n"
          "\tstruct cmdstream *cs = ctx.submit_cs;\n\n",
          dev_id.gpu_id, dev_id.chip_id);
}
/* Hash callback for sets keyed by pointers to uint64_t: hashes the eight
 * bytes the key points at. */
static inline uint32_t
u64_hash(const void *key)
{
   const size_t key_size = sizeof(uint64_t);

   return _mesa_hash_data(key, key_size);
}
/* Equality callback for sets keyed by pointers to uint64_t: true when the
 * two pointed-to 64-bit values are identical. */
static inline bool
u64_compare(const void *key1, const void *key2)
{
   uint64_t lhs;
   uint64_t rhs;

   /* Copy out instead of casting so unaligned keys are fine. */
   memcpy(&lhs, key1, sizeof(lhs));
   memcpy(&rhs, key2, sizeof(rhs));

   return lhs == rhs;
}
static int
handle_file(const char *filename, uint32_t submit_to_decompile)
{
@ -351,13 +473,9 @@ handle_file(const char *filename, uint32_t submit_to_decompile)
}
init_rnn("a6xx");
printf("#include \"decode/rdcompiler-utils.h\"\n"
"int main(int argc, char **argv)\n"
"{\n"
"\tstruct replay_context ctx;\n"
"\treplay_context_init(&ctx, argc, argv);\n"
"\tstruct cmdstream *cs = ctx.submit_cs;\n\n");
type0_reg = reg_a6xx;
mem_ctx = ralloc_context(NULL);
_mesa_set_init(&decompiled_shaders, mem_ctx, u64_hash, u64_compare);
struct {
unsigned int len;
@ -369,8 +487,6 @@ handle_file(const char *filename, uint32_t submit_to_decompile)
case RD_TEST:
case RD_VERT_SHADER:
case RD_FRAG_SHADER:
case RD_GPU_ID:
case RD_CHIP_ID:
case RD_CMD:
/* no-op */
break;
@ -398,6 +514,16 @@ handle_file(const char *filename, uint32_t submit_to_decompile)
submit++;
break;
}
case RD_GPU_ID: {
dev_id.gpu_id = parse_gpu_id(ps.buf);
emit_header();
break;
}
case RD_CHIP_ID: {
dev_id.chip_id = *(uint64_t *)ps.buf;
emit_header();
break;
}
default:
break;
}