mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-20 06:58:16 +02:00
This replaces all full license headers with SPDX identifiers and generally makes things more consistent. I've also dropped the few remaining author tags. If someone wants to know who wrote a bit of code, `git blame` is going to be way more accurate than author tags anyway. Acked-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@intel.com> Acked-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39397>
2606 lines
81 KiB
C
2606 lines
81 KiB
C
/*
|
|
* Copyright (C) 2022-2023 Collabora, Ltd.
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include "util/bitset.h"
|
|
#include "util/hash_table.h"
|
|
#include "util/list.h"
|
|
#include "util/ralloc.h"
|
|
|
|
#include "genxml/gen_macros.h"
|
|
#include "decode.h"
|
|
|
|
#if PAN_ARCH >= 10
|
|
|
|
#include "genxml/cs_builder.h"
|
|
|
|
/*
 * Deepest call nesting tracked by the decoder. The limit is the one for
 * Mali-G610; one is subtracted because the currently active frame is not
 * counted.
 */
#define MAX_CALL_STACK_DEPTH (8 - 1)

/* Shorthand that forwards its arguments unchanged to pan_cast_and_unpack(). */
#define cs_unpack(packed, T, unpacked) pan_cast_and_unpack(packed, T, unpacked)
|
|
|
|
struct queue_ctx {
|
|
/* Size of CSHWIF register file in 32-bit registers */
|
|
unsigned nr_regs;
|
|
|
|
/* CSHWIF register file */
|
|
uint32_t *regs;
|
|
|
|
/* Current instruction pointer (CPU pointer for convenience) */
|
|
uint64_t *ip;
|
|
|
|
/* Current instruction end pointer */
|
|
uint64_t *end;
|
|
|
|
/* Whether currently inside an exception handler */
|
|
bool in_exception_handler;
|
|
|
|
/* Call stack. Depth=0 means root */
|
|
struct {
|
|
/* Link register to return to */
|
|
uint64_t *lr;
|
|
|
|
/* End pointer, there is a return (or exit) after */
|
|
uint64_t *end;
|
|
} call_stack[MAX_CALL_STACK_DEPTH + 1]; /* +1 for exception handler */
|
|
uint8_t call_stack_depth;
|
|
|
|
unsigned gpu_id;
|
|
};
|
|
|
|
/*
 * Print an indirect operand to fp: "[dN]" when the offset is zero,
 * "[dN + offset]" otherwise (the offset is printed as a signed decimal).
 */
static void
print_indirect(unsigned address, int16_t offset, FILE *fp)
{
   if (offset == 0) {
      fprintf(fp, "[d%u]", address);
      return;
   }

   fprintf(fp, "[d%u + %d]", address, offset);
}
|
|
|
|
static void
|
|
print_reg_tuple(unsigned base, uint16_t mask, FILE *fp)
|
|
{
|
|
bool first_reg = true;
|
|
|
|
u_foreach_bit(i, mask) {
|
|
fprintf(fp, "%sr%u", first_reg ? "" : ":", base + i);
|
|
first_reg = false;
|
|
}
|
|
|
|
if (mask == 0)
|
|
fprintf(fp, "_");
|
|
}
|
|
|
|
/*
 * Mnemonics for the condition field of BRANCH and SYNC_WAIT instructions,
 * indexed by the raw field value.
 */
static const char *conditions_str[] = {
   "le",
   "gt",
   "eq",
   "ne",
   "lt",
   "ge",
   "always",
};
|
|
|
|
#if PAN_ARCH >= 11
/* Instruction-name suffixes for the defer_mode field, indexed by its value. */
static const char *defer_modes_str[] = {
   ".defer_immediate",
   ".defer_indirect",
};

/*
 * Suffix string for the defer_mode field of the unpacked instruction I, or
 * ".defer_mode_invalid" when the value is outside defer_modes_str.
 *
 * The argument and the whole expansion are parenthesized so the conditional
 * expression cannot be re-associated by surrounding operators. I is evaluated
 * more than once, so it must be free of side effects.
 */
#define defer_mode_str(I)                                                      \
   ((I).defer_mode < ARRAY_SIZE(defer_modes_str)                               \
       ? defer_modes_str[(I).defer_mode]                                       \
       : ".defer_mode_invalid")
#else
/* Below PAN_ARCH 11 no defer-mode suffix is printed. */
#define defer_mode_str(I) ""
#endif
|
|
|
|
static void
|
|
print_cs_instr(FILE *fp, const uint64_t *instr)
|
|
{
|
|
cs_unpack(instr, CS_BASE, base);
|
|
switch (base.opcode) {
|
|
case MALI_CS_OPCODE_NOP: {
|
|
cs_unpack(instr, CS_NOP, I);
|
|
if (I.ignored)
|
|
fprintf(fp, "NOP // 0x%" PRIX64, I.ignored);
|
|
else
|
|
fprintf(fp, "NOP");
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_MOVE48: {
|
|
cs_unpack(instr, CS_MOVE48, I);
|
|
fprintf(fp, "MOVE48 d%u, #0x%" PRIX64, I.destination, I.immediate);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_MOVE32: {
|
|
cs_unpack(instr, CS_MOVE32, I);
|
|
fprintf(fp, "MOVE32 r%u, #0x%" PRIX64, I.destination, I.immediate);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_WAIT: {
|
|
cs_unpack(instr, CS_WAIT, I);
|
|
fprintf(fp, "WAIT%s #%x", I.progress_increment ? ".progress_inc" : "",
|
|
I.wait_mask);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_RUN_COMPUTE: {
|
|
const char *axes[4] = {"x_axis", "y_axis", "z_axis"};
|
|
cs_unpack(instr, CS_RUN_COMPUTE, I);
|
|
|
|
/* Print the instruction. Ignore the selects and the flags override
|
|
* since we'll print them implicitly later.
|
|
*/
|
|
#if PAN_ARCH >= 12
|
|
fprintf(fp, "RUN_COMPUTE%s.%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
|
|
I.progress_increment ? ".progress_inc" : "", axes[I.task_axis],
|
|
I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
|
|
I.task_increment, I.ep_limit);
|
|
#else
|
|
fprintf(fp, "RUN_COMPUTE%s.%s.srt%d.spd%d.tsd%d.fau%d #%u",
|
|
I.progress_increment ? ".progress_inc" : "", axes[I.task_axis],
|
|
I.srt_select, I.spd_select, I.tsd_select, I.fau_select,
|
|
I.task_increment);
|
|
#endif
|
|
break;
|
|
}
|
|
|
|
#if PAN_ARCH == 10
|
|
case MALI_CS_OPCODE_RUN_TILING: {
|
|
cs_unpack(instr, CS_RUN_TILING, I);
|
|
fprintf(fp, "RUN_TILING%s.srt%d.spd%d.tsd%d.fau%d",
|
|
I.progress_increment ? ".progress_inc" : "", I.srt_select,
|
|
I.spd_select, I.tsd_select, I.fau_select);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if PAN_ARCH < 12
|
|
case MALI_CS_OPCODE_RUN_IDVS: {
|
|
cs_unpack(instr, CS_RUN_IDVS, I);
|
|
fprintf(
|
|
fp,
|
|
"RUN_IDVS%s%s%s.varying_srt%d.varying_fau%d.varying_tsd%d.frag_srt%d.frag_tsd%d r%u, #%" PRIx64,
|
|
I.progress_increment ? ".progress_inc" : "",
|
|
I.malloc_enable ? "" : ".no_malloc",
|
|
I.draw_id_register_enable ? ".draw_id_enable" : "",
|
|
I.varying_srt_select, I.varying_fau_select, I.varying_tsd_select,
|
|
I.fragment_srt_select, I.fragment_tsd_select, I.draw_id,
|
|
I.flags_override);
|
|
break;
|
|
}
|
|
#else
|
|
case MALI_CS_OPCODE_RUN_IDVS2: {
|
|
cs_unpack(instr, CS_RUN_IDVS2, I);
|
|
|
|
const char *vertex_shading_str[] = {
|
|
".early",
|
|
".deferred",
|
|
".INVALID",
|
|
".INVALID",
|
|
};
|
|
|
|
fprintf(fp, "RUN_IDVS2%s%s%s%s r%u, #%" PRIx64,
|
|
I.progress_increment ? ".progress_inc" : "",
|
|
I.malloc_enable ? "" : ".no_malloc",
|
|
I.draw_id_register_enable ? ".draw_id_enable" : "",
|
|
vertex_shading_str[I.vertex_shading_mode], I.draw_id,
|
|
I.flags_override);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if PAN_ARCH >= 13
|
|
case MALI_CS_OPCODE_ARITH_IMM32: {
|
|
cs_unpack(instr, CS_ARITH_IMM32_BASE, I);
|
|
|
|
const char *instr_name[] = {
|
|
"ADD_IMM32", "LSHIFT_IMM32", "RSHIFT_IMM_U32", "RSHIFT_IMM_S32",
|
|
"BFEXT_U32", "BFEXT_S32", "BFINS_IMM32", "UMIN_IMM32",
|
|
};
|
|
|
|
fprintf(fp, "%s r%u, r%u, #%" PRId64, instr_name[I.sub_opcode],
|
|
I.destination, I.source, I.immediate);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_IMM64: {
|
|
cs_unpack(instr, CS_ARITH_IMM64_BASE, I);
|
|
|
|
const char *instr_name[] = {
|
|
"ADD_IMM64", "LSHIFT_IMM64", "RSHIFT_IMM_U64", "RSHIFT_IMM_S64",
|
|
"BFEXT_U64", "BFEXT_S64", "BFINS_IMM64", "UMIN_IMM64",
|
|
};
|
|
|
|
fprintf(fp, "%s d%u, d%u, #%" PRId64, instr_name[I.sub_opcode],
|
|
I.destination, I.source, I.immediate);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_REG32: {
|
|
cs_unpack(instr, CS_ARITH_REG32_BASE, I);
|
|
|
|
const char *instr_name[] = {
|
|
"UMIN32", "ADD32", "SUB32", "LSHIFT32",
|
|
"RSHIFT_U32", "RSHIFT_S32", "BFINS32",
|
|
};
|
|
|
|
fprintf(fp, "%s r%u, r%u, r%u", instr_name[I.sub_opcode], I.destination,
|
|
I.source_1, I.source_0);
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_REG64: {
|
|
cs_unpack(instr, CS_ARITH_REG64_BASE, I);
|
|
|
|
const char *instr_name[] = {
|
|
"UMIN64", "ADD64", "SUB64", "LSHIFT64",
|
|
"RSHIFT_U64", "RSHIFT_S64", "BFINS64",
|
|
};
|
|
|
|
fprintf(fp, "%s d%u, d%u, d%u", instr_name[I.sub_opcode], I.destination,
|
|
I.source_1, I.source_0);
|
|
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if PAN_ARCH >= 11
|
|
case MALI_CS_OPCODE_LOGIC_OP32: {
|
|
cs_unpack(instr, CS_LOGIC_OP32, I);
|
|
|
|
const char *mode_name[] = {
|
|
".CLEAR", ".AND", ".AND_A_NB", ".MOV_A", ".AND_NA_B", ".MOV_B",
|
|
".XOR", ".OR", ".NOR", ".XNOR", ".NOT_B", ".OR_A_NB",
|
|
".NOT_A", ".OR_NA_B", ".NAND", ".SET",
|
|
};
|
|
|
|
const char *index_name[] = {
|
|
".direct",
|
|
".index",
|
|
};
|
|
|
|
fprintf(fp, "LOGIC_OP32%s r%u, r%u, r%u%s", mode_name[I.mode],
|
|
I.destination, I.source_0, I.source_1, index_name[I.index]);
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_NEXT_SB_ENTRY: {
|
|
cs_unpack(instr, CS_NEXT_SB_ENTRY, I);
|
|
|
|
const char *sb_type_name[] = {
|
|
".no_change", ".endpoint", ".other", ".deferred",
|
|
".INVALID", ".INVALID", ".INVALID", ".INVALID",
|
|
".INVALID", ".INVALID", ".INVALID", ".INVALID",
|
|
".INVALID", ".INVALID", ".INVALID", ".INVALID",
|
|
};
|
|
|
|
const char *format_name[] = {".index", ".mask"};
|
|
|
|
fprintf(fp, "NEXT_SB_ENTRY%s%s r%u", sb_type_name[I.sb_type],
|
|
format_name[I.format], I.destination);
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SET_STATE: {
|
|
cs_unpack(instr, CS_SET_STATE, I);
|
|
|
|
const char *state_name[] = {
|
|
".sb_sel_endpoint", ".sb_sel_other", ".sb_sel_deferred", ".INVALID",
|
|
".INVALID", ".INVALID", ".INVALID", ".INVALID",
|
|
".sb_mask_stream", ".sb_mask_wait",
|
|
};
|
|
|
|
const char *state =
|
|
I.state <= sizeof(state_name) ? state_name[I.state] : ".INVALID";
|
|
|
|
fprintf(fp, "SET_STATE%s r%u", state, I.source);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SET_STATE_IMM32: {
|
|
cs_unpack(instr, CS_SET_STATE_IMM32, I);
|
|
|
|
const char *state_name[] = {
|
|
".sb_sel_endpoint", ".sb_sel_other", ".sb_sel_deferred", ".INVALID",
|
|
".INVALID", ".INVALID", ".INVALID", ".INVALID",
|
|
".sb_mask_stream", ".sb_mask_wait",
|
|
};
|
|
|
|
const char *state =
|
|
I.state <= sizeof(state_name) ? state_name[I.state] : ".INVALID";
|
|
|
|
fprintf(fp, "SET_STATE_IMM32%s #%" PRIu64, state, I.value);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SHARED_SB_INC: {
|
|
cs_unpack(instr, CS_SHARED_SB_INC, I);
|
|
|
|
const char *progress_increment_name[] = {
|
|
".no_increment",
|
|
".increment",
|
|
};
|
|
|
|
fprintf(fp, "SHARED_SB_INC%s%s #%u, #%u",
|
|
progress_increment_name[I.progress_increment],
|
|
defer_mode_str(I), I.sb_mask, I.shared_entry);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SHARED_SB_DEC: {
|
|
cs_unpack(instr, CS_SHARED_SB_DEC, I);
|
|
|
|
const char *progress_increment_name[] = {
|
|
".no_increment",
|
|
".increment",
|
|
};
|
|
|
|
fprintf(fp, "SHARED_SB_DEC%s #%u",
|
|
progress_increment_name[I.progress_increment], I.shared_entry);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
case MALI_CS_OPCODE_RUN_FRAGMENT: {
|
|
static const char *tile_order[] = {
|
|
"zorder", "horizontal", "vertical", "unknown",
|
|
"unknown", "rev_horizontal", "rev_vertical", "unknown",
|
|
"unknown", "unknown", "unknown", "unknown",
|
|
"unknown", "unknown", "unknown", "unknown",
|
|
};
|
|
cs_unpack(instr, CS_RUN_FRAGMENT, I);
|
|
|
|
fprintf(fp, "RUN_FRAGMENT%s%s.tile_order=%s",
|
|
I.progress_increment ? ".progress_inc" : "",
|
|
I.enable_tem ? ".tile_enable_map_enable" : "",
|
|
tile_order[I.tile_order]);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
|
|
cs_unpack(instr, CS_RUN_FULLSCREEN, I);
|
|
fprintf(fp, "RUN_FULLSCREEN%s r%u, #%" PRIx64,
|
|
I.progress_increment ? ".progress_inc" : "", I.dcd,
|
|
I.flags_override);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_FINISH_TILING: {
|
|
cs_unpack(instr, CS_FINISH_TILING, I);
|
|
fprintf(fp, "FINISH_TILING%s",
|
|
I.progress_increment ? ".progress_inc" : "");
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_FINISH_FRAGMENT: {
|
|
cs_unpack(instr, CS_FINISH_FRAGMENT, I);
|
|
fprintf(fp, "FINISH_FRAGMENT%s%s d%u, d%u, #%x, #%u",
|
|
I.increment_fragment_completed ? ".frag_end" : "",
|
|
defer_mode_str(I),
|
|
I.last_heap_chunk, I.first_heap_chunk, I.wait_mask,
|
|
I.signal_slot);
|
|
break;
|
|
}
|
|
|
|
#if PAN_ARCH < 13
|
|
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
|
|
cs_unpack(instr, CS_ADD_IMM32, I);
|
|
|
|
fprintf(fp, "ADD_IMMEDIATE32 r%u, r%u, #%" PRId64, I.destination,
|
|
I.source, I.immediate);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
|
|
cs_unpack(instr, CS_ADD_IMM64, I);
|
|
|
|
fprintf(fp, "ADD_IMMEDIATE64 d%u, d%u, #%" PRId64, I.destination,
|
|
I.source, I.immediate);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_COMPARE_SELECT32: {
|
|
cs_unpack(instr, CS_UMIN32, I);
|
|
|
|
fprintf(fp, "UMIN32 r%u, r%u, r%u", I.destination, I.source_1,
|
|
I.source_0);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
case MALI_CS_OPCODE_LOAD_MULTIPLE: {
|
|
cs_unpack(instr, CS_LOAD_MULTIPLE, I);
|
|
|
|
fprintf(fp, "LOAD_MULTIPLE ");
|
|
print_reg_tuple(I.base_register, I.mask, fp);
|
|
fprintf(fp, ", ");
|
|
print_indirect(I.address, I.offset, fp);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_STORE_MULTIPLE: {
|
|
cs_unpack(instr, CS_STORE_MULTIPLE, I);
|
|
|
|
fprintf(fp, "STORE_MULTIPLE ");
|
|
print_indirect(I.address, I.offset, fp);
|
|
fprintf(fp, ", ");
|
|
print_reg_tuple(I.base_register, I.mask, fp);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_BRANCH: {
|
|
cs_unpack(instr, CS_BRANCH, I);
|
|
fprintf(fp, "BRANCH.%s r%u, #%d", conditions_str[I.condition], I.value,
|
|
I.offset);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SET_SB_ENTRY: {
|
|
cs_unpack(instr, CS_SET_SB_ENTRY, I);
|
|
fprintf(fp, "SET_SB_ENTRY #%u, #%u", I.endpoint_entry, I.other_entry);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_PROGRESS_WAIT: {
|
|
cs_unpack(instr, CS_PROGRESS_WAIT, I);
|
|
fprintf(fp, "PROGRESS_WAIT d%u, #%u", I.source, I.queue);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: {
|
|
cs_unpack(instr, CS_SET_EXCEPTION_HANDLER, I);
|
|
fprintf(fp, "SET_EXCEPTION_HANDLER d%u, r%u", I.address, I.length);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_CALL: {
|
|
cs_unpack(instr, CS_CALL, I);
|
|
fprintf(fp, "CALL d%u, r%u", I.address, I.length);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_JUMP: {
|
|
cs_unpack(instr, CS_JUMP, I);
|
|
fprintf(fp, "JUMP d%u, r%u", I.address, I.length);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_REQ_RESOURCE: {
|
|
cs_unpack(instr, CS_REQ_RESOURCE, I);
|
|
fprintf(fp, "REQ_RESOURCE%s%s%s%s", I.compute ? ".compute" : "",
|
|
I.fragment ? ".fragment" : "", I.tiler ? ".tiler" : "",
|
|
I.idvs ? ".idvs" : "");
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_FLUSH_CACHE2: {
|
|
cs_unpack(instr, CS_FLUSH_CACHE2, I);
|
|
static const char *mode[] = {
|
|
"nop",
|
|
"clean",
|
|
"INVALID",
|
|
"clean_invalidate",
|
|
};
|
|
|
|
static const char *other_mode[] = {
|
|
"nop_other",
|
|
"INVALID",
|
|
"invalidate_other",
|
|
"INVALID",
|
|
};
|
|
|
|
fprintf(fp, "FLUSH_CACHE2.%s_l2.%s_lsc.%s%s r%u, #%x, #%u",
|
|
mode[I.l2_flush_mode], mode[I.lsc_flush_mode],
|
|
other_mode[I.other_flush_mode], defer_mode_str(I),
|
|
I.latest_flush_id, I.wait_mask, I.signal_slot);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SYNC_ADD32: {
|
|
cs_unpack(instr, CS_SYNC_ADD32, I);
|
|
fprintf(fp, "SYNC_ADD32%s%s%s [d%u], r%u, #%x, #%u",
|
|
I.error_propagate ? ".error_propagate" : "",
|
|
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system",
|
|
defer_mode_str(I), I.address,
|
|
I.data, I.wait_mask, I.signal_slot);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SYNC_SET32: {
|
|
cs_unpack(instr, CS_SYNC_SET32, I);
|
|
fprintf(fp, "SYNC_SET32%s%s%s [d%u], r%u, #%x, #%u",
|
|
I.error_propagate ? ".error_propagate" : "",
|
|
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system",
|
|
defer_mode_str(I), I.address, I.data, I.wait_mask, I.signal_slot);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SYNC_WAIT32: {
|
|
cs_unpack(instr, CS_SYNC_WAIT32, I);
|
|
fprintf(fp, "SYNC_WAIT32%s%s d%u, r%u", conditions_str[I.condition],
|
|
I.error_reject ? ".reject" : ".inherit", I.address, I.data);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_STORE_STATE: {
|
|
static const char *states_str[] = {
|
|
"SYSTEM_TIMESTAMP",
|
|
"CYCLE_COUNT",
|
|
"DISJOINT_COUNT",
|
|
"ERROR_STATE",
|
|
};
|
|
|
|
cs_unpack(instr, CS_STORE_STATE, I);
|
|
fprintf(fp, "STORE_STATE.%s%s d%u, #%i, #%x, #%u",
|
|
I.state >= ARRAY_SIZE(states_str) ? "UNKNOWN_STATE"
|
|
: states_str[I.state],
|
|
defer_mode_str(I),
|
|
I.address, I.offset, I.wait_mask, I.signal_slot);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_PROT_REGION: {
|
|
cs_unpack(instr, CS_PROT_REGION, I);
|
|
fprintf(fp, "PROT_REGION #%u", I.size);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_PROGRESS_STORE: {
|
|
cs_unpack(instr, CS_PROGRESS_STORE, I);
|
|
fprintf(fp, "PROGRESS_STORE d%u", I.source);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_PROGRESS_LOAD: {
|
|
cs_unpack(instr, CS_PROGRESS_LOAD, I);
|
|
fprintf(fp, "PROGRESS_LOAD d%u", I.destination);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: {
|
|
cs_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I);
|
|
#if PAN_ARCH >= 12
|
|
fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u, #%u",
|
|
I.progress_increment ? ".progress_inc" : "", I.srt_select,
|
|
I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task,
|
|
I.ep_limit);
|
|
#else
|
|
fprintf(fp, "RUN_COMPUTE_INDIRECT%s.srt%d.spd%d.tsd%d.fau%d #%u",
|
|
I.progress_increment ? ".progress_inc" : "", I.srt_select,
|
|
I.spd_select, I.tsd_select, I.fau_select, I.workgroups_per_task);
|
|
#endif
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ERROR_BARRIER: {
|
|
cs_unpack(instr, CS_ERROR_BARRIER, I);
|
|
fprintf(fp, "ERROR_BARRIER");
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_HEAP_SET: {
|
|
cs_unpack(instr, CS_HEAP_SET, I);
|
|
fprintf(fp, "HEAP_SET d%u", I.address);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_HEAP_OPERATION: {
|
|
cs_unpack(instr, CS_HEAP_OPERATION, I);
|
|
const char *counter_names[] = {"vt_start", "vt_end", NULL, "frag_end"};
|
|
fprintf(fp, "HEAP_OPERATION.%s #%x, #%d", counter_names[I.operation],
|
|
I.wait_mask, I.signal_slot);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_TRACE_POINT: {
|
|
cs_unpack(instr, CS_TRACE_POINT, I);
|
|
fprintf(fp, "TRACE_POINT%s r%d:r%d, #%x, #%u", defer_mode_str(I),
|
|
I.base_register, I.base_register + I.register_count - 1,
|
|
I.wait_mask, I.signal_slot);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SYNC_ADD64: {
|
|
cs_unpack(instr, CS_SYNC_ADD64, I);
|
|
fprintf(fp, "SYNC_ADD64%s%s%s [d%u], d%u, #%x, #%u",
|
|
I.error_propagate ? ".error_propagate" : "",
|
|
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system",
|
|
defer_mode_str(I), I.address, I.data, I.wait_mask, I.signal_slot);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SYNC_SET64: {
|
|
cs_unpack(instr, CS_SYNC_SET64, I);
|
|
fprintf(fp, "SYNC_SET64.%s%s%s [d%u], d%u, #%x, #%u",
|
|
I.error_propagate ? ".error_propagate" : "",
|
|
I.scope == MALI_CS_SYNC_SCOPE_CSG ? ".csg" : ".system",
|
|
defer_mode_str(I), I.address, I.data, I.wait_mask, I.signal_slot);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SYNC_WAIT64: {
|
|
cs_unpack(instr, CS_SYNC_WAIT64, I);
|
|
|
|
fprintf(fp, "SYNC_WAIT64%s%s d%u, d%u", conditions_str[I.condition],
|
|
I.error_reject ? ".reject" : ".inherit", I.address, I.data);
|
|
break;
|
|
}
|
|
|
|
default: {
|
|
fprintf(fp, "UNKNOWN_%u 0x%" PRIX64 "\n", base.opcode, base.data);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static uint32_t
|
|
cs_get_u32(struct queue_ctx *qctx, uint8_t reg)
|
|
{
|
|
assert(reg < qctx->nr_regs);
|
|
return qctx->regs[reg];
|
|
}
|
|
|
|
/*
 * Read a 64-bit value from two consecutive 32-bit registers: reg supplies
 * the low word and reg + 1 the high word.
 */
static uint64_t
cs_get_u64(struct queue_ctx *qctx, uint8_t reg)
{
   const uint64_t high_word = cs_get_u32(qctx, reg + 1);
   const uint64_t low_word = cs_get_u32(qctx, reg);

   return (high_word << 32) | low_word;
}
|
|
|
|
static void
|
|
pandecode_run_compute(struct pandecode_context *ctx, FILE *fp,
|
|
struct queue_ctx *qctx, struct MALI_CS_RUN_COMPUTE *I)
|
|
{
|
|
if (qctx->in_exception_handler)
|
|
return;
|
|
|
|
ctx->indent++;
|
|
|
|
unsigned reg_srt = 0 + (I->srt_select * 2);
|
|
unsigned reg_fau = 8 + (I->fau_select * 2);
|
|
unsigned reg_spd = 16 + (I->spd_select * 2);
|
|
unsigned reg_tsd = 24 + (I->tsd_select * 2);
|
|
|
|
uint64_t compute_srt = cs_get_u64(qctx, reg_srt);
|
|
|
|
GENX(pandecode_resource_tables)(ctx, compute_srt, "Resources");
|
|
|
|
uint64_t fau = cs_get_u64(qctx, reg_fau);
|
|
|
|
if (fau)
|
|
GENX(pandecode_fau)(ctx, fau & BITFIELD64_MASK(48), fau >> 56, "FAU");
|
|
|
|
GENX(pandecode_shader)
|
|
(ctx, cs_get_u64(qctx, reg_spd), "Shader", qctx->gpu_id);
|
|
|
|
DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_tsd),
|
|
"Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_tsd));
|
|
|
|
pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32));
|
|
DUMP_CL(ctx, COMPUTE_SIZE_WORKGROUP, &qctx->regs[33], "Workgroup size\n");
|
|
pandecode_log(ctx, "Job offset X: %u\n", cs_get_u32(qctx, 34));
|
|
pandecode_log(ctx, "Job offset Y: %u\n", cs_get_u32(qctx, 35));
|
|
pandecode_log(ctx, "Job offset Z: %u\n", cs_get_u32(qctx, 36));
|
|
pandecode_log(ctx, "Job size X: %u\n", cs_get_u32(qctx, 37));
|
|
pandecode_log(ctx, "Job size Y: %u\n", cs_get_u32(qctx, 38));
|
|
pandecode_log(ctx, "Job size Z: %u\n", cs_get_u32(qctx, 39));
|
|
|
|
ctx->indent--;
|
|
}
|
|
|
|
static void
|
|
pandecode_run_compute_indirect(struct pandecode_context *ctx, FILE *fp,
|
|
struct queue_ctx *qctx,
|
|
struct MALI_CS_RUN_COMPUTE_INDIRECT *I)
|
|
{
|
|
if (qctx->in_exception_handler)
|
|
return;
|
|
|
|
ctx->indent++;
|
|
|
|
unsigned reg_srt = 0 + (I->srt_select * 2);
|
|
unsigned reg_fau = 8 + (I->fau_select * 2);
|
|
unsigned reg_spd = 16 + (I->spd_select * 2);
|
|
unsigned reg_tsd = 24 + (I->tsd_select * 2);
|
|
|
|
uint64_t compute_srt = cs_get_u64(qctx, reg_srt);
|
|
|
|
GENX(pandecode_resource_tables)(ctx, compute_srt, "Resources");
|
|
|
|
uint64_t fau = cs_get_u64(qctx, reg_fau);
|
|
|
|
if (fau)
|
|
GENX(pandecode_fau)(ctx, fau & BITFIELD64_MASK(48), fau >> 56, "FAU");
|
|
|
|
GENX(pandecode_shader)
|
|
(ctx, cs_get_u64(qctx, reg_spd), "Shader", qctx->gpu_id);
|
|
|
|
DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_tsd),
|
|
"Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_tsd));
|
|
|
|
pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32));
|
|
DUMP_CL(ctx, COMPUTE_SIZE_WORKGROUP, &qctx->regs[33], "Workgroup size\n");
|
|
pandecode_log(ctx, "Job offset X: %u\n", cs_get_u32(qctx, 34));
|
|
pandecode_log(ctx, "Job offset Y: %u\n", cs_get_u32(qctx, 35));
|
|
pandecode_log(ctx, "Job offset Z: %u\n", cs_get_u32(qctx, 36));
|
|
pandecode_log(ctx, "Job size X: %u\n", cs_get_u32(qctx, 37));
|
|
pandecode_log(ctx, "Job size Y: %u\n", cs_get_u32(qctx, 38));
|
|
pandecode_log(ctx, "Job size Z: %u\n", cs_get_u32(qctx, 39));
|
|
|
|
ctx->indent--;
|
|
}
|
|
|
|
#if PAN_ARCH == 10
/*
 * Dump the register state referenced by a RUN_TILING instruction.
 *
 * The primitive flags come from register 56 OR-ed with the instruction's
 * flags override. The srt/fau/spd/tsd selects each pick a 64-bit register
 * pair (from r0, r8, r16 and r24 respectively); the remaining state is read
 * from fixed registers 32..60. Index-related values are only printed when
 * the merged flags have a non-zero index_type. Nothing is printed while
 * inside an exception handler. fp is not used.
 */
static void
pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp,
                     struct queue_ctx *qctx, struct MALI_CS_RUN_TILING *I)
{
   if (qctx->in_exception_handler)
      return;

   ctx->indent++;

   /* Merge flag overrides with the register flags */
   struct mali_primitive_flags_packed tiler_flags_packed = {
      .opaque[0] = cs_get_u32(qctx, 56) | I->flags_override,
   };
   pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags);

   /* Every select indexes 64-bit register pairs, so each one (the TSD
    * select included) advances by two 32-bit registers.
    */
   unsigned reg_srt = I->srt_select * 2;
   unsigned reg_fau = 8 + I->fau_select * 2;
   unsigned reg_spd = 16 + I->spd_select * 2;
   unsigned reg_tsd = 24 + I->tsd_select * 2;

   uint64_t srt = cs_get_u64(qctx, reg_srt);
   uint64_t fau = cs_get_u64(qctx, reg_fau);
   uint64_t spd = cs_get_u64(qctx, reg_spd);
   uint64_t tsd = cs_get_u64(qctx, reg_tsd);

   GENX(pandecode_resource_tables)(ctx, srt, "Fragment resources");

   if (fau) {
      uint64_t lo = fau & BITFIELD64_MASK(48);
      uint64_t hi = fau >> 56;

      GENX(pandecode_fau)(ctx, lo, hi, "Fragment FAU");
   }

   /* Stays 0 when no shader is bound; forwarded to the blend decode. */
   uint64_t fs_bin_addr = 0;
   if (spd) {
      fs_bin_addr = GENX(pandecode_shader)
         (ctx, spd, "Fragment shader", qctx->gpu_id);
   }

   DUMP_ADDR(ctx, LOCAL_STORAGE, tsd, "Fragment Local Storage @%" PRIx64 ":\n",
             tsd);

   pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32));
   pandecode_log(ctx, "Index count: %u\n", cs_get_u32(qctx, 33));
   pandecode_log(ctx, "Instance count: %u\n", cs_get_u32(qctx, 34));

   if (tiler_flags.index_type)
      pandecode_log(ctx, "Index offset: %u\n", cs_get_u32(qctx, 35));

   /* Printed as a signed value; cast so the argument matches %d. */
   pandecode_log(ctx, "Vertex offset: %d\n", (int)cs_get_u32(qctx, 36));
   pandecode_log(ctx, "Tiler DCD flags2: %X\n", cs_get_u32(qctx, 38));

   if (tiler_flags.index_type)
      pandecode_log(ctx, "Index array size: %u\n", cs_get_u32(qctx, 39));

   GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id);

   DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");
   pandecode_log(ctx, "Low depth clamp: %f\n", uif(cs_get_u32(qctx, 44)));
   pandecode_log(ctx, "High depth clamp: %f\n", uif(cs_get_u32(qctx, 45)));
   pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", cs_get_u64(qctx, 46));
   pandecode_log(ctx, "Vertex position array: %" PRIx64 "\n",
                 cs_get_u64(qctx, 48));

   /* The low 4 bits of the blend register are split from the address and
    * passed as a separate argument.
    */
   uint64_t blend = cs_get_u64(qctx, 50);
   GENX(pandecode_blend_descs)(ctx, blend & ~15, blend & 15,
                               fs_bin_addr, qctx->gpu_id);

   DUMP_ADDR(ctx, DEPTH_STENCIL, cs_get_u64(qctx, 52), "Depth/stencil");

   if (tiler_flags.index_type)
      pandecode_log(ctx, "Indices: %" PRIx64 "\n", cs_get_u64(qctx, 54));

   DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
   DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[57], "DCD Flags 0\n");
   DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[58], "DCD Flags 1\n");
   pandecode_log(ctx, "Vertex bounds: %u\n", cs_get_u32(qctx, 59));
   DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[60], "Primitive size\n");

   ctx->indent--;
}
#endif
|
|
|
|
#if PAN_ARCH >= 12
|
|
static void
|
|
pandecode_run_idvs2(struct pandecode_context *ctx, FILE *fp,
|
|
struct queue_ctx *qctx, struct MALI_CS_RUN_IDVS2 *I)
|
|
{
|
|
if (qctx->in_exception_handler)
|
|
return;
|
|
|
|
ctx->indent++;
|
|
|
|
uint64_t vert_srt = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_SRT);
|
|
uint64_t frag_srt = cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SRT);
|
|
uint64_t vert_fau = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_FAU);
|
|
uint64_t fragment_fau = cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_FAU);
|
|
uint64_t vertex_spd = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_SPD);
|
|
uint64_t fragment_spd = cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SPD);
|
|
uint64_t vertex_tsd = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_TSD);
|
|
uint64_t fragment_tsd = cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_TSD);
|
|
uint32_t global_attribute_offset =
|
|
cs_get_u32(qctx, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET);
|
|
uint32_t index_count = cs_get_u32(qctx, MALI_IDVS_SR_INDEX_COUNT);
|
|
uint32_t instance_count = cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_COUNT);
|
|
uint32_t index_offset = cs_get_u32(qctx, MALI_IDVS_SR_INDEX_OFFSET);
|
|
uint32_t vertex_offset = cs_get_u32(qctx, MALI_IDVS_SR_VERTEX_OFFSET);
|
|
uint32_t instance_offset = cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_OFFSET);
|
|
uint64_t tilder_descriptor_pointer =
|
|
cs_get_u64(qctx, MALI_IDVS_SR_TILER_CTX);
|
|
uint64_t vertex_index_array_pointer =
|
|
cs_get_u64(qctx, MALI_IDVS_SR_INDEX_BUFFER);
|
|
uint32_t index_array_size = cs_get_u32(qctx, MALI_IDVS_SR_INDEX_BUFFER_SIZE);
|
|
uint32_t varying_size = cs_get_u32(qctx, MALI_IDVS_SR_VARY_SIZE) & 0xffff;
|
|
uint64_t zsd_pointer = cs_get_u64(qctx, MALI_IDVS_SR_ZSD);
|
|
uint64_t blend = cs_get_u64(qctx, MALI_IDVS_SR_BLEND_DESC);
|
|
uint32_t raw_tiler_flags = cs_get_u32(qctx, MALI_IDVS_SR_TILER_FLAGS);
|
|
uint64_t occlusion_pointer = cs_get_u32(qctx, MALI_IDVS_SR_OQ);
|
|
|
|
/* Merge flag overrides with the register flags */
|
|
struct mali_primitive_flags_packed tiler_flags_packed = {
|
|
.opaque[0] = raw_tiler_flags | I->flags_override,
|
|
};
|
|
pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags);
|
|
|
|
GENX(pandecode_resource_tables)(ctx, vert_srt, "Vertex resources");
|
|
GENX(pandecode_resource_tables)(ctx, frag_srt, "Fragment resources");
|
|
|
|
if (vert_fau) {
|
|
uint64_t lo = vert_fau & BITFIELD64_MASK(48);
|
|
uint64_t hi = vert_fau >> 56;
|
|
|
|
GENX(pandecode_fau)(ctx, lo, hi, "Vertex FAU");
|
|
}
|
|
|
|
if (fragment_fau) {
|
|
uint64_t lo = fragment_fau & BITFIELD64_MASK(48);
|
|
uint64_t hi = fragment_fau >> 56;
|
|
|
|
GENX(pandecode_fau)(ctx, lo, hi, "Fragment FAU");
|
|
}
|
|
|
|
if (vertex_spd) {
|
|
GENX(pandecode_shader)
|
|
(ctx, vertex_spd, "Vertex shader", qctx->gpu_id);
|
|
}
|
|
|
|
uint64_t fs_bin_addr = 0;
|
|
if (fragment_spd) {
|
|
fs_bin_addr = GENX(pandecode_shader)
|
|
(ctx, fragment_spd, "Fragment shader", qctx->gpu_id);
|
|
}
|
|
|
|
DUMP_ADDR(ctx, LOCAL_STORAGE, vertex_tsd,
|
|
"Vertex Local Storage @%" PRIx64 ":\n", vertex_tsd);
|
|
DUMP_ADDR(ctx, LOCAL_STORAGE, fragment_tsd,
|
|
"Fragment Local Storage @%" PRIx64 ":\n", fragment_tsd);
|
|
|
|
pandecode_log(ctx, "Global attribute offset: %u\n", global_attribute_offset);
|
|
pandecode_log(ctx, "Index count: %u\n", index_count);
|
|
pandecode_log(ctx, "Instance count: %u\n", instance_count);
|
|
|
|
if (tiler_flags.index_type)
|
|
pandecode_log(ctx, "Index offset: %u\n", index_offset);
|
|
|
|
pandecode_log(ctx, "Vertex offset: %u\n", vertex_offset);
|
|
pandecode_log(ctx, "Instance offset: %u\n", instance_offset);
|
|
|
|
GENX(pandecode_tiler)(ctx, tilder_descriptor_pointer, qctx->gpu_id);
|
|
|
|
/* If this is true, then the scissor is actually a pointer to an
|
|
* array of boxes; bottom 56 bits are the pointer and top 8 are
|
|
* the length */
|
|
assert(!tiler_flags.scissor_array_enable);
|
|
|
|
struct mali_viewport_packed viewport_packed = {
|
|
.opaque[0] = cs_get_u32(qctx, MALI_IDVS_SR_VIEWPORT_HIGH),
|
|
.opaque[1] = cs_get_u32(qctx, MALI_IDVS_SR_VIEWPORT_HIGH + 1),
|
|
.opaque[2] = cs_get_u32(qctx, MALI_IDVS_SR_VIEWPORT_LOW),
|
|
.opaque[3] = cs_get_u32(qctx, MALI_IDVS_SR_VIEWPORT_LOW + 1),
|
|
};
|
|
DUMP_CL(ctx, VIEWPORT, &viewport_packed, "Viewport\n");
|
|
DUMP_CL(ctx, SCISSOR, &qctx->regs[MALI_IDVS_SR_SCISSOR_BOX], "Scissor\n");
|
|
|
|
pandecode_log(ctx, "Per-vertex varying size: %u\n", varying_size);
|
|
|
|
DUMP_ADDR(ctx, DEPTH_STENCIL, zsd_pointer, "Depth/stencil");
|
|
|
|
GENX(pandecode_blend_descs)(ctx, blend & ~15, blend & 15,
|
|
fs_bin_addr, qctx->gpu_id);
|
|
|
|
if (tiler_flags.index_type) {
|
|
pandecode_log(ctx, "Indices: %" PRIx64 "\n", vertex_index_array_pointer);
|
|
pandecode_log(ctx, "Index array size: %u\n", index_array_size);
|
|
}
|
|
|
|
DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
|
|
DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[MALI_IDVS_SR_DCD0], "DCD Flags 0\n");
|
|
DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[MALI_IDVS_SR_DCD1], "DCD Flags 1\n");
|
|
DUMP_CL(ctx, DCD_FLAGS_2, &qctx->regs[MALI_IDVS_SR_DCD2], "DCD Flags 2\n");
|
|
|
|
#if PAN_ARCH >= 13
|
|
float line_width = cs_get_u32(qctx, MALI_IDVS_SR_LINE_WIDTH);
|
|
pandecode_log(ctx, "Line width: %f\n", line_width);
|
|
#else
|
|
DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[MALI_IDVS_SR_PRIMITIVE_SIZE],
|
|
"Primitive size\n");
|
|
#endif
|
|
|
|
DUMP_CL(ctx, PRIMITIVE_FLAGS_2, &qctx->regs[MALI_IDVS_SR_TILER_FLAGS2],
|
|
"Tiler flags 2\n");
|
|
pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", occlusion_pointer);
|
|
|
|
ctx->indent--;
|
|
}
|
|
#else
|
|
static void
|
|
pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
|
|
struct queue_ctx *qctx, struct MALI_CS_RUN_IDVS *I)
|
|
{
|
|
if (qctx->in_exception_handler)
|
|
return;
|
|
|
|
ctx->indent++;
|
|
|
|
/* Merge flag overrides with the register flags */
|
|
struct mali_primitive_flags_packed tiler_flags_packed = {
|
|
.opaque[0] =
|
|
cs_get_u32(qctx, MALI_IDVS_SR_TILER_FLAGS) | I->flags_override,
|
|
};
|
|
pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags);
|
|
|
|
unsigned reg_position_srt = 0;
|
|
unsigned reg_position_fau = 8;
|
|
unsigned reg_position_tsd = 24;
|
|
|
|
unsigned reg_vary_srt = I->varying_srt_select ? 2 : 0;
|
|
unsigned reg_vary_fau = I->varying_fau_select ? 10 : 8;
|
|
unsigned reg_vary_tsd = I->varying_tsd_select ? 26 : 24;
|
|
|
|
unsigned reg_frag_srt = I->fragment_srt_select ? 4 : 0;
|
|
unsigned reg_frag_fau = 12;
|
|
unsigned reg_frag_tsd = I->fragment_tsd_select ? 28 : 24;
|
|
|
|
uint64_t position_srt = cs_get_u64(qctx, reg_position_srt);
|
|
uint64_t vary_srt = cs_get_u64(qctx, reg_vary_srt);
|
|
uint64_t frag_srt = cs_get_u64(qctx, reg_frag_srt);
|
|
|
|
GENX(pandecode_resource_tables)(ctx, position_srt, "Position resources");
|
|
GENX(pandecode_resource_tables)(ctx, vary_srt, "Varying resources");
|
|
GENX(pandecode_resource_tables)(ctx, frag_srt, "Fragment resources");
|
|
|
|
uint64_t position_fau = cs_get_u64(qctx, reg_position_fau);
|
|
uint64_t vary_fau = cs_get_u64(qctx, reg_vary_fau);
|
|
uint64_t fragment_fau = cs_get_u64(qctx, reg_frag_fau);
|
|
|
|
if (position_fau) {
|
|
uint64_t lo = position_fau & BITFIELD64_MASK(48);
|
|
uint64_t hi = position_fau >> 56;
|
|
|
|
GENX(pandecode_fau)(ctx, lo, hi, "Position FAU");
|
|
}
|
|
|
|
if (vary_fau) {
|
|
uint64_t lo = vary_fau & BITFIELD64_MASK(48);
|
|
uint64_t hi = vary_fau >> 56;
|
|
|
|
GENX(pandecode_fau)(ctx, lo, hi, "Varying FAU");
|
|
}
|
|
|
|
if (fragment_fau) {
|
|
uint64_t lo = fragment_fau & BITFIELD64_MASK(48);
|
|
uint64_t hi = fragment_fau >> 56;
|
|
|
|
GENX(pandecode_fau)(ctx, lo, hi, "Fragment FAU");
|
|
}
|
|
|
|
if (cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_POS_SPD)) {
|
|
GENX(pandecode_shader)
|
|
(ctx, cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_POS_SPD), "Position shader",
|
|
qctx->gpu_id);
|
|
}
|
|
|
|
if (tiler_flags.secondary_shader) {
|
|
uint64_t ptr = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_VARY_SPD);
|
|
|
|
GENX(pandecode_shader)(ctx, ptr, "Varying shader", qctx->gpu_id);
|
|
}
|
|
|
|
uint64_t fs_bin_addr = 0;
|
|
if (cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SPD)) {
|
|
fs_bin_addr = GENX(pandecode_shader)
|
|
(ctx, cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SPD), "Fragment shader",
|
|
qctx->gpu_id);
|
|
}
|
|
|
|
DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_position_tsd),
|
|
"Position Local Storage @%" PRIx64 ":\n",
|
|
cs_get_u64(qctx, reg_position_tsd));
|
|
DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_vary_tsd),
|
|
"Varying Local Storage @%" PRIx64 ":\n",
|
|
cs_get_u64(qctx, reg_vary_tsd));
|
|
DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_frag_tsd),
|
|
"Fragment Local Storage @%" PRIx64 ":\n",
|
|
cs_get_u64(qctx, reg_frag_tsd));
|
|
|
|
pandecode_log(ctx, "Global attribute offset: %u\n",
|
|
cs_get_u32(qctx, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET));
|
|
pandecode_log(ctx, "Index count: %u\n",
|
|
cs_get_u32(qctx, MALI_IDVS_SR_INDEX_COUNT));
|
|
pandecode_log(ctx, "Instance count: %u\n",
|
|
cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_COUNT));
|
|
|
|
if (tiler_flags.index_type)
|
|
pandecode_log(ctx, "Index offset: %u\n",
|
|
cs_get_u32(qctx, MALI_IDVS_SR_INDEX_OFFSET));
|
|
|
|
pandecode_log(ctx, "Vertex offset: %d\n",
|
|
cs_get_u32(qctx, MALI_IDVS_SR_VERTEX_OFFSET));
|
|
pandecode_log(ctx, "Instance offset: %u\n",
|
|
cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_OFFSET));
|
|
pandecode_log(ctx, "Tiler DCD flags2: %X\n",
|
|
cs_get_u32(qctx, MALI_IDVS_SR_DCD2));
|
|
|
|
if (tiler_flags.index_type)
|
|
pandecode_log(ctx, "Index array size: %u\n",
|
|
cs_get_u32(qctx, MALI_IDVS_SR_INDEX_BUFFER_SIZE));
|
|
|
|
GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, MALI_IDVS_SR_TILER_CTX),
|
|
qctx->gpu_id);
|
|
|
|
DUMP_CL(ctx, SCISSOR, &qctx->regs[MALI_IDVS_SR_SCISSOR_BOX], "Scissor\n");
|
|
pandecode_log(ctx, "Low depth clamp: %f\n",
|
|
uif(cs_get_u32(qctx, MALI_IDVS_SR_LOW_DEPTH_CLAMP)));
|
|
pandecode_log(ctx, "High depth clamp: %f\n",
|
|
uif(cs_get_u32(qctx, MALI_IDVS_SR_HIGH_DEPTH_CLAMP)));
|
|
pandecode_log(ctx, "Occlusion: %" PRIx64 "\n",
|
|
cs_get_u64(qctx, MALI_IDVS_SR_OQ));
|
|
|
|
if (tiler_flags.secondary_shader)
|
|
pandecode_log(ctx, "Varying allocation: %u\n",
|
|
cs_get_u32(qctx, MALI_IDVS_SR_VARY_SIZE));
|
|
|
|
uint64_t blend = cs_get_u64(qctx, MALI_IDVS_SR_BLEND_DESC);
|
|
GENX(pandecode_blend_descs)(ctx, blend & ~15, blend & 15,
|
|
fs_bin_addr, qctx->gpu_id);
|
|
|
|
DUMP_ADDR(ctx, DEPTH_STENCIL, cs_get_u64(qctx, MALI_IDVS_SR_ZSD),
|
|
"Depth/stencil");
|
|
|
|
if (tiler_flags.index_type)
|
|
pandecode_log(ctx, "Indices: %" PRIx64 "\n",
|
|
cs_get_u64(qctx, MALI_IDVS_SR_INDEX_BUFFER));
|
|
|
|
DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
|
|
DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[MALI_IDVS_SR_DCD0], "DCD Flags 0\n");
|
|
DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[MALI_IDVS_SR_DCD1], "DCD Flags 1\n");
|
|
DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[MALI_IDVS_SR_PRIMITIVE_SIZE],
|
|
"Primitive size\n");
|
|
|
|
ctx->indent--;
|
|
}
|
|
#endif
|
|
|
|
static void
|
|
pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp,
|
|
struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT *I)
|
|
{
|
|
if (qctx->in_exception_handler)
|
|
return;
|
|
|
|
ctx->indent++;
|
|
|
|
DUMP_CL(ctx, SCISSOR, &qctx->regs[MALI_FRAGMENT_SR_BBOX_MIN], "Scissor\n");
|
|
|
|
/* TODO: Tile enable map */
|
|
GENX(pandecode_fbd)
|
|
(ctx, cs_get_u64(qctx, MALI_FRAGMENT_SR_FBD_POINTER) & ~0x3full, true,
|
|
qctx->gpu_id);
|
|
|
|
ctx->indent--;
|
|
}
|
|
|
|
static void
|
|
pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp,
|
|
struct queue_ctx *qctx,
|
|
struct MALI_CS_RUN_FULLSCREEN *I)
|
|
{
|
|
if (qctx->in_exception_handler)
|
|
return;
|
|
|
|
ctx->indent++;
|
|
|
|
/* Merge flag overrides with the register flags */
|
|
struct mali_primitive_flags_packed tiler_flags_packed = {
|
|
.opaque[0] = cs_get_u32(qctx, 56) | I->flags_override,
|
|
};
|
|
pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags);
|
|
DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
|
|
|
|
GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id);
|
|
|
|
DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");
|
|
|
|
pan_unpack(
|
|
PANDECODE_PTR(ctx, cs_get_u64(qctx, I->dcd), struct mali_draw_packed),
|
|
DRAW, dcd);
|
|
GENX(pandecode_dcd)(ctx, &dcd, 0, qctx->gpu_id);
|
|
|
|
ctx->indent--;
|
|
}
|
|
|
|
static bool
|
|
interpret_cs_jump(struct pandecode_context *ctx, struct queue_ctx *qctx,
|
|
uint64_t reg_address, uint32_t reg_length)
|
|
{
|
|
uint32_t address_lo = qctx->regs[reg_address];
|
|
uint32_t address_hi = qctx->regs[reg_address + 1];
|
|
uint32_t length = qctx->regs[reg_length];
|
|
|
|
if (length % 8) {
|
|
fprintf(stderr, "CS call alignment error\n");
|
|
return false;
|
|
}
|
|
|
|
/* Map the entire subqueue now */
|
|
uint64_t address = ((uint64_t)address_hi << 32) | address_lo;
|
|
/* Return if the jump is for an exception handler that's set to zero */
|
|
if (qctx->in_exception_handler && (!address || !length)) {
|
|
qctx->in_exception_handler = false;
|
|
qctx->call_stack_depth--;
|
|
return true;
|
|
}
|
|
uint64_t *cs = pandecode_fetch_gpu_mem(ctx, address, length);
|
|
|
|
qctx->ip = cs;
|
|
qctx->end = cs + (length / 8);
|
|
|
|
/* Skip the usual IP update */
|
|
return true;
|
|
}
|
|
|
|
static bool
|
|
eval_cond(struct queue_ctx *qctx, enum mali_cs_condition cond, uint32_t reg)
|
|
{
|
|
int32_t val = qctx->regs[reg];
|
|
|
|
switch (cond) {
|
|
case MALI_CS_CONDITION_LEQUAL:
|
|
return val <= 0;
|
|
case MALI_CS_CONDITION_EQUAL:
|
|
return val == 0;
|
|
case MALI_CS_CONDITION_LESS:
|
|
return val < 0;
|
|
case MALI_CS_CONDITION_GREATER:
|
|
return val > 0;
|
|
case MALI_CS_CONDITION_NEQUAL:
|
|
return val != 0;
|
|
case MALI_CS_CONDITION_GEQUAL:
|
|
return val >= 0;
|
|
case MALI_CS_CONDITION_ALWAYS:
|
|
return true;
|
|
default:
|
|
assert(!"Invalid condition");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
static void
|
|
interpret_cs_branch(struct pandecode_context *ctx, struct queue_ctx *qctx,
|
|
int16_t offset, enum mali_cs_condition cond, uint32_t reg)
|
|
{
|
|
if (eval_cond(qctx, cond, reg))
|
|
qctx->ip += offset;
|
|
}
|
|
|
|
/*
|
|
* Interpret a single instruction of the CS, updating the register file,
|
|
* instruction pointer, and call stack. Memory access and GPU controls are
|
|
* ignored for now.
|
|
*
|
|
* Returns true if execution should continue.
|
|
*/
|
|
static bool
|
|
interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
|
|
{
|
|
FILE *fp = ctx->dump_stream;
|
|
/* Unpack the base so we get the opcode */
|
|
uint8_t *bytes = (uint8_t *)qctx->ip;
|
|
cs_unpack(bytes, CS_BASE, base);
|
|
|
|
assert(qctx->ip < qctx->end);
|
|
|
|
/* Don't try to keep track of registers/operations inside exception handler */
|
|
if (qctx->in_exception_handler) {
|
|
assert(base.opcode != MALI_CS_OPCODE_SET_EXCEPTION_HANDLER);
|
|
goto no_interpret;
|
|
}
|
|
|
|
switch (base.opcode) {
|
|
case MALI_CS_OPCODE_RUN_COMPUTE: {
|
|
cs_unpack(bytes, CS_RUN_COMPUTE, I);
|
|
pandecode_run_compute(ctx, fp, qctx, &I);
|
|
break;
|
|
}
|
|
|
|
#if PAN_ARCH == 10
|
|
case MALI_CS_OPCODE_RUN_TILING: {
|
|
cs_unpack(bytes, CS_RUN_TILING, I);
|
|
pandecode_run_tiling(ctx, fp, qctx, &I);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if PAN_ARCH >= 12
|
|
case MALI_CS_OPCODE_RUN_IDVS2: {
|
|
cs_unpack(bytes, CS_RUN_IDVS2, I);
|
|
pandecode_run_idvs2(ctx, fp, qctx, &I);
|
|
break;
|
|
}
|
|
#else
|
|
case MALI_CS_OPCODE_RUN_IDVS: {
|
|
cs_unpack(bytes, CS_RUN_IDVS, I);
|
|
pandecode_run_idvs(ctx, fp, qctx, &I);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
case MALI_CS_OPCODE_RUN_FRAGMENT: {
|
|
cs_unpack(bytes, CS_RUN_FRAGMENT, I);
|
|
pandecode_run_fragment(ctx, fp, qctx, &I);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
|
|
cs_unpack(bytes, CS_RUN_FULLSCREEN, I);
|
|
pandecode_run_fullscreen(ctx, fp, qctx, &I);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: {
|
|
cs_unpack(bytes, CS_RUN_COMPUTE_INDIRECT, I);
|
|
pandecode_run_compute_indirect(ctx, fp, qctx, &I);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_MOVE48: {
|
|
cs_unpack(bytes, CS_MOVE48, I);
|
|
|
|
qctx->regs[I.destination + 0] = (uint32_t)I.immediate;
|
|
qctx->regs[I.destination + 1] = (uint32_t)(I.immediate >> 32);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_MOVE32: {
|
|
cs_unpack(bytes, CS_MOVE32, I);
|
|
|
|
qctx->regs[I.destination] = I.immediate;
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_LOAD_MULTIPLE: {
|
|
cs_unpack(bytes, CS_LOAD_MULTIPLE, I);
|
|
uint64_t addr =
|
|
((uint64_t)qctx->regs[I.address + 1] << 32) | qctx->regs[I.address];
|
|
addr += I.offset;
|
|
|
|
uint32_t *src =
|
|
pandecode_fetch_gpu_mem(ctx, addr, util_last_bit(I.mask) * 4);
|
|
|
|
for (uint32_t i = 0; i < 16; i++) {
|
|
if (I.mask & BITFIELD_BIT(i))
|
|
qctx->regs[I.base_register + i] = src[i];
|
|
}
|
|
break;
|
|
}
|
|
|
|
#if PAN_ARCH >= 11
|
|
case MALI_CS_OPCODE_LOGIC_OP32: {
|
|
cs_unpack(bytes, CS_LOGIC_OP32, I);
|
|
|
|
uint32_t *dest = &qctx->regs[I.destination];
|
|
uint32_t source_0 = qctx->regs[I.source_0];
|
|
uint32_t source_1 = qctx->regs[I.source_1];
|
|
uint32_t mode_0 = I.mode & 1;
|
|
uint32_t mode_1 = (I.mode >> 1) & 1;
|
|
uint32_t mode_2 = (I.mode >> 2) & 1;
|
|
uint32_t mode_3 = (I.mode >> 3) & 1;
|
|
|
|
if (I.index == MALI_CS_LOGIC_OP_INDEX_INDEX)
|
|
source_1 = (1 << source_1);
|
|
|
|
uint32_t result = 0;
|
|
for (int i = 0; i < 32; i++) {
|
|
uint32_t a_n = (source_0 >> i) & 1;
|
|
uint32_t b_n = (source_1 >> i) & 1;
|
|
|
|
uint32_t tmp = 0;
|
|
tmp |= mode_0 & a_n & b_n;
|
|
tmp |= mode_1 & a_n & ~b_n;
|
|
tmp |= mode_2 & ~a_n & b_n;
|
|
tmp |= mode_3 & ~a_n & ~b_n;
|
|
result |= tmp << i;
|
|
}
|
|
|
|
*dest = result;
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if PAN_ARCH >= 13
|
|
case MALI_CS_OPCODE_ARITH_IMM32: {
|
|
cs_unpack(bytes, CS_ARITH_IMM32_BASE, I);
|
|
|
|
uint32_t *dest = &qctx->regs[I.destination];
|
|
uint32_t source = qctx->regs[I.source];
|
|
uint32_t imm = I.immediate;
|
|
uint8_t bf_position = imm & 0xff;
|
|
uint8_t bf_width = (imm >> 8) & 0xff;
|
|
uint16_t bf_imm = (imm >> 16) & 0xffff;
|
|
|
|
switch (I.sub_opcode) {
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_ADD_IMM32: {
|
|
*dest = source + imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_LSHIFT_IMM32: {
|
|
*dest = source << imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_RSHIFT_IMM_U32: {
|
|
*dest = source >> imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_RSHIFT_IMM_S32: {
|
|
*dest = (int32_t)source >> imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_BFEXT_U32: {
|
|
uint32_t mask = (1 << bf_width) - 1;
|
|
*dest = (source >> bf_position) & mask;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_BFEXT_S32: {
|
|
uint32_t mask = (1 << bf_width) - 1;
|
|
uint32_t tmp = (source >> bf_position) & mask;
|
|
*dest = util_sign_extend(tmp, bf_width);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_BFINS_IMM32: {
|
|
uint32_t mask0 = (1 << bf_width) - 1;
|
|
uint32_t mask1 = mask0 << bf_position;
|
|
uint32_t tmp = bf_imm << bf_position;
|
|
*dest = (tmp & mask1) | (source & ~mask1);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_UMIN_IMM32: {
|
|
*dest = MIN2(source, imm);
|
|
break;
|
|
}
|
|
default:
|
|
assert(0 && "unhandled ARITH_IMM32 subopcode");
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_REG32: {
|
|
cs_unpack(bytes, CS_ARITH_REG32_BASE, I);
|
|
|
|
uint32_t *dest = &qctx->regs[I.destination];
|
|
uint32_t source_0 = qctx->regs[I.source_0];
|
|
uint32_t source_1 = qctx->regs[I.source_1];
|
|
|
|
switch (I.sub_opcode) {
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_UMIN32: {
|
|
*dest = MIN2(source_0, source_1);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_ADD32: {
|
|
*dest = source_0 + source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_SUB32: {
|
|
*dest = source_0 - source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_LSHIFT32: {
|
|
*dest = source_0 << source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_RSHIFT_U32: {
|
|
*dest = source_0 >> source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_RSHIFT_S32: {
|
|
*dest = (int32_t)source_0 >> source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_BFINS32: {
|
|
uint8_t bf_position = I.immediate & 0xff;
|
|
uint8_t bf_width = (I.immediate >> 8) & 0xff;
|
|
uint32_t mask0 = (1 << bf_width) - 1;
|
|
uint32_t mask1 = mask0 << bf_position;
|
|
uint32_t tmp = source_1 << bf_position;
|
|
*dest = (tmp & mask1) | (source_0 & ~mask1);
|
|
break;
|
|
}
|
|
default:
|
|
assert(0 && "unhandled ARITH_REG32 subopcode");
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_IMM64: {
|
|
cs_unpack(bytes, CS_ARITH_IMM64_BASE, I);
|
|
|
|
uint64_t *dest = (uint64_t *)&qctx->regs[I.destination];
|
|
uint64_t source =
|
|
((uint64_t)qctx->regs[I.source + 1] << 32) | qctx->regs[I.source];
|
|
uint64_t imm = I.immediate;
|
|
uint8_t bf_position = imm & 0xff;
|
|
uint8_t bf_width = (imm >> 8) & 0xff;
|
|
uint16_t bf_imm = (imm >> 16) & 0xffff;
|
|
|
|
switch (I.sub_opcode) {
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_ADD_IMM64: {
|
|
*dest = source + imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_LSHIFT_IMM64: {
|
|
*dest = source << imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_RSHIFT_IMM_U64: {
|
|
*dest = source >> imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_RSHIFT_IMM_S64: {
|
|
*dest = (int64_t)source >> imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_BFEXT_U64: {
|
|
uint64_t mask = (1 << bf_width) - 1;
|
|
*dest = (source >> bf_position) & mask;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_BFEXT_S64: {
|
|
uint64_t mask = (1 << bf_width) - 1;
|
|
uint64_t tmp = (source >> bf_position) & mask;
|
|
*dest = util_sign_extend(tmp, bf_width);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_BFINS_IMM64: {
|
|
uint64_t mask0 = (1 << bf_width) - 1;
|
|
uint64_t mask1 = mask0 << bf_position;
|
|
uint64_t tmp = bf_imm << bf_position;
|
|
*dest = (tmp & mask1) | (source & ~mask1);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_UMIN_IMM64: {
|
|
*dest = MIN2(source, imm);
|
|
break;
|
|
}
|
|
default:
|
|
assert(0 && "unhandled ARITH_IMM64 subopcode");
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_REG64: {
|
|
cs_unpack(bytes, CS_ARITH_REG64_BASE, I);
|
|
|
|
uint64_t *dest = (uint64_t *)&qctx->regs[I.destination];
|
|
uint64_t source_0 =
|
|
((uint64_t)qctx->regs[I.source_0 + 1] << 32) | qctx->regs[I.source_0];
|
|
uint64_t source_1 =
|
|
((uint64_t)qctx->regs[I.source_1 + 1] << 32) | qctx->regs[I.source_1];
|
|
|
|
switch (I.sub_opcode) {
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_UMIN64: {
|
|
*dest = MIN2(source_0, source_1);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_ADD64: {
|
|
*dest = source_0 + source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_SUB64: {
|
|
*dest = source_0 - source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_LSHIFT64: {
|
|
*dest = source_0 << source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_RSHIFT_U64: {
|
|
*dest = source_0 >> source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_RSHIFT_S64: {
|
|
*dest = (int64_t)source_0 >> source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_BFINS64: {
|
|
uint8_t bf_position = I.immediate & 0xff;
|
|
uint8_t bf_width = (I.immediate >> 8) & 0xff;
|
|
uint64_t mask0 = (1 << bf_width) - 1;
|
|
uint64_t mask1 = mask0 << bf_position;
|
|
uint64_t tmp = source_1 << bf_position;
|
|
*dest = (tmp & mask1) | (source_0 & ~mask1);
|
|
break;
|
|
}
|
|
default:
|
|
assert(0 && "unhandled ARITH_REG64 subopcode");
|
|
}
|
|
|
|
break;
|
|
}
|
|
#else
|
|
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
|
|
cs_unpack(bytes, CS_ADD_IMM32, I);
|
|
|
|
qctx->regs[I.destination] = qctx->regs[I.source] + I.immediate;
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
|
|
cs_unpack(bytes, CS_ADD_IMM64, I);
|
|
|
|
int64_t value =
|
|
(qctx->regs[I.source] | ((int64_t)qctx->regs[I.source + 1] << 32)) +
|
|
I.immediate;
|
|
|
|
qctx->regs[I.destination] = value;
|
|
qctx->regs[I.destination + 1] = value >> 32;
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
case MALI_CS_OPCODE_CALL: {
|
|
cs_unpack(bytes, CS_CALL, I);
|
|
|
|
if (qctx->call_stack_depth == MAX_CALL_STACK_DEPTH) {
|
|
fprintf(stderr, "CS call stack overflow\n");
|
|
return false;
|
|
}
|
|
|
|
assert(qctx->call_stack_depth < MAX_CALL_STACK_DEPTH);
|
|
|
|
qctx->ip++;
|
|
|
|
/* Note: tail calls are not optimized in the hardware. */
|
|
assert(qctx->ip <= qctx->end);
|
|
|
|
unsigned depth = qctx->call_stack_depth++;
|
|
|
|
qctx->call_stack[depth].lr = qctx->ip;
|
|
qctx->call_stack[depth].end = qctx->end;
|
|
|
|
return interpret_cs_jump(ctx, qctx, I.address, I.length);
|
|
}
|
|
|
|
case MALI_CS_OPCODE_SET_EXCEPTION_HANDLER: {
|
|
cs_unpack(bytes, CS_SET_EXCEPTION_HANDLER, I);
|
|
|
|
assert(qctx->call_stack_depth < MAX_CALL_STACK_DEPTH);
|
|
|
|
qctx->ip++;
|
|
|
|
/* Note: tail calls are not optimized in the hardware. */
|
|
assert(qctx->ip <= qctx->end);
|
|
|
|
unsigned depth = qctx->call_stack_depth++;
|
|
|
|
qctx->call_stack[depth].lr = qctx->ip;
|
|
qctx->call_stack[depth].end = qctx->end;
|
|
|
|
/* Exception handler can use the full frame stack depth but we don't try
|
|
* to keep track of the nested JUMP/CALL as we don't know what will be
|
|
* the registers/memory content when the handler is triggered. */
|
|
qctx->in_exception_handler = true;
|
|
|
|
return interpret_cs_jump(ctx, qctx, I.address, I.length);
|
|
}
|
|
|
|
case MALI_CS_OPCODE_JUMP: {
|
|
cs_unpack(bytes, CS_JUMP, I);
|
|
|
|
if (qctx->call_stack_depth == 0) {
|
|
fprintf(stderr, "Cannot jump from the entrypoint\n");
|
|
return false;
|
|
}
|
|
|
|
return interpret_cs_jump(ctx, qctx, I.address, I.length);
|
|
}
|
|
|
|
case MALI_CS_OPCODE_BRANCH: {
|
|
cs_unpack(bytes, CS_BRANCH, I);
|
|
|
|
interpret_cs_branch(ctx, qctx, I.offset, I.condition, I.value);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
no_interpret:
|
|
|
|
/* Update IP first to point to the next instruction, so call doesn't
|
|
* require special handling (even for tail calls).
|
|
*/
|
|
qctx->ip++;
|
|
|
|
while (qctx->ip == qctx->end) {
|
|
/* Graceful termination */
|
|
if (qctx->call_stack_depth == 0)
|
|
return false;
|
|
|
|
/* Pop off the call stack */
|
|
unsigned old_depth = --qctx->call_stack_depth;
|
|
|
|
qctx->ip = qctx->call_stack[old_depth].lr;
|
|
qctx->end = qctx->call_stack[old_depth].end;
|
|
qctx->in_exception_handler = false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void
|
|
GENX(pandecode_interpret_cs)(struct pandecode_context *ctx, uint64_t queue,
|
|
uint32_t size, unsigned gpu_id, uint32_t *regs)
|
|
{
|
|
pandecode_dump_file_open(ctx);
|
|
|
|
uint64_t *cs = pandecode_fetch_gpu_mem(ctx, queue, size);
|
|
|
|
/* v10 has 96 registers. v12+ have 128. */
|
|
struct queue_ctx qctx = {
|
|
.nr_regs = PAN_ARCH >= 12 ? 128 : 96,
|
|
.regs = regs,
|
|
.ip = cs,
|
|
.end = cs + (size / 8),
|
|
.gpu_id = gpu_id,
|
|
|
|
/* If this is a kernel mode queue, we don't see the root ring buffer and
|
|
* we must adjust the initial call stack depth accordingly.
|
|
*/
|
|
.call_stack_depth = ctx->usermode_queue ? 0 : 1,
|
|
};
|
|
FILE *fp = ctx->dump_stream;
|
|
|
|
if (size) {
|
|
do {
|
|
uint64_t instr = *qctx.ip;
|
|
|
|
fprintf(fp, " ");
|
|
for (unsigned b = 0; b < 8; ++b)
|
|
fprintf(fp, " %02x", (uint8_t)(instr >> (8 * b)));
|
|
|
|
for (int i = 0; i < 1 + qctx.call_stack_depth; ++i)
|
|
fprintf(fp, " ");
|
|
|
|
print_cs_instr(fp, qctx.ip);
|
|
fprintf(fp, "\n");
|
|
} while (interpret_cs_instr(ctx, &qctx));
|
|
}
|
|
|
|
fflush(ctx->dump_stream);
|
|
pandecode_map_read_write(ctx);
|
|
}
|
|
|
|
struct cs_code_block {
|
|
struct list_head node;
|
|
unsigned start;
|
|
unsigned size;
|
|
struct util_dynarray predecessors;
|
|
unsigned successors[2];
|
|
};
|
|
|
|
/* One possible destination of an indirect branch. */
struct cs_indirect_branch_target {
   /* Value of the 64-bit address register pair read by the branch */
   uint64_t address;

   /* Value of the 32-bit length register read by the branch */
   uint32_t length;
};
|
|
|
|
struct cs_indirect_branch {
|
|
unsigned instr_idx;
|
|
bool has_unknown_targets;
|
|
struct util_dynarray targets;
|
|
};
|
|
|
|
struct cs_code_cfg {
|
|
uint64_t *instrs;
|
|
unsigned instr_count;
|
|
struct cs_code_block **blk_map;
|
|
struct util_dynarray indirect_branches;
|
|
};
|
|
|
|
static struct cs_code_block *
|
|
cs_code_block_alloc(void *alloc_ctx, unsigned start, unsigned size)
|
|
{
|
|
struct cs_code_block *block = rzalloc(alloc_ctx, struct cs_code_block);
|
|
|
|
block->start = start;
|
|
block->size = size;
|
|
memset(block->successors, ~0, sizeof(block->successors));
|
|
list_inithead(&block->node);
|
|
util_dynarray_init(&block->predecessors, alloc_ctx);
|
|
return block;
|
|
}
|
|
|
|
static void
|
|
record_indirect_branch_target(struct cs_code_cfg *cfg,
|
|
struct list_head *blk_stack,
|
|
struct cs_code_block *cur_blk, unsigned blk_offs,
|
|
struct cs_indirect_branch *ibranch)
|
|
{
|
|
union {
|
|
uint32_t u32[256];
|
|
uint64_t u64[128];
|
|
} reg_file = {0};
|
|
|
|
list_add(&cur_blk->node, blk_stack);
|
|
list_for_each_entry(struct cs_code_block, blk, blk_stack, node) {
|
|
for (; blk_offs < blk->size &&
|
|
blk->start + blk_offs != ibranch->instr_idx;
|
|
blk_offs++) {
|
|
const uint64_t *instr = &cfg->instrs[blk->start + blk_offs];
|
|
cs_unpack(instr, CS_BASE, base);
|
|
switch (base.opcode) {
|
|
case MALI_CS_OPCODE_MOVE48: {
|
|
cs_unpack(instr, CS_MOVE48, I);
|
|
|
|
assert(I.destination % 2 == 0 &&
|
|
"Destination register should be aligned to 2");
|
|
|
|
reg_file.u64[I.destination / 2] = I.immediate;
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_MOVE32: {
|
|
cs_unpack(instr, CS_MOVE32, I);
|
|
reg_file.u32[I.destination] = I.immediate;
|
|
break;
|
|
}
|
|
|
|
#if PAN_ARCH >= 11
|
|
case MALI_CS_OPCODE_LOGIC_OP32: {
|
|
cs_unpack(instr, CS_LOGIC_OP32, I);
|
|
|
|
uint32_t *dest = ®_file.u32[I.destination];
|
|
uint32_t source_0 = reg_file.u32[I.source_0];
|
|
uint32_t source_1 = reg_file.u32[I.source_1];
|
|
uint32_t mode_0 = I.mode & 1;
|
|
uint32_t mode_1 = (I.mode >> 1) & 1;
|
|
uint32_t mode_2 = (I.mode >> 2) & 1;
|
|
uint32_t mode_3 = (I.mode >> 3) & 1;
|
|
|
|
if (I.index == MALI_CS_LOGIC_OP_INDEX_INDEX)
|
|
source_1 = (1 << source_1);
|
|
|
|
uint32_t result = 0;
|
|
for (int i = 0; i < 32; i++) {
|
|
uint32_t a_n = (source_0 >> i) & 1;
|
|
uint32_t b_n = (source_1 >> i) & 1;
|
|
|
|
uint32_t tmp = 0;
|
|
tmp |= mode_0 & a_n & b_n;
|
|
tmp |= mode_1 & a_n & ~b_n;
|
|
tmp |= mode_2 & ~a_n & b_n;
|
|
tmp |= mode_3 & ~a_n & ~b_n;
|
|
result |= tmp << i;
|
|
}
|
|
|
|
*dest = result;
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
#if PAN_ARCH >= 13
|
|
case MALI_CS_OPCODE_ARITH_IMM32: {
|
|
cs_unpack(instr, CS_ARITH_IMM32_BASE, I);
|
|
|
|
uint32_t *dest = ®_file.u32[I.destination];
|
|
uint32_t source = reg_file.u32[I.source];
|
|
uint32_t imm = I.immediate;
|
|
uint8_t bf_position = imm & 0xff;
|
|
uint8_t bf_width = (imm >> 8) & 0xff;
|
|
uint16_t bf_imm = (imm >> 16) & 0xffff;
|
|
|
|
switch (I.sub_opcode) {
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_ADD_IMM32: {
|
|
*dest = source + imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_LSHIFT_IMM32: {
|
|
*dest = source << imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_RSHIFT_IMM_U32: {
|
|
*dest = source >> imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_RSHIFT_IMM_S32: {
|
|
*dest = (int32_t)source >> imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_BFEXT_U32: {
|
|
uint32_t mask = (1 << bf_width) - 1;
|
|
*dest = (source >> bf_position) & mask;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_BFEXT_S32: {
|
|
uint32_t mask = (1 << bf_width) - 1;
|
|
uint32_t tmp = (source >> bf_position) & mask;
|
|
*dest = util_sign_extend(tmp, bf_width);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_BFINS_IMM32: {
|
|
uint32_t mask0 = (1 << bf_width) - 1;
|
|
uint32_t mask1 = mask0 << bf_position;
|
|
uint32_t tmp = bf_imm << bf_position;
|
|
*dest = (tmp & mask1) | (source & ~mask1);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM32_SUB_OPCODE_UMIN_IMM32: {
|
|
*dest = MIN2(source, imm);
|
|
break;
|
|
}
|
|
default:
|
|
assert(0 && "unhandled ARITH_IMM32 subopcode");
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_REG32: {
|
|
cs_unpack(instr, CS_ARITH_REG32_BASE, I);
|
|
|
|
uint32_t *dest = ®_file.u32[I.destination];
|
|
uint32_t source_0 = reg_file.u32[I.source_0];
|
|
uint32_t source_1 = reg_file.u32[I.source_1];
|
|
|
|
switch (I.sub_opcode) {
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_UMIN32: {
|
|
*dest = MIN2(source_0, source_1);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_ADD32: {
|
|
*dest = source_0 + source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_SUB32: {
|
|
*dest = source_0 - source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_LSHIFT32: {
|
|
*dest = source_0 << source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_RSHIFT_U32: {
|
|
*dest = source_0 >> source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_RSHIFT_S32: {
|
|
*dest = (int32_t)source_0 >> source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG32_SUB_OPCODE_BFINS32: {
|
|
uint8_t bf_position = I.immediate & 0xff;
|
|
uint8_t bf_width = (I.immediate >> 8) & 0xff;
|
|
uint32_t mask0 = (1 << bf_width) - 1;
|
|
uint32_t mask1 = mask0 << bf_position;
|
|
uint32_t tmp = source_1 << bf_position;
|
|
*dest = (tmp & mask1) | (source_0 & ~mask1);
|
|
break;
|
|
}
|
|
default:
|
|
assert(0 && "unhandled ARITH_REG32 subopcode");
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_IMM64: {
|
|
cs_unpack(instr, CS_ARITH_IMM64_BASE, I);
|
|
|
|
uint64_t *dest = ®_file.u64[I.destination / 2];
|
|
uint64_t source = reg_file.u64[I.source / 2];
|
|
uint64_t imm = I.immediate;
|
|
uint8_t bf_position = imm & 0xff;
|
|
uint8_t bf_width = (imm >> 8) & 0xff;
|
|
uint16_t bf_imm = (imm >> 16) & 0xffff;
|
|
|
|
switch (I.sub_opcode) {
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_ADD_IMM64: {
|
|
*dest = source + imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_LSHIFT_IMM64: {
|
|
*dest = source << imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_RSHIFT_IMM_U64: {
|
|
*dest = source >> imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_RSHIFT_IMM_S64: {
|
|
*dest = (int64_t)source >> imm;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_BFEXT_U64: {
|
|
uint64_t mask = (1 << bf_width) - 1;
|
|
*dest = (source >> bf_position) & mask;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_BFEXT_S64: {
|
|
uint64_t mask = (1 << bf_width) - 1;
|
|
uint64_t tmp = (source >> bf_position) & mask;
|
|
*dest = util_sign_extend(tmp, bf_width);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_BFINS_IMM64: {
|
|
uint64_t mask0 = (1 << bf_width) - 1;
|
|
uint64_t mask1 = mask0 << bf_position;
|
|
uint64_t tmp = bf_imm << bf_position;
|
|
*dest = (tmp & mask1) | (source & ~mask1);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_IMM64_SUB_OPCODE_UMIN_IMM64: {
|
|
*dest = MIN2(source, imm);
|
|
break;
|
|
}
|
|
default:
|
|
assert(0 && "unhandled ARITH_IMM64 subopcode");
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_REG64: {
|
|
cs_unpack(instr, CS_ARITH_REG64_BASE, I);
|
|
|
|
uint64_t *dest = ®_file.u64[I.destination];
|
|
uint64_t source_0 = reg_file.u64[I.source_0 / 2];
|
|
uint64_t source_1 = reg_file.u64[I.source_1 / 2];
|
|
|
|
switch (I.sub_opcode) {
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_UMIN64: {
|
|
*dest = MIN2(source_0, source_1);
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_ADD64: {
|
|
*dest = source_0 + source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_SUB64: {
|
|
*dest = source_0 - source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_LSHIFT64: {
|
|
*dest = source_0 << source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_RSHIFT_U64: {
|
|
*dest = source_0 >> source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_RSHIFT_S64: {
|
|
*dest = (int64_t)source_0 >> source_1;
|
|
break;
|
|
}
|
|
case MALI_CS_ARITH_REG64_SUB_OPCODE_BFINS64: {
|
|
uint8_t bf_position = I.immediate & 0xff;
|
|
uint8_t bf_width = (I.immediate >> 8) & 0xff;
|
|
uint64_t mask0 = (1 << bf_width) - 1;
|
|
uint64_t mask1 = mask0 << bf_position;
|
|
uint64_t tmp = source_1 << bf_position;
|
|
*dest = (tmp & mask1) | (source_0 & ~mask1);
|
|
break;
|
|
}
|
|
default:
|
|
assert(0 && "unhandled ARITH_REG64 subopcode");
|
|
}
|
|
|
|
break;
|
|
}
|
|
#else
|
|
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
|
|
cs_unpack(instr, CS_ADD_IMM32, I);
|
|
reg_file.u32[I.destination] = reg_file.u32[I.source] + I.immediate;
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
|
|
cs_unpack(instr, CS_ADD_IMM64, I);
|
|
|
|
assert(I.destination % 2 == 0 &&
|
|
"Destination register should be aligned to 2");
|
|
assert(I.source % 2 == 0 &&
|
|
"Source register should be aligned to 2");
|
|
|
|
reg_file.u64[I.destination / 2] =
|
|
reg_file.u64[I.source / 2] + I.immediate;
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_COMPARE_SELECT32: {
|
|
cs_unpack(instr, CS_UMIN32, I);
|
|
reg_file.u32[I.destination] =
|
|
MIN2(reg_file.u32[I.source_1], reg_file.u32[I.source_0]);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
blk_offs = 0;
|
|
}
|
|
list_delinit(&cur_blk->node);
|
|
|
|
uint64_t *instr = &cfg->instrs[ibranch->instr_idx];
|
|
cs_unpack(instr, CS_JUMP, I);
|
|
|
|
assert(I.address % 2 == 0 && "Address register should be aligned to 2");
|
|
|
|
struct cs_indirect_branch_target target = {
|
|
.address = reg_file.u64[I.address / 2],
|
|
.length = reg_file.u32[I.length],
|
|
};
|
|
|
|
util_dynarray_append(&ibranch->targets, target);
|
|
}
|
|
|
|
static void
|
|
collect_indirect_branch_targets_recurse(struct cs_code_cfg *cfg,
|
|
struct list_head *blk_stack,
|
|
BITSET_WORD *track_map,
|
|
struct cs_code_block *cur_blk,
|
|
int instr_ptr,
|
|
struct cs_indirect_branch *ibranch)
|
|
{
|
|
for (; instr_ptr >= (int)cur_blk->start; instr_ptr--) {
|
|
assert(instr_ptr >= 0);
|
|
const uint64_t *instr = &cfg->instrs[instr_ptr];
|
|
cs_unpack(instr, CS_BASE, base);
|
|
switch (base.opcode) {
|
|
case MALI_CS_OPCODE_MOVE48: {
|
|
cs_unpack(instr, CS_MOVE48, I);
|
|
BITSET_CLEAR(track_map, I.destination);
|
|
BITSET_CLEAR(track_map, I.destination + 1);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_MOVE32: {
|
|
cs_unpack(instr, CS_MOVE32, I);
|
|
BITSET_CLEAR(track_map, I.destination);
|
|
break;
|
|
}
|
|
|
|
#if PAN_ARCH >= 13
|
|
case MALI_CS_OPCODE_ARITH_IMM32: {
|
|
cs_unpack(instr, CS_ARITH_IMM32_BASE, I);
|
|
if (BITSET_TEST(track_map, I.destination)) {
|
|
BITSET_SET(track_map, I.source);
|
|
BITSET_CLEAR(track_map, I.destination);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_IMM64: {
|
|
cs_unpack(instr, CS_ARITH_IMM64_BASE, I);
|
|
if (BITSET_TEST(track_map, I.destination)) {
|
|
BITSET_SET(track_map, I.source);
|
|
BITSET_CLEAR(track_map, I.destination);
|
|
}
|
|
if (BITSET_TEST(track_map, I.destination + 1)) {
|
|
BITSET_SET(track_map, I.source + 1);
|
|
BITSET_CLEAR(track_map, I.destination + 1);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_REG32: {
|
|
cs_unpack(instr, CS_ARITH_REG32_BASE, I);
|
|
if (BITSET_TEST(track_map, I.destination)) {
|
|
BITSET_SET(track_map, I.source_1);
|
|
BITSET_SET(track_map, I.source_0);
|
|
BITSET_CLEAR(track_map, I.destination);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ARITH_REG64: {
|
|
cs_unpack(instr, CS_ARITH_REG64_BASE, I);
|
|
if (BITSET_TEST(track_map, I.destination)) {
|
|
BITSET_SET(track_map, I.source_1);
|
|
BITSET_SET(track_map, I.source_0);
|
|
BITSET_CLEAR(track_map, I.destination);
|
|
}
|
|
if (BITSET_TEST(track_map, I.destination + 1)) {
|
|
BITSET_SET(track_map, I.source_1 + 1);
|
|
BITSET_SET(track_map, I.source_0 + 1);
|
|
BITSET_CLEAR(track_map, I.destination + 1);
|
|
}
|
|
break;
|
|
}
|
|
#else
|
|
case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
|
|
cs_unpack(instr, CS_ADD_IMM32, I);
|
|
if (BITSET_TEST(track_map, I.destination)) {
|
|
BITSET_SET(track_map, I.source);
|
|
BITSET_CLEAR(track_map, I.destination);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_ADD_IMMEDIATE64: {
|
|
cs_unpack(instr, CS_ADD_IMM64, I);
|
|
if (BITSET_TEST(track_map, I.destination)) {
|
|
BITSET_SET(track_map, I.source);
|
|
BITSET_CLEAR(track_map, I.destination);
|
|
}
|
|
if (BITSET_TEST(track_map, I.destination + 1)) {
|
|
BITSET_SET(track_map, I.source + 1);
|
|
BITSET_CLEAR(track_map, I.destination + 1);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_COMPARE_SELECT32: {
|
|
cs_unpack(instr, CS_UMIN32, I);
|
|
if (BITSET_TEST(track_map, I.destination)) {
|
|
BITSET_SET(track_map, I.source_1);
|
|
BITSET_SET(track_map, I.source_0);
|
|
BITSET_CLEAR(track_map, I.destination);
|
|
}
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
case MALI_CS_OPCODE_LOAD_MULTIPLE: {
|
|
cs_unpack(instr, CS_LOAD_MULTIPLE, I);
|
|
for (unsigned i = 0; i < 16; i++) {
|
|
if ((I.mask & BITFIELD_BIT(i)) &&
|
|
BITSET_TEST(track_map, I.base_register + i)) {
|
|
ibranch->has_unknown_targets = true;
|
|
return;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_PROGRESS_LOAD: {
|
|
cs_unpack(instr, CS_PROGRESS_LOAD, I);
|
|
for (unsigned i = 0; i < 16; i++) {
|
|
if (BITSET_TEST(track_map, I.destination) ||
|
|
BITSET_TEST(track_map, I.destination + 1)) {
|
|
ibranch->has_unknown_targets = true;
|
|
return;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (__bitset_is_empty(track_map, BITSET_WORDS(256))) {
|
|
record_indirect_branch_target(cfg, blk_stack, cur_blk,
|
|
instr_ptr - cur_blk->start, ibranch);
|
|
return;
|
|
}
|
|
}
|
|
|
|
assert(!__bitset_is_empty(track_map, BITSET_WORDS(256)));
|
|
|
|
if (util_dynarray_num_elements(&cur_blk->predecessors, unsigned) == 0) {
|
|
ibranch->has_unknown_targets = true;
|
|
return;
|
|
}
|
|
|
|
list_add(&cur_blk->node, blk_stack);
|
|
util_dynarray_foreach(&cur_blk->predecessors, unsigned, pred) {
|
|
struct cs_code_block *prev_blk = cfg->blk_map[*pred];
|
|
|
|
/* If the node is already in the block stack, we skip it
|
|
* and consider this path leading to an unknown target. */
|
|
if (!list_is_empty(&cur_blk->node)) {
|
|
ibranch->has_unknown_targets = true;
|
|
continue;
|
|
}
|
|
|
|
collect_indirect_branch_targets_recurse(
|
|
cfg, blk_stack, track_map, prev_blk,
|
|
prev_blk->start + prev_blk->size - 1, ibranch);
|
|
}
|
|
list_delinit(&cur_blk->node);
|
|
|
|
return;
|
|
}
|
|
|
|
static void
|
|
collect_indirect_branch_targets(struct cs_code_cfg *cfg,
|
|
struct cs_indirect_branch *ibranch)
|
|
{
|
|
uint64_t *instr = &cfg->instrs[ibranch->instr_idx];
|
|
struct cs_code_block *cur_blk = cfg->blk_map[ibranch->instr_idx];
|
|
struct list_head blk_stack;
|
|
BITSET_DECLARE(track_map, 256) = {0};
|
|
|
|
list_inithead(&blk_stack);
|
|
|
|
cs_unpack(instr, CS_JUMP, I);
|
|
BITSET_SET(track_map, I.address);
|
|
BITSET_SET(track_map, I.address + 1);
|
|
BITSET_SET(track_map, I.length);
|
|
|
|
collect_indirect_branch_targets_recurse(cfg, &blk_stack, track_map, cur_blk,
|
|
ibranch->instr_idx - 1, ibranch);
|
|
}
|
|
|
|
static struct cs_code_cfg *
|
|
get_cs_cfg(struct pandecode_context *ctx, struct hash_table_u64 *symbols,
|
|
uint64_t bin, uint32_t bin_size)
|
|
{
|
|
uint32_t instr_count = bin_size / sizeof(uint64_t);
|
|
struct cs_code_cfg *cfg = _mesa_hash_table_u64_search(symbols, bin);
|
|
|
|
if (cfg) {
|
|
assert(cfg->instr_count == instr_count);
|
|
return cfg;
|
|
}
|
|
|
|
uint64_t *instrs = pandecode_fetch_gpu_mem(ctx, bin, bin_size);
|
|
|
|
cfg = rzalloc(symbols, struct cs_code_cfg);
|
|
_mesa_hash_table_u64_insert(symbols, bin, cfg);
|
|
|
|
util_dynarray_init(&cfg->indirect_branches, cfg);
|
|
|
|
cfg->blk_map = rzalloc_array(cfg, struct cs_code_block *, instr_count);
|
|
cfg->instrs = instrs;
|
|
cfg->instr_count = instr_count;
|
|
|
|
struct cs_code_block *block = cs_code_block_alloc(cfg, 0, 0);
|
|
|
|
for (unsigned i = 0; i < instr_count; i++) {
|
|
const uint64_t *instr = &instrs[i];
|
|
|
|
if (!cfg->blk_map[i]) {
|
|
cfg->blk_map[i] = block;
|
|
block->size++;
|
|
} else {
|
|
if (block->successors[0] == ~0)
|
|
block->successors[0] = i;
|
|
|
|
block = cfg->blk_map[i];
|
|
util_dynarray_append(&block->predecessors, i - 1);
|
|
}
|
|
|
|
cs_unpack(instr, CS_BASE, base);
|
|
|
|
if (base.opcode == MALI_CS_OPCODE_JUMP ||
|
|
base.opcode == MALI_CS_OPCODE_CALL) {
|
|
struct cs_indirect_branch ibranch = {
|
|
.instr_idx = i,
|
|
};
|
|
|
|
util_dynarray_append(&cfg->indirect_branches, ibranch);
|
|
}
|
|
|
|
if (base.opcode != MALI_CS_OPCODE_BRANCH)
|
|
continue;
|
|
|
|
cs_unpack(instr, CS_BRANCH, I);
|
|
|
|
unsigned target = MIN2(i + 1 + I.offset, instr_count);
|
|
|
|
/* If the target of the branch is the next instruction, it's just a NOP,
|
|
* and we consider it the same block. */
|
|
if (target == i + 1)
|
|
continue;
|
|
|
|
if (I.offset < 0 && cfg->blk_map[target]->start != target) {
|
|
struct cs_code_block *old = cfg->blk_map[target];
|
|
struct cs_code_block *new =
|
|
cs_code_block_alloc(cfg, target, old->start + old->size - target);
|
|
|
|
util_dynarray_append(&new->predecessors, target - 1);
|
|
memcpy(&new->successors, &old->successors, sizeof(new->successors));
|
|
|
|
old->successors[0] = target;
|
|
old->successors[1] = ~0;
|
|
old->size = new->start - old->start;
|
|
|
|
for (unsigned j = 0; j <= new->size; j++)
|
|
cfg->blk_map[new->start + j] = new;
|
|
}
|
|
|
|
if (I.offset > 0 && target < instr_count && !cfg->blk_map[target]) {
|
|
struct cs_code_block *new = cs_code_block_alloc(cfg, target, 1);
|
|
|
|
cfg->blk_map[target] = new;
|
|
util_dynarray_append(&new->predecessors, i);
|
|
}
|
|
|
|
block->successors[0] = target;
|
|
if (I.condition != MALI_CS_CONDITION_ALWAYS)
|
|
block->successors[1] = i + 1;
|
|
|
|
block = cs_code_block_alloc(cfg, i + 1, 0);
|
|
|
|
if (target == i + 1 || I.condition != MALI_CS_CONDITION_ALWAYS) {
|
|
util_dynarray_append(&block->predecessors, i);
|
|
}
|
|
}
|
|
|
|
util_dynarray_foreach(&cfg->indirect_branches, struct cs_indirect_branch,
|
|
ibranch)
|
|
{
|
|
collect_indirect_branch_targets(cfg, ibranch);
|
|
util_dynarray_foreach(&ibranch->targets, struct cs_indirect_branch_target,
|
|
target)
|
|
{
|
|
get_cs_cfg(ctx, symbols, target->address, target->length);
|
|
}
|
|
}
|
|
|
|
return cfg;
|
|
}
|
|
|
|
static void
|
|
print_cs_binary(struct pandecode_context *ctx, uint64_t bin,
|
|
struct cs_code_cfg *cfg, const char *name)
|
|
{
|
|
pandecode_log(ctx, "%s@%" PRIx64 "{\n", name, bin);
|
|
unsigned ibranch_idx = 0;
|
|
|
|
ctx->indent++;
|
|
for (unsigned i = 0; i < cfg->instr_count; i++) {
|
|
if (i && cfg->blk_map[i - 1] != cfg->blk_map[i]) {
|
|
ctx->indent--;
|
|
pandecode_log(ctx, "label_%" PRIx64 ":\n", bin + i * sizeof(uint64_t));
|
|
ctx->indent++;
|
|
}
|
|
|
|
pandecode_make_indent(ctx);
|
|
print_cs_instr(ctx->dump_stream, &cfg->instrs[i]);
|
|
cs_unpack(&cfg->instrs[i], CS_BASE, base);
|
|
switch (base.opcode) {
|
|
case MALI_CS_OPCODE_JUMP:
|
|
case MALI_CS_OPCODE_CALL: {
|
|
struct cs_indirect_branch *ibranch = util_dynarray_element(
|
|
&cfg->indirect_branches, struct cs_indirect_branch, ibranch_idx);
|
|
|
|
assert(ibranch->instr_idx == i);
|
|
fprintf(ctx->dump_stream, " // ");
|
|
util_dynarray_foreach(&ibranch->targets,
|
|
struct cs_indirect_branch_target, target)
|
|
{
|
|
fprintf(ctx->dump_stream, "%scs@%" PRIx64,
|
|
target == ibranch->targets.data ? "" : ",",
|
|
target->address);
|
|
}
|
|
if (ibranch->has_unknown_targets)
|
|
fprintf(ctx->dump_stream, "%s??", ibranch->targets.size ? "," : "");
|
|
ibranch_idx++;
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_BRANCH: {
|
|
cs_unpack(&cfg->instrs[i], CS_BRANCH, I);
|
|
fprintf(ctx->dump_stream, " // ");
|
|
|
|
unsigned target = i + 1 + I.offset;
|
|
|
|
if (target < cfg->instr_count)
|
|
fprintf(ctx->dump_stream, "label_%" PRIx64,
|
|
bin + (target * sizeof(uint64_t)));
|
|
else
|
|
fprintf(ctx->dump_stream, "end_of_cs");
|
|
break;
|
|
}
|
|
|
|
#if PAN_ARCH >= 12
|
|
case MALI_CS_OPCODE_RUN_IDVS2:
|
|
#else
|
|
case MALI_CS_OPCODE_RUN_IDVS:
|
|
#endif
|
|
case MALI_CS_OPCODE_RUN_FRAGMENT:
|
|
case MALI_CS_OPCODE_RUN_FULLSCREEN:
|
|
case MALI_CS_OPCODE_RUN_COMPUTE:
|
|
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
|
|
fprintf(ctx->dump_stream, " // tracepoint_%" PRIx64,
|
|
bin + (i * sizeof(uint64_t)));
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
fprintf(ctx->dump_stream, "\n");
|
|
}
|
|
ctx->indent--;
|
|
pandecode_log(ctx, "} // %s@%" PRIx64 "\n\n", name, bin);
|
|
}
|
|
|
|
void
|
|
GENX(pandecode_cs_binary)(struct pandecode_context *ctx, uint64_t bin,
|
|
uint32_t bin_size, unsigned gpu_id)
|
|
{
|
|
if (!bin_size)
|
|
return;
|
|
|
|
pandecode_dump_file_open(ctx);
|
|
|
|
struct hash_table_u64 *symbols = _mesa_hash_table_u64_create(NULL);
|
|
struct cs_code_cfg *main_cfg = get_cs_cfg(ctx, symbols, bin, bin_size);
|
|
|
|
print_cs_binary(ctx, bin, main_cfg, "main_cs");
|
|
hash_table_u64_foreach(symbols, he)
|
|
{
|
|
struct cs_code_cfg *other_cfg = he.data;
|
|
if (other_cfg == main_cfg)
|
|
continue;
|
|
|
|
print_cs_binary(ctx, he.key, other_cfg, "cs");
|
|
}
|
|
|
|
ralloc_free(symbols);
|
|
|
|
pandecode_map_read_write(ctx);
|
|
}
|
|
|
|
void
|
|
GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
|
|
uint32_t trace_size, unsigned gpu_id)
|
|
{
|
|
pandecode_dump_file_open(ctx);
|
|
|
|
void *trace_data = pandecode_fetch_gpu_mem(ctx, trace, trace_size);
|
|
|
|
while (trace_size > 0) {
|
|
uint32_t regs[256] = {};
|
|
uint64_t *ip = trace_data;
|
|
|
|
uint64_t *instr = pandecode_fetch_gpu_mem(ctx, *ip, sizeof(*instr));
|
|
|
|
/* v10 has 96 registers. v12+ have 128. */
|
|
struct queue_ctx qctx = {
|
|
.nr_regs = PAN_ARCH >= 12 ? 128 : 96,
|
|
.regs = regs,
|
|
.ip = instr,
|
|
.end = instr + 1,
|
|
.gpu_id = gpu_id,
|
|
};
|
|
|
|
pandecode_make_indent(ctx);
|
|
print_cs_instr(ctx->dump_stream, instr);
|
|
fprintf(ctx->dump_stream, " // from tracepoint_%" PRIx64 "\n", *ip);
|
|
|
|
cs_unpack(instr, CS_BASE, base);
|
|
|
|
switch (base.opcode) {
|
|
#if PAN_ARCH >= 12
|
|
case MALI_CS_OPCODE_RUN_IDVS2: {
|
|
struct cs_run_idvs2_trace *idvs_trace = trace_data;
|
|
|
|
assert(trace_size >= sizeof(*idvs_trace));
|
|
cs_unpack(instr, CS_RUN_IDVS2, I);
|
|
memcpy(regs, idvs_trace->sr, sizeof(idvs_trace->sr));
|
|
|
|
if (I.draw_id_register_enable)
|
|
regs[I.draw_id] = idvs_trace->draw_id;
|
|
|
|
pandecode_run_idvs2(ctx, ctx->dump_stream, &qctx, &I);
|
|
trace_data = idvs_trace + 1;
|
|
trace_size -= sizeof(*idvs_trace);
|
|
break;
|
|
}
|
|
#else
|
|
case MALI_CS_OPCODE_RUN_IDVS: {
|
|
struct cs_run_idvs_trace *idvs_trace = trace_data;
|
|
|
|
assert(trace_size >= sizeof(*idvs_trace));
|
|
cs_unpack(instr, CS_RUN_IDVS, I);
|
|
memcpy(regs, idvs_trace->sr, sizeof(idvs_trace->sr));
|
|
|
|
if (I.draw_id_register_enable)
|
|
regs[I.draw_id] = idvs_trace->draw_id;
|
|
|
|
pandecode_run_idvs(ctx, ctx->dump_stream, &qctx, &I);
|
|
trace_data = idvs_trace + 1;
|
|
trace_size -= sizeof(*idvs_trace);
|
|
break;
|
|
}
|
|
#endif
|
|
|
|
case MALI_CS_OPCODE_RUN_FRAGMENT: {
|
|
struct cs_run_fragment_trace *frag_trace = trace_data;
|
|
|
|
assert(trace_size >= sizeof(*frag_trace));
|
|
cs_unpack(instr, CS_RUN_FRAGMENT, I);
|
|
memcpy(®s[40], frag_trace->sr, sizeof(frag_trace->sr));
|
|
pandecode_run_fragment(ctx, ctx->dump_stream, &qctx, &I);
|
|
trace_data = frag_trace + 1;
|
|
trace_size -= sizeof(*frag_trace);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_RUN_FULLSCREEN: {
|
|
struct cs_run_fullscreen_trace *fs_trace = trace_data;
|
|
|
|
assert(trace_size >= sizeof(*fs_trace));
|
|
cs_unpack(instr, CS_RUN_FULLSCREEN, I);
|
|
regs[I.dcd + 0] = (uint32_t)(fs_trace->dcd);
|
|
regs[I.dcd + 1] = (uint32_t)(fs_trace->dcd >> 32);
|
|
uint32_t sr_idx = 0;
|
|
u_foreach_bit64(b, CS_RUN_FULLSCREEN_SR_MASK)
|
|
regs[b] = fs_trace->sr[sr_idx++];
|
|
pandecode_run_fullscreen(ctx, ctx->dump_stream, &qctx, &I);
|
|
trace_data = fs_trace + 1;
|
|
trace_size -= sizeof(*fs_trace);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_RUN_COMPUTE: {
|
|
struct cs_run_compute_trace *comp_trace = trace_data;
|
|
|
|
assert(trace_size >= sizeof(*comp_trace));
|
|
cs_unpack(instr, CS_RUN_COMPUTE, I);
|
|
memcpy(regs, comp_trace->sr, sizeof(comp_trace->sr));
|
|
pandecode_run_compute(ctx, ctx->dump_stream, &qctx, &I);
|
|
trace_data = comp_trace + 1;
|
|
trace_size -= sizeof(*comp_trace);
|
|
break;
|
|
}
|
|
|
|
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: {
|
|
struct cs_run_compute_trace *comp_trace = trace_data;
|
|
|
|
assert(trace_size >= sizeof(*comp_trace));
|
|
cs_unpack(instr, CS_RUN_COMPUTE_INDIRECT, I);
|
|
memcpy(regs, comp_trace->sr, sizeof(comp_trace->sr));
|
|
pandecode_run_compute_indirect(ctx, ctx->dump_stream, &qctx, &I);
|
|
trace_data = comp_trace + 1;
|
|
trace_size -= sizeof(*comp_trace);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
assert(!"Invalid trace packet");
|
|
break;
|
|
}
|
|
|
|
pandecode_log(ctx, "\n");
|
|
}
|
|
|
|
fflush(ctx->dump_stream);
|
|
pandecode_map_read_write(ctx);
|
|
}
|
|
#endif
|