mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 04:20:08 +01:00
pan/decode: Add support for decoding CSF
Add support to pandecode for Mali architecture v10, featuring the new command stream frontend (CSF). This replaces the "job chain" with a new Command Execution Unit (CEU) that runs a domain-specific assembly language. That requires us to refactor pandecode substantially, splitting out JM-only code from shared JM/CSF common code, and adding new CSF-only decode routines to disassemble and interpret CSF command streams and pretty-print the data structures they reference. This is of course impossible to do properly, since the CEU is pretty easily Turing-complete and hence subject to the halting problem. But we implement some simple heuristics to follow jumps that are just good enough for the simple command streams emitted by both the DDK and Panfrost. Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20837>
This commit is contained in:
parent
102d4292d5
commit
f2740ac69c
8 changed files with 1730 additions and 871 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -32,6 +32,7 @@
|
|||
#include "wrap.h"
|
||||
|
||||
extern FILE *pandecode_dump_stream;
|
||||
extern unsigned pandecode_indent;
|
||||
|
||||
void pandecode_dump_file_open(void);
|
||||
|
||||
|
|
@ -85,6 +86,8 @@ __pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size, int line,
|
|||
#define PANDECODE_PTR_VAR(name, gpu_va) \
|
||||
name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), __LINE__, __FILE__)
|
||||
|
||||
void pandecode_validate_buffer(mali_ptr addr, size_t sz);
|
||||
|
||||
/* Forward declare for all supported gens to permit thunking */
|
||||
void pandecode_jc_v4(mali_ptr jc_gpu_va, unsigned gpu_id);
|
||||
void pandecode_jc_v5(mali_ptr jc_gpu_va, unsigned gpu_id);
|
||||
|
|
@ -98,6 +101,9 @@ void pandecode_abort_on_fault_v6(mali_ptr jc_gpu_va);
|
|||
void pandecode_abort_on_fault_v7(mali_ptr jc_gpu_va);
|
||||
void pandecode_abort_on_fault_v9(mali_ptr jc_gpu_va);
|
||||
|
||||
void pandecode_cs_v10(mali_ptr queue, uint32_t size, unsigned gpu_id,
|
||||
uint32_t *regs);
|
||||
|
||||
static inline void
|
||||
pan_hexdump(FILE *fp, const uint8_t *hex, size_t cnt, bool with_strings)
|
||||
{
|
||||
|
|
@ -141,4 +147,116 @@ pan_hexdump(FILE *fp, const uint8_t *hex, size_t cnt, bool with_strings)
|
|||
fprintf(fp, "\n");
|
||||
}
|
||||
|
||||
/* Logging infrastructure */
|
||||
/* Emit the current indentation prefix to the dump stream: one copy of the
 * padding string per level of pandecode_indent. */
static void
pandecode_make_indent(void)
{
   for (unsigned i = 0; i < pandecode_indent; ++i)
      fprintf(pandecode_dump_stream, " ");
}
|
||||
|
||||
/* printf-style logging to the pandecode dump stream, prefixed with the
 * current indentation. PRINTFLIKE(1, 2) lets the compiler type-check the
 * format string against the variadic arguments. */
static void PRINTFLIKE(1, 2) pandecode_log(const char *format, ...)
{
   va_list ap;

   pandecode_make_indent();
   va_start(ap, format);
   vfprintf(pandecode_dump_stream, format, ap);
   va_end(ap);
}
|
||||
|
||||
/* Continue a log line already started by pandecode_log: same output stream
 * but no indentation prefix is emitted. */
static void
pandecode_log_cont(const char *format, ...)
{
   va_list ap;

   va_start(ap, format);
   vfprintf(pandecode_dump_stream, format, ap);
   va_end(ap);
}
|
||||
|
||||
/* Convenience methods */

/* Pretty-print an already-unpacked descriptor of type T at the current
 * indentation level, preceded by a caller-supplied printf-style header.
 *
 * Wrapped in do/while (0) so each macro expands to a single statement and
 * stays safe inside unbraced if/else bodies (the previous bare { } form
 * left a stray ';' after the block).
 */
#define DUMP_UNPACKED(T, var, ...)                                             \
   do {                                                                        \
      pandecode_log(__VA_ARGS__);                                              \
      pan_print(pandecode_dump_stream, T, var, (pandecode_indent + 1) * 2);    \
   } while (0)

/* Unpack a packed descriptor of type T from CPU pointer cl, then dump it */
#define DUMP_CL(T, cl, ...)                                                    \
   do {                                                                        \
      pan_unpack(cl, T, temp);                                                 \
      DUMP_UNPACKED(T, temp, __VA_ARGS__);                                     \
   } while (0)

/* Unpack and dump section S of an aggregate descriptor A located at cl */
#define DUMP_SECTION(A, S, cl, ...)                                            \
   do {                                                                        \
      pan_section_unpack(cl, A, S, temp);                                      \
      pandecode_log(__VA_ARGS__);                                              \
      pan_section_print(pandecode_dump_stream, A, S, temp,                     \
                        (pandecode_indent + 1) * 2);                           \
   } while (0)

/* Map a GPU VA to a CPU pointer covering one descriptor of type T.
 * Deliberately NOT wrapped in do/while: it declares `cl` for use by the
 * statements that follow it (e.g. inside DUMP_ADDR's own block).
 */
#define MAP_ADDR(T, addr, cl)                                                  \
   const uint8_t *cl = pandecode_fetch_gpu_mem(addr, pan_size(T));

/* Fetch a descriptor of type T from GPU VA addr, then unpack and dump it */
#define DUMP_ADDR(T, addr, ...)                                                \
   do {                                                                        \
      MAP_ADDR(T, addr, cl)                                                    \
      DUMP_CL(T, cl, __VA_ARGS__);                                             \
   } while (0)
|
||||
|
||||
void pandecode_shader_disassemble(mali_ptr shader_ptr, unsigned gpu_id);
|
||||
|
||||
#ifdef PAN_ARCH
|
||||
|
||||
/* Information about the framebuffer passed back for additional analysis */
|
||||
struct pandecode_fbd {
|
||||
unsigned rt_count;
|
||||
bool has_extra;
|
||||
};
|
||||
|
||||
struct pandecode_fbd GENX(pandecode_fbd)(uint64_t gpu_va, bool is_fragment,
|
||||
unsigned gpu_id);
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
void GENX(pandecode_dcd)(const struct MALI_DRAW *p, unsigned unused,
|
||||
unsigned gpu_id);
|
||||
#else
|
||||
void GENX(pandecode_dcd)(const struct MALI_DRAW *p, enum mali_job_type job_type,
|
||||
unsigned gpu_id);
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH <= 5
|
||||
void GENX(pandecode_texture)(mali_ptr u, unsigned tex);
|
||||
#else
|
||||
void GENX(pandecode_texture)(const void *cl, unsigned tex);
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 5
|
||||
mali_ptr GENX(pandecode_blend)(void *descs, int rt_no, mali_ptr frag_shader);
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 6
|
||||
void GENX(pandecode_tiler)(mali_ptr gpu_va, unsigned gpu_id);
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH >= 9
|
||||
void GENX(pandecode_shader_environment)(const struct MALI_SHADER_ENVIRONMENT *p,
|
||||
unsigned gpu_id);
|
||||
|
||||
void GENX(pandecode_resource_tables)(mali_ptr addr, const char *label);
|
||||
|
||||
void GENX(pandecode_fau)(mali_ptr addr, unsigned count, const char *name);
|
||||
|
||||
mali_ptr GENX(pandecode_shader)(mali_ptr addr, const char *label,
|
||||
unsigned gpu_id);
|
||||
|
||||
void GENX(pandecode_blend_descs)(mali_ptr blend, unsigned count,
|
||||
mali_ptr frag_shader, unsigned gpu_id);
|
||||
|
||||
void GENX(pandecode_depth_stencil)(mali_ptr addr);
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* __MMAP_TRACE_H__ */
|
||||
|
|
|
|||
|
|
@ -36,8 +36,14 @@
|
|||
#include "util/u_dynarray.h"
|
||||
#include "decode.h"
|
||||
|
||||
#include "compiler/bifrost/disassemble.h"
|
||||
#include "compiler/valhall/disassemble.h"
|
||||
#include "midgard/disassemble.h"
|
||||
|
||||
FILE *pandecode_dump_stream;
|
||||
|
||||
unsigned pandecode_indent;
|
||||
|
||||
/* Memory handling */
|
||||
|
||||
static struct rb_tree mmap_tree;
|
||||
|
|
@ -99,6 +105,43 @@ pandecode_find_mapped_gpu_mem_containing(uint64_t addr)
|
|||
return mem;
|
||||
}
|
||||
|
||||
/*
|
||||
* To check for memory safety issues, validates that the given pointer in GPU
|
||||
* memory is valid, containing at least sz bytes. This function is a tool to
|
||||
* detect GPU-side memory bugs by validating pointers.
|
||||
*/
|
||||
void
|
||||
pandecode_validate_buffer(mali_ptr addr, size_t sz)
|
||||
{
|
||||
if (!addr) {
|
||||
pandecode_log("// XXX: null pointer deref\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Find a BO */
|
||||
|
||||
struct pandecode_mapped_memory *bo =
|
||||
pandecode_find_mapped_gpu_mem_containing(addr);
|
||||
|
||||
if (!bo) {
|
||||
pandecode_log("// XXX: invalid memory dereference\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Bounds check */
|
||||
|
||||
unsigned offset = addr - bo->gpu_va;
|
||||
unsigned total = offset + sz;
|
||||
|
||||
if (total > bo->length) {
|
||||
pandecode_log("// XXX: buffer overrun. "
|
||||
"Chunk of size %zu at offset %d in buffer of size %zu. "
|
||||
"Overrun by %zu bytes. \n",
|
||||
sz, offset, bo->length, total - bo->length);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pandecode_map_read_write(void)
|
||||
{
|
||||
|
|
@ -218,7 +261,7 @@ pandecode_dump_file_open(void)
|
|||
const char *dump_file_base =
|
||||
debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
|
||||
if (force_stderr || !strcmp(dump_file_base, "stderr"))
|
||||
pandecode_dump_stream = stderr;
|
||||
pandecode_dump_stream = stdout; // stderr;
|
||||
else {
|
||||
char buffer[1024];
|
||||
snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
|
||||
|
|
@ -357,3 +400,47 @@ pandecode_jc(mali_ptr jc_gpu_va, unsigned gpu_id)
|
|||
|
||||
simple_mtx_unlock(&pandecode_lock);
|
||||
}
|
||||
|
||||
/*
 * Entry point for decoding a CSF command queue: takes the global pandecode
 * lock and thunks to the per-architecture implementation. Only arch v10 is
 * wired up here (the first CSF hardware supported).
 *
 * queue_gpu_va: GPU VA of the command stream
 * size: stream size in bytes
 * regs: initial CSHWIF register file contents
 */
void
pandecode_cs(mali_ptr queue_gpu_va, uint32_t size, unsigned gpu_id,
             uint32_t *regs)
{
   simple_mtx_lock(&pandecode_lock);

   switch (pan_arch(gpu_id)) {
   case 10:
      pandecode_cs_v10(queue_gpu_va, size, gpu_id, regs);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&pandecode_lock);
}
|
||||
|
||||
/*
 * Disassemble a shader at the given GPU VA, dispatching to the Midgard,
 * Bifrost or Valhall disassembler based on the GPU architecture. The true
 * code size is unknown, so disassembly runs to the end of the mapped buffer
 * containing the shader.
 */
void
pandecode_shader_disassemble(mali_ptr shader_ptr, unsigned gpu_id)
{
   uint8_t *PANDECODE_PTR_VAR(code, shader_ptr);

   /* Compute maximum possible size */
   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing(shader_ptr);
   size_t sz = mem->length - (shader_ptr - mem->gpu_va);

   /* Print some boilerplate to clearly denote the assembly (which doesn't
    * obey indentation rules), and actually do the disassembly! */

   /* NOTE(review): sz is size_t but printed with PRId64; the types differ on
    * 32-bit hosts — %zu would be the exact specifier. Confirm before
    * changing output format. */
   pandecode_log_cont("\nShader %p (GPU VA %" PRIx64 ") sz %" PRId64 "\n", code,
                      shader_ptr, sz);

   if (pan_arch(gpu_id) >= 9) {
      disassemble_valhall(pandecode_dump_stream, (const uint64_t *)code, sz,
                          true);
   } else if (pan_arch(gpu_id) >= 6)
      disassemble_bifrost(pandecode_dump_stream, code, sz, false);
   else
      disassemble_midgard(pandecode_dump_stream, code, sz, gpu_id, true);

   pandecode_log_cont("\n\n");
}
|
||||
|
|
|
|||
764
src/panfrost/lib/genxml/decode_csf.c
Normal file
764
src/panfrost/lib/genxml/decode_csf.c
Normal file
|
|
@ -0,0 +1,764 @@
|
|||
/*
|
||||
* Copyright (C) 2022-2023 Collabora, Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "decode.h"
|
||||
|
||||
#if PAN_ARCH >= 10
|
||||
/* Limit for Mali-G610. -1 because we're not including the active frame */
|
||||
#define MAX_CALL_STACK_DEPTH (8 - 1)
|
||||
|
||||
/*
 * Interpreter state for one CSF queue: mirrors the architectural state of
 * the Command Execution Unit — register file, instruction pointer, and the
 * hardware call stack.
 */
struct queue_ctx {
   /* Size of CSHWIF register file in 32-bit registers */
   unsigned nr_regs;

   /* CSHWIF register file */
   uint32_t *regs;

   /* Current instruction pointer (CPU pointer for convenience) */
   uint64_t *ip;

   /* Current instruction end pointer */
   uint64_t *end;

   /* Call stack. Depth=0 means root */
   struct {
      /* Link register to return to */
      uint64_t *lr;

      /* End pointer, there is a return (or exit) after */
      uint64_t *end;
   } call_stack[MAX_CALL_STACK_DEPTH];
   uint8_t call_stack_depth;

   /* GPU ID, used to pick the right shader disassembler */
   unsigned gpu_id;
};
|
||||
|
||||
static void
|
||||
dump_cshwif_registers(struct queue_ctx *ctx)
|
||||
{
|
||||
for (unsigned i = 0; i < ctx->nr_regs; ++i)
|
||||
fprintf(pandecode_dump_stream, "r%u: %X\n", i, ctx->regs[i]);
|
||||
}
|
||||
|
||||
/* Read a 32-bit register from the emulated CSHWIF register file */
static uint32_t
cs_get_u32(struct queue_ctx *ctx, uint8_t reg)
{
   assert(reg < ctx->nr_regs);
   return ctx->regs[reg];
}
|
||||
|
||||
/* Read a 64-bit value from the register pair (reg = low half, reg + 1 = high
 * half). Bounds checking happens inside cs_get_u32. */
static uint64_t
cs_get_u64(struct queue_ctx *ctx, uint8_t reg)
{
   return (((uint64_t)cs_get_u32(ctx, reg + 1)) << 32) | cs_get_u32(ctx, reg);
}
|
||||
|
||||
static void
|
||||
pandecode_run_compute(FILE *fp, struct queue_ctx *ctx,
|
||||
struct MALI_CEU_RUN_COMPUTE *I)
|
||||
{
|
||||
const char *axes[4] = {"x_axis", "y_axis", "z_axis"};
|
||||
|
||||
/* Print the instruction. Ignore the selects and the flags override
|
||||
* since we'll print them implicitly later.
|
||||
*/
|
||||
fprintf(fp, "RUN_COMPUTE.%s #%u\n", axes[I->task_axis], I->task_increment);
|
||||
|
||||
pandecode_indent++;
|
||||
|
||||
unsigned reg_srt = 0 + (I->srt_select * 2);
|
||||
unsigned reg_fau = 8 + (I->fau_select * 2);
|
||||
unsigned reg_spd = 16 + (I->spd_select * 2);
|
||||
unsigned reg_tsd = 24 + (I->tsd_select * 2);
|
||||
|
||||
GENX(pandecode_resource_tables)(cs_get_u64(ctx, reg_srt), "Resources");
|
||||
|
||||
mali_ptr fau = cs_get_u64(ctx, reg_fau);
|
||||
|
||||
if (fau)
|
||||
GENX(pandecode_fau)(fau & BITFIELD64_MASK(48), fau >> 56, "FAU");
|
||||
|
||||
GENX(pandecode_shader)(cs_get_u64(ctx, reg_spd), "Shader", ctx->gpu_id);
|
||||
|
||||
DUMP_ADDR(LOCAL_STORAGE, cs_get_u64(ctx, reg_tsd),
|
||||
"Local Storage @%" PRIx64 ":\n", cs_get_u64(ctx, reg_tsd));
|
||||
|
||||
pandecode_log("Global attribute offset: %u\n", cs_get_u32(ctx, 32));
|
||||
DUMP_CL(COMPUTE_SIZE_WORKGROUP, &ctx->regs[33], "Workgroup size\n");
|
||||
pandecode_log("Job offset X: %u\n", cs_get_u32(ctx, 34));
|
||||
pandecode_log("Job offset Y: %u\n", cs_get_u32(ctx, 35));
|
||||
pandecode_log("Job offset Z: %u\n", cs_get_u32(ctx, 36));
|
||||
pandecode_log("Job size X: %u\n", cs_get_u32(ctx, 37));
|
||||
pandecode_log("Job size Y: %u\n", cs_get_u32(ctx, 38));
|
||||
pandecode_log("Job size Z: %u\n", cs_get_u32(ctx, 39));
|
||||
|
||||
pandecode_indent--;
|
||||
}
|
||||
|
||||
static void
|
||||
pandecode_run_idvs(FILE *fp, struct queue_ctx *ctx, struct MALI_CEU_RUN_IDVS *I)
|
||||
{
|
||||
/* Print the instruction. Ignore the selects and the flags override
|
||||
* since we'll print them implicitly later.
|
||||
*/
|
||||
fprintf(fp, "RUN_IDVS%s", I->malloc_enable ? "" : ".no_malloc");
|
||||
|
||||
if (I->draw_id_register_enable)
|
||||
fprintf(fp, " r%u", I->draw_id);
|
||||
|
||||
fprintf(fp, "\n");
|
||||
|
||||
pandecode_indent++;
|
||||
|
||||
/* Merge flag overrides with the register flags */
|
||||
uint32_t tiler_flags_raw = cs_get_u64(ctx, 56);
|
||||
tiler_flags_raw |= I->flags_override;
|
||||
pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags);
|
||||
|
||||
unsigned reg_position_srt = 0;
|
||||
unsigned reg_position_fau = 8;
|
||||
unsigned reg_position_tsd = 24;
|
||||
|
||||
unsigned reg_vary_srt = I->varying_srt_select ? 2 : 0;
|
||||
unsigned reg_vary_fau = I->varying_fau_select ? 10 : 8;
|
||||
unsigned reg_vary_tsd = I->varying_tsd_select ? 26 : 24;
|
||||
|
||||
unsigned reg_frag_srt = I->fragment_srt_select ? 4 : 0;
|
||||
unsigned reg_frag_fau = 12;
|
||||
unsigned reg_frag_tsd = I->fragment_tsd_select ? 28 : 24;
|
||||
|
||||
uint64_t position_srt = cs_get_u64(ctx, reg_position_srt);
|
||||
uint64_t vary_srt = cs_get_u64(ctx, reg_vary_srt);
|
||||
uint64_t frag_srt = cs_get_u64(ctx, reg_frag_srt);
|
||||
|
||||
GENX(pandecode_resource_tables)(position_srt, "Position resources");
|
||||
GENX(pandecode_resource_tables)(vary_srt, "Varying resources");
|
||||
GENX(pandecode_resource_tables)(frag_srt, "Fragment resources");
|
||||
|
||||
mali_ptr position_fau = cs_get_u64(ctx, reg_position_fau);
|
||||
mali_ptr vary_fau = cs_get_u64(ctx, reg_vary_fau);
|
||||
mali_ptr fragment_fau = cs_get_u64(ctx, reg_frag_fau);
|
||||
|
||||
if (position_fau) {
|
||||
uint64_t lo = position_fau & BITFIELD64_MASK(48);
|
||||
uint64_t hi = position_fau >> 56;
|
||||
|
||||
GENX(pandecode_fau)(lo, hi, "Position FAU");
|
||||
}
|
||||
|
||||
if (vary_fau) {
|
||||
uint64_t lo = vary_fau & BITFIELD64_MASK(48);
|
||||
uint64_t hi = vary_fau >> 56;
|
||||
|
||||
GENX(pandecode_fau)(lo, hi, "Varying FAU");
|
||||
}
|
||||
|
||||
if (fragment_fau) {
|
||||
uint64_t lo = fragment_fau & BITFIELD64_MASK(48);
|
||||
uint64_t hi = fragment_fau >> 56;
|
||||
|
||||
GENX(pandecode_fau)(lo, hi, "Fragment FAU");
|
||||
}
|
||||
|
||||
GENX(pandecode_shader)(cs_get_u64(ctx, 16), "Position shader", ctx->gpu_id);
|
||||
|
||||
if (tiler_flags.secondary_shader) {
|
||||
uint64_t ptr = cs_get_u64(ctx, 18);
|
||||
|
||||
GENX(pandecode_shader)(ptr, "Varying shader", ctx->gpu_id);
|
||||
}
|
||||
|
||||
GENX(pandecode_shader)(cs_get_u64(ctx, 20), "Fragment shader", ctx->gpu_id);
|
||||
|
||||
DUMP_ADDR(LOCAL_STORAGE, cs_get_u64(ctx, 24),
|
||||
"Position Local Storage @%" PRIx64 ":\n",
|
||||
cs_get_u64(ctx, reg_position_tsd));
|
||||
DUMP_ADDR(LOCAL_STORAGE, cs_get_u64(ctx, 24),
|
||||
"Varying Local Storage @%" PRIx64 ":\n",
|
||||
cs_get_u64(ctx, reg_vary_tsd));
|
||||
DUMP_ADDR(LOCAL_STORAGE, cs_get_u64(ctx, 30),
|
||||
"Fragment Local Storage @%" PRIx64 ":\n",
|
||||
cs_get_u64(ctx, reg_frag_tsd));
|
||||
|
||||
pandecode_log("Global attribute offset: %u\n", cs_get_u32(ctx, 32));
|
||||
pandecode_log("Index count: %u\n", cs_get_u32(ctx, 33));
|
||||
pandecode_log("Instance count: %u\n", cs_get_u32(ctx, 34));
|
||||
|
||||
if (tiler_flags.index_type)
|
||||
pandecode_log("Index offset: %u\n", cs_get_u32(ctx, 35));
|
||||
|
||||
pandecode_log("Vertex offset: %u\n", cs_get_u32(ctx, 36));
|
||||
pandecode_log("Instance offset: %u\n", cs_get_u32(ctx, 37));
|
||||
pandecode_log("Tiler DCD flags2: %X\n", cs_get_u32(ctx, 38));
|
||||
|
||||
if (tiler_flags.index_type)
|
||||
pandecode_log("Index array size: %u\n", cs_get_u32(ctx, 39));
|
||||
|
||||
GENX(pandecode_tiler)(cs_get_u64(ctx, 40), ctx->gpu_id);
|
||||
|
||||
DUMP_CL(SCISSOR, &ctx->regs[42], "Scissor\n");
|
||||
pandecode_log("Low depth clamp: %f\n", uif(cs_get_u32(ctx, 44)));
|
||||
pandecode_log("High depth clamp: %f\n", uif(cs_get_u32(ctx, 45)));
|
||||
pandecode_log("Occlusion: %" PRIx64 "\n", cs_get_u64(ctx, 46));
|
||||
|
||||
if (tiler_flags.secondary_shader)
|
||||
pandecode_log("Varying allocation: %u\n", cs_get_u32(ctx, 48));
|
||||
|
||||
mali_ptr blend = cs_get_u64(ctx, 50);
|
||||
GENX(pandecode_blend_descs)(blend & ~7, blend & 7, 0, ctx->gpu_id);
|
||||
|
||||
DUMP_ADDR(DEPTH_STENCIL, cs_get_u64(ctx, 52), "Depth/stencil");
|
||||
|
||||
if (tiler_flags.index_type)
|
||||
pandecode_log("Indices: %" PRIx64 "\n", cs_get_u64(ctx, 54));
|
||||
|
||||
DUMP_UNPACKED(PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
|
||||
DUMP_CL(DCD_FLAGS_0, &ctx->regs[57], "DCD Flags 0\n");
|
||||
DUMP_CL(DCD_FLAGS_1, &ctx->regs[58], "DCD Flags 1\n");
|
||||
DUMP_CL(PRIMITIVE_SIZE, &ctx->regs[60], "Primitive size\n");
|
||||
|
||||
pandecode_indent--;
|
||||
}
|
||||
|
||||
/*
 * Decode a RUN_FRAGMENT instruction: dumps the scissor registers and the
 * framebuffer descriptor pointed to by register pair 40/41. The mnemonic
 * itself is printed by the caller (disassemble_ceu_instr).
 */
static void
pandecode_run_fragment(struct queue_ctx *ctx, struct MALI_CEU_RUN_FRAGMENT *I)
{
   pandecode_indent++;

   DUMP_CL(SCISSOR, &ctx->regs[42], "Scissor\n");

   /* TODO: Tile enable map */
   GENX(pandecode_fbd)(cs_get_u64(ctx, 40), true, ctx->gpu_id);

   pandecode_indent--;
}
|
||||
|
||||
/* Print an indirect memory operand: "[dN]" or, with a displacement,
 * "[dN + off]". */
static void
print_indirect(unsigned address, int16_t offset, FILE *fp)
{
   if (offset == 0)
      fprintf(fp, "[d%u]", address);
   else
      fprintf(fp, "[d%u + %d]", address, offset);
}
|
||||
|
||||
/* Print a register-list operand such as "r2:r4:r6" (one register per set
 * mask bit, ascending), or "_" when the mask is empty. */
static void
print_reg_tuple(unsigned base, uint16_t mask, FILE *fp)
{
   bool printed_any = false;

   for (unsigned bit = 0; bit < 16; ++bit) {
      if (!(mask & (1u << bit)))
         continue;

      fprintf(fp, "%sr%u", printed_any ? ":" : "", base + bit);
      printed_any = true;
   }

   if (mask == 0)
      fprintf(fp, "_");
}
|
||||
|
||||
/*
 * Disassemble one 64-bit CEU instruction to fp at the given indent level.
 * In verbose mode the raw little-endian bytes are printed first. ctx is
 * needed because the RUN_* cases dereference the register file to
 * pretty-print the descriptors the instruction consumes.
 */
static void
disassemble_ceu_instr(uint64_t dword, unsigned indent, bool verbose, FILE *fp,
                      struct queue_ctx *ctx)
{
   if (verbose) {
      fprintf(fp, " ");
      /* Hexdump of the instruction, byte 0 first */
      for (unsigned b = 0; b < 8; ++b)
         fprintf(fp, " %02x", (uint8_t)(dword >> (8 * b)));
   }

   for (int i = 0; i < indent; ++i)
      fprintf(fp, " ");

   /* Unpack the base so we get the opcode */
   uint8_t *bytes = (uint8_t *)&dword;
   pan_unpack(bytes, CEU_BASE, base);

   switch (base.opcode) {
   case MALI_CEU_OPCODE_NOP: {
      pan_unpack(bytes, CEU_NOP, I);

      /* Show any payload smuggled into a NOP */
      if (I.ignored)
         fprintf(fp, "NOP // 0x%" PRIX64 "\n", I.ignored);
      else
         fprintf(fp, "NOP\n");
      break;
   }

   case MALI_CEU_OPCODE_MOVE: {
      pan_unpack(bytes, CEU_MOVE, I);

      fprintf(fp, "MOVE d%u, #0x%" PRIX64 "\n", I.destination, I.immediate);
      break;
   }

   case MALI_CEU_OPCODE_MOVE32: {
      pan_unpack(bytes, CEU_MOVE32, I);
      fprintf(fp, "MOVE32 r%u, #0x%X\n", I.destination, I.immediate);
      break;
   }

   case MALI_CEU_OPCODE_WAIT: {
      bool first = true;
      pan_unpack(bytes, CEU_WAIT, I);
      fprintf(fp, "WAIT ");

      /* Print the scoreboard slots waited on as a comma-separated list */
      u_foreach_bit(i, I.slots) {
         fprintf(fp, "%s%u", first ? "" : ",", i);
         first = false;
      }

      fprintf(fp, "\n");
      break;
   }

   case MALI_CEU_OPCODE_RUN_COMPUTE: {
      pan_unpack(bytes, CEU_RUN_COMPUTE, I);
      pandecode_run_compute(fp, ctx, &I);
      break;
   }

   case MALI_CEU_OPCODE_RUN_IDVS: {
      pan_unpack(bytes, CEU_RUN_IDVS, I);
      pandecode_run_idvs(fp, ctx, &I);
      break;
   }

   case MALI_CEU_OPCODE_RUN_FRAGMENT: {
      pan_unpack(bytes, CEU_RUN_FRAGMENT, I);
      fprintf(fp, "RUN_FRAGMENT%s\n",
              I.enable_tem ? ".tile_enable_map_enable" : "");
      pandecode_run_fragment(ctx, &I);
      break;
   }

   case MALI_CEU_OPCODE_ADD_IMMEDIATE32: {
      pan_unpack(bytes, CEU_ADD_IMMEDIATE32, I);

      fprintf(fp, "ADD_IMMEDIATE32 r%u, r%u, #%d\n", I.destination, I.source,
              I.immediate);
      break;
   }

   case MALI_CEU_OPCODE_ADD_IMMEDIATE64: {
      pan_unpack(bytes, CEU_ADD_IMMEDIATE64, I);

      fprintf(fp, "ADD_IMMEDIATE64 d%u, d%u, #%d\n", I.destination, I.source,
              I.immediate);
      break;
   }

   case MALI_CEU_OPCODE_LOAD_MULTIPLE: {
      pan_unpack(bytes, CEU_LOAD_MULTIPLE, I);

      /* Destination register tuple, then the indirect source */
      fprintf(fp, "LOAD_MULTIPLE ");
      print_reg_tuple(I.base, I.mask, fp);
      fprintf(fp, ", ");
      print_indirect(I.address, I.offset, fp);
      fprintf(fp, "\n");
      break;
   }

   case MALI_CEU_OPCODE_STORE_MULTIPLE: {
      pan_unpack(bytes, CEU_STORE_MULTIPLE, I);

      /* Indirect destination, then the source register tuple */
      fprintf(fp, "STORE_MULTIPLE ");
      print_indirect(I.address, I.offset, fp);
      fprintf(fp, ", ");
      print_reg_tuple(I.base, I.mask, fp);
      fprintf(fp, "\n");
      break;
   }

   case MALI_CEU_OPCODE_SET_SB_ENTRY: {
      pan_unpack(bytes, CEU_SET_SB_ENTRY, I);

      fprintf(fp, "SET_SB_ENTRY #%u, #%u\n", I.endpoint_entry, I.other_entry);
      break;
   }

   case MALI_CEU_OPCODE_SYNC_ADD32: {
      pan_unpack(bytes, CEU_SYNC_ADD32, I);
      bool first = true;
      fprintf(fp, "SYNC_ADD32%s%s signal(%u), wait(",
              I.error_propagate ? ".error_propagate" : "",
              I.scope_csg ? ".csg" : ".system", I.scoreboard_slot);

      u_foreach_bit(i, I.wait_mask) {
         fprintf(fp, "%s%u", first ? "" : ",", i);
         first = false;
      }

      fprintf(fp, ") [d%u], r%u\n", I.address, I.data);
      break;
   }

   case MALI_CEU_OPCODE_SYNC_ADD64: {
      pan_unpack(bytes, CEU_SYNC_ADD64, I);
      bool first = true;
      fprintf(fp, "SYNC_ADD64%s%s signal(%u), wait(",
              I.error_propagate ? ".error_propagate" : "",
              I.scope_csg ? ".csg" : ".system", I.scoreboard_slot);

      u_foreach_bit(i, I.wait_mask) {
         fprintf(fp, "%s%u", first ? "" : ",", i);
         first = false;
      }

      fprintf(fp, ") [d%u], d%u\n", I.address, I.data);
      break;
   }

   case MALI_CEU_OPCODE_SYNC_SET32: {
      pan_unpack(bytes, CEU_SYNC_SET32, I);
      bool first = true;
      /* NOTE(review): format begins "SYNC_SET32." unlike SYNC_ADD32's
       * "SYNC_ADD32%s" — yields a doubled dot when a modifier follows;
       * confirm intended output. */
      fprintf(fp, "SYNC_SET32.%s%s signal(%u), wait(",
              I.error_propagate ? ".error_propagate" : "",
              I.scope_csg ? ".csg" : ".system", I.scoreboard_slot);

      u_foreach_bit(i, I.wait_mask) {
         fprintf(fp, "%s%u", first ? "" : ",", i);
         first = false;
      }

      fprintf(fp, ") [d%u], r%u\n", I.address, I.data);
      break;
   }

   case MALI_CEU_OPCODE_SYNC_SET64: {
      pan_unpack(bytes, CEU_SYNC_SET64, I);
      bool first = true;
      /* NOTE(review): same leading-dot inconsistency as SYNC_SET32 */
      fprintf(fp, "SYNC_SET64.%s%s signal(%u), wait(",
              I.error_propagate ? ".error_propagate" : "",
              I.scope_csg ? ".csg" : ".system", I.scoreboard_slot);

      u_foreach_bit(i, I.wait_mask) {
         fprintf(fp, "%s%u", first ? "" : ",", i);
         first = false;
      }

      fprintf(fp, ") [d%u], d%u\n", I.address, I.data);
      break;
   }

   case MALI_CEU_OPCODE_CALL: {
      pan_unpack(bytes, CEU_CALL, I);
      fprintf(fp, "CALL d%u, r%u\n", I.address, I.length);
      break;
   }

   case MALI_CEU_OPCODE_JUMP: {
      pan_unpack(bytes, CEU_JUMP, I);
      fprintf(fp, "JUMP d%u, r%u\n", I.address, I.length);
      break;
   }

   case MALI_CEU_OPCODE_REQ_RESOURCE: {
      pan_unpack(bytes, CEU_REQ_RESOURCE, I);

      fprintf(fp, "REQ_RESOURCE");
      if (I.compute)
         fprintf(fp, ".compute");
      if (I.fragment)
         fprintf(fp, ".fragment");
      if (I.tiler)
         fprintf(fp, ".tiler");
      if (I.idvs)
         fprintf(fp, ".idvs");
      fprintf(fp, "\n");
      break;
   }

   case MALI_CEU_OPCODE_SYNC_WAIT32: {
      pan_unpack(bytes, CEU_SYNC_WAIT32, I);

      fprintf(fp, "SYNC_WAIT32%s%s d%u, r%u\n", I.invert ? ".gt" : ".le",
              I.error_reject ? ".reject" : ".inherit", I.address, I.data);
      break;
   }

   case MALI_CEU_OPCODE_SYNC_WAIT64: {
      pan_unpack(bytes, CEU_SYNC_WAIT64, I);

      fprintf(fp, "SYNC_WAIT64%s%s d%u, d%u\n", I.invert ? ".gt" : ".le",
              I.error_reject ? ".reject" : ".inherit", I.address, I.data);
      break;
   }

   case MALI_CEU_OPCODE_UMIN32: {
      pan_unpack(bytes, CEU_UMIN32, I);

      fprintf(fp, "UMIN32 r%u, r%u, r%u\n", I.destination, I.source_1,
              I.source_2);
      break;
   }

   case MALI_CEU_OPCODE_BRANCH: {
      pan_unpack(bytes, CEU_BRANCH, I);

      static const char *condition[] = {
         "le", "gt", "eq", "ne", "lt", "ge", "always",
      };
      fprintf(fp, "BRANCH.%s r%u, #%d\n", condition[I.condition], I.value,
              I.offset);

      break;
   }

   case MALI_CEU_OPCODE_FLUSH_CACHE2: {
      pan_unpack(bytes, CEU_FLUSH_CACHE2, I);
      static const char *mode[] = {
         "nop",
         "clean",
         "INVALID",
         "clean_invalidate",
      };

      fprintf(fp, "FLUSH_CACHE2.%s_l2.%s_lsc%s r%u, signal(%u), wait(",
              mode[I.l2_flush_mode], mode[I.lsc_flush_mode],
              I.other_invalidate ? ".invalidate_other" : "", I.latest_flush_id,
              I.scoreboard_entry);

      bool first = true;
      u_foreach_bit(i, I.scoreboard_mask) {
         fprintf(fp, "%s%u", first ? "" : ",", i);
         first = false;
      }
      fprintf(fp, ")\n");
      break;
   }

   case MALI_CEU_OPCODE_FINISH_TILING: {
      pan_unpack(bytes, CEU_FINISH_TILING, I);
      fprintf(fp, "FINISH_TILING\n");
      break;
   }

   case MALI_CEU_OPCODE_FINISH_FRAGMENT: {
      pan_unpack(bytes, CEU_FINISH_FRAGMENT, I);

      bool first = true;
      /* NOTE(review): format is ".%s" with "" or ".frag_end", so output is
       * either "FINISH_FRAGMENT." or "FINISH_FRAGMENT..frag_end" — looks
       * unintended; confirm. */
      fprintf(fp, "FINISH_FRAGMENT.%s, d%u, d%u, signal(%u), wait(",
              I.increment_fragment_completed ? ".frag_end" : "",
              I.last_heap_chunk, I.first_heap_chunk, I.scoreboard_entry);

      u_foreach_bit(i, I.wait_mask) {
         fprintf(fp, "%s%u", first ? "" : ",", i);
         first = false;
      }
      fprintf(fp, ")\n");
      break;
   }

   case MALI_CEU_OPCODE_HEAP_OPERATION: {
      pan_unpack(bytes, CEU_HEAP_OPERATION, I);
      /* NOTE(review): entry 2 is NULL; an operation encoding of 2 would pass
       * NULL to %s (undefined behavior) — verify the field cannot be 2. */
      const char *counter_names[] = {"vt_start", "vt_end", NULL, "frag_end"};
      bool first = true;
      fprintf(fp, "HEAP_OPERATION.%s signal(%u), wait(",
              counter_names[I.operation], I.scoreboard_entry);

      u_foreach_bit(i, I.wait_mask) {
         fprintf(fp, "%s%u", first ? "" : ",", i);
         first = false;
      }

      fprintf(fp, ")\n");
      break;
   }

   case MALI_CEU_OPCODE_HEAP_SET: {
      pan_unpack(bytes, CEU_HEAP_SET, I);
      fprintf(fp, "HEAP_SET d%u\n", I.address);
      break;
   }

   default: {
      /* Unknown opcode: dump the raw payload so nothing is silently lost */
      fprintf(fp, "INVALID_%u 0x%" PRIX64 "\n", base.opcode, base.data);
      break;
   }
   }
}
|
||||
|
||||
/*
 * Redirect execution to another command buffer (shared tail of CALL and
 * JUMP). reg_address names the register pair holding the target GPU VA,
 * reg_length the register holding the buffer size in bytes. Returns false
 * on a malformed target so interpretation stops; on success the caller must
 * not advance the IP (it already points at the new buffer).
 */
static bool
interpret_ceu_jump(struct queue_ctx *ctx, uint64_t reg_address,
                   uint32_t reg_length)
{
   uint32_t address_lo = ctx->regs[reg_address];
   uint32_t address_hi = ctx->regs[reg_address + 1];
   uint32_t length = ctx->regs[reg_length];

   /* Instructions are 8 bytes, so the target length must be a multiple */
   if (length % 8) {
      fprintf(stderr, "CS call alignment error\n");
      return false;
   }

   /* Map the entire subqueue now */
   uint64_t address = ((uint64_t)address_hi << 32) | address_lo;
   uint64_t *cs = pandecode_fetch_gpu_mem(address, length);

   ctx->ip = cs;
   ctx->end = cs + (length / 8);

   /* Skip the usual IP update */
   return true;
}
|
||||
|
||||
/*
|
||||
* Interpret a single instruction of the CEU, updating the register file,
|
||||
* instruction pointer, and call stack. Memory access and GPU controls are
|
||||
* ignored for now.
|
||||
*
|
||||
* Returns true if execution should continue.
|
||||
*/
|
||||
static bool
interpret_ceu_instr(struct queue_ctx *ctx)
{
   /* Unpack the base so we get the opcode */
   uint8_t *bytes = (uint8_t *)ctx->ip;
   pan_unpack(bytes, CEU_BASE, base);

   assert(ctx->ip < ctx->end);

   switch (base.opcode) {
   case MALI_CEU_OPCODE_MOVE: {
      pan_unpack(bytes, CEU_MOVE, I);

      /* 64-bit move writes the low/high 32-bit register pair */
      ctx->regs[I.destination + 0] = (uint32_t)I.immediate;
      ctx->regs[I.destination + 1] = (uint32_t)(I.immediate >> 32);
      break;
   }

   case MALI_CEU_OPCODE_MOVE32: {
      pan_unpack(bytes, CEU_MOVE32, I);

      ctx->regs[I.destination] = I.immediate;
      break;
   }

   case MALI_CEU_OPCODE_ADD_IMMEDIATE32: {
      pan_unpack(bytes, CEU_ADD_IMMEDIATE32, I);

      ctx->regs[I.destination] = ctx->regs[I.source] + I.immediate;
      break;
   }

   case MALI_CEU_OPCODE_ADD_IMMEDIATE64: {
      pan_unpack(bytes, CEU_ADD_IMMEDIATE64, I);

      /* Reassemble the 64-bit source pair, add the signed immediate, and
       * split the result back into the destination pair */
      int64_t value =
         (ctx->regs[I.source] | ((int64_t)ctx->regs[I.source + 1] << 32)) +
         I.immediate;

      ctx->regs[I.destination] = value;
      ctx->regs[I.destination + 1] = value >> 32;
      break;
   }

   case MALI_CEU_OPCODE_CALL: {
      pan_unpack(bytes, CEU_CALL, I);

      /* The hardware call stack is finite; bail out rather than recurse
       * forever on malformed streams */
      if (ctx->call_stack_depth == MAX_CALL_STACK_DEPTH) {
         fprintf(stderr, "CS call stack overflow\n");
         return false;
      }

      assert(ctx->call_stack_depth < MAX_CALL_STACK_DEPTH);

      /* Return address is the instruction after the call */
      ctx->ip++;

      /* Note: tail calls are not optimized in the hardware. */
      assert(ctx->ip <= ctx->end);

      unsigned depth = ctx->call_stack_depth++;

      ctx->call_stack[depth].lr = ctx->ip;
      ctx->call_stack[depth].end = ctx->end;

      return interpret_ceu_jump(ctx, I.address, I.length);
   }

   case MALI_CEU_OPCODE_JUMP: {
      /* NOTE(review): unpacks CEU_CALL rather than CEU_JUMP — presumably
       * the two encodings share a layout; confirm against the GenXML. */
      pan_unpack(bytes, CEU_CALL, I);

      if (ctx->call_stack_depth == 0) {
         fprintf(stderr, "Cannot jump from the entrypoint\n");
         return false;
      }

      return interpret_ceu_jump(ctx, I.address, I.length);
   }

   default:
      /* All other instructions have no architectural side effect we model */
      break;
   }

   /* Update IP first to point to the next instruction, so call doesn't
    * require special handling (even for tail calls).
    */
   ctx->ip++;

   while (ctx->ip == ctx->end) {
      /* Graceful termination */
      if (ctx->call_stack_depth == 0)
         return false;

      /* Pop off the call stack */
      unsigned old_depth = --ctx->call_stack_depth;

      ctx->ip = ctx->call_stack[old_depth].lr;
      ctx->end = ctx->call_stack[old_depth].end;
   }

   return true;
}
|
||||
|
||||
void
|
||||
GENX(pandecode_cs)(mali_ptr queue, uint32_t size, unsigned gpu_id,
|
||||
uint32_t *regs)
|
||||
{
|
||||
pandecode_dump_file_open();
|
||||
|
||||
uint64_t *cs = pandecode_fetch_gpu_mem(queue, size);
|
||||
|
||||
struct queue_ctx ctx = {
|
||||
/* Mali-G610 has 96 registers. Other devices not yet supported, we can
|
||||
* make this configurable later when we encounter new Malis.
|
||||
*/
|
||||
.nr_regs = 96,
|
||||
.regs = regs,
|
||||
.ip = cs,
|
||||
.end = cs + (size / 8),
|
||||
.gpu_id = gpu_id,
|
||||
};
|
||||
|
||||
if (size) {
|
||||
do {
|
||||
disassemble_ceu_instr(*(ctx.ip), 1 + ctx.call_stack_depth, true,
|
||||
pandecode_dump_stream, &ctx);
|
||||
} while (interpret_ceu_instr(&ctx));
|
||||
}
|
||||
|
||||
fflush(pandecode_dump_stream);
|
||||
pandecode_map_read_write();
|
||||
}
|
||||
#endif
|
||||
669
src/panfrost/lib/genxml/decode_jm.c
Normal file
669
src/panfrost/lib/genxml/decode_jm.c
Normal file
|
|
@ -0,0 +1,669 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2019 Alyssa Rosenzweig
|
||||
* Copyright (C) 2017-2019 Connor Abbott
|
||||
* Copyright (C) 2019 Collabora, Ltd.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "genxml/gen_macros.h"
|
||||
#include "util/set.h"
|
||||
#include "decode.h"
|
||||
|
||||
#if PAN_ARCH <= 9
|
||||
|
||||
/* Dump a PRIMITIVE descriptor and, pre-v9, sanity-check its index buffer. */
static void
pandecode_primitive(const void *p)
{
   pan_unpack(p, PRIMITIVE, primitive);
   DUMP_UNPACKED(PRIMITIVE, primitive, "Primitive:\n");

#if PAN_ARCH <= 7
   /* Validate an index buffer is present if we need one. TODO: verify
    * relationship between invocation_count and index_count */

   if (primitive.indices) {
      /* Grab the size. For non-UINT32 types, the raw enum value is used
       * directly as the byte size — presumably the enum values for U8/U16
       * are 1/2; confirm against the genxml index-type definitions */
      unsigned size = (primitive.index_type == MALI_INDEX_TYPE_UINT32)
                         ? sizeof(uint32_t)
                         : primitive.index_type;

      /* Ensure we got a size, and if so, validate the index buffer
       * is large enough to hold a full set of indices of the given
       * size */

      if (!size)
         pandecode_log("// XXX: index size missing\n");
      else
         pandecode_validate_buffer(primitive.indices,
                                   primitive.index_count * size);
   } else if (primitive.index_type)
      pandecode_log("// XXX: unexpected index size\n");
#endif
}
|
||||
|
||||
#if PAN_ARCH <= 7
|
||||
/*
 * Dump `count` attribute (or varying) buffer descriptors starting at `addr`.
 * Some buffer types carry a continuation record in the following slot; those
 * are printed alongside their parent and the slot is skipped.
 */
static void
pandecode_attributes(mali_ptr addr, int count, bool varying,
                     enum mali_job_type job_type)
{
   char *prefix = varying ? "Varying" : "Attribute";
   assert(addr);

   if (!count) {
      pandecode_log("// warn: No %s records\n", prefix);
      return;
   }

   MAP_ADDR(ATTRIBUTE_BUFFER, addr, cl);

   for (int i = 0; i < count; ++i) {
      pan_unpack(cl + i * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, temp);
      DUMP_UNPACKED(ATTRIBUTE_BUFFER, temp, "%s:\n", prefix);

      switch (temp.type) {
      case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION:
      case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: {
         /* NPOT divisor buffers are followed by a continuation record in
          * the next descriptor slot */
         pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER),
                    ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2);
         pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT,
                   temp2, (pandecode_indent + 1) * 2);
         /* Skip the slot consumed by the continuation */
         i++;
         break;
      }
      case MALI_ATTRIBUTE_TYPE_3D_LINEAR:
      case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: {
         /* 3D buffers likewise consume a continuation slot. Note the offset
          * multiplier here is the continuation's size — presumably equal to
          * pan_size(ATTRIBUTE_BUFFER); confirm in genxml */
         pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER_CONTINUATION_3D),
                    ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2);
         pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D,
                   temp2, (pandecode_indent + 1) * 2);
         i++;
         break;
      }
      default:
         break;
      }
   }
   pandecode_log("\n");
}
|
||||
|
||||
static unsigned
|
||||
pandecode_attribute_meta(int count, mali_ptr attribute, bool varying)
|
||||
{
|
||||
unsigned max = 0;
|
||||
|
||||
for (int i = 0; i < count; ++i, attribute += pan_size(ATTRIBUTE)) {
|
||||
MAP_ADDR(ATTRIBUTE, attribute, cl);
|
||||
pan_unpack(cl, ATTRIBUTE, a);
|
||||
DUMP_UNPACKED(ATTRIBUTE, a, "%s:\n", varying ? "Varying" : "Attribute");
|
||||
max = MAX2(max, a.buffer_index);
|
||||
}
|
||||
|
||||
pandecode_log("\n");
|
||||
return MIN2(max + 1, 256);
|
||||
}
|
||||
|
||||
/* return bits [lo, hi) of word */
|
||||
static u32
|
||||
bits(u32 word, u32 lo, u32 hi)
|
||||
{
|
||||
if (hi - lo >= 32)
|
||||
return word; // avoid undefined behavior with the shift
|
||||
|
||||
if (lo >= 32)
|
||||
return 0;
|
||||
|
||||
return (word >> lo) & ((1 << (hi - lo)) - 1);
|
||||
}
|
||||
|
||||
/*
 * Decode and print an INVOCATION section. The local size and workgroup
 * counts are packed into one word as variable-width bitfields delimited by
 * the *_shift fields; each stored value is biased by -1.
 */
static void
pandecode_invocation(const void *i)
{
   /* Decode invocation_count. See the comment before the definition of
    * invocation_count for an explanation.
    */
   pan_unpack(i, INVOCATION, invocation);

   /* Local (workgroup) size: fields [0, size_y_shift),
    * [size_y_shift, size_z_shift), [size_z_shift, workgroups_x_shift) */
   unsigned size_x =
      bits(invocation.invocations, 0, invocation.size_y_shift) + 1;
   unsigned size_y = bits(invocation.invocations, invocation.size_y_shift,
                          invocation.size_z_shift) +
                     1;
   unsigned size_z = bits(invocation.invocations, invocation.size_z_shift,
                          invocation.workgroups_x_shift) +
                     1;

   /* Workgroup counts occupy the remaining bits up to bit 32 */
   unsigned groups_x =
      bits(invocation.invocations, invocation.workgroups_x_shift,
           invocation.workgroups_y_shift) +
      1;
   unsigned groups_y =
      bits(invocation.invocations, invocation.workgroups_y_shift,
           invocation.workgroups_z_shift) +
      1;
   unsigned groups_z =
      bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1;

   pandecode_log("Invocation (%d, %d, %d) x (%d, %d, %d)\n", size_x, size_y,
                 size_z, groups_x, groups_y, groups_z);

   DUMP_UNPACKED(INVOCATION, invocation, "Invocation:\n")
}
|
||||
|
||||
/*
 * Dump the texture descriptors for a draw. On v6+ the descriptors are stored
 * inline as an array; on v4/v5 `textures` is an array of pointers to the
 * actual descriptors, so print the pointer table first and then chase each
 * pointer.
 */
static void
pandecode_textures(mali_ptr textures, unsigned texture_count)
{
   if (!textures)
      return;

   pandecode_log("Textures %" PRIx64 ":\n", textures);
   pandecode_indent++;

#if PAN_ARCH >= 6
   /* Descriptors are contiguous; fetch them all at once */
   const void *cl =
      pandecode_fetch_gpu_mem(textures, pan_size(TEXTURE) * texture_count);

   for (unsigned tex = 0; tex < texture_count; ++tex)
      GENX(pandecode_texture)(cl + pan_size(TEXTURE) * tex, tex);
#else
   mali_ptr *PANDECODE_PTR_VAR(u, textures);

   /* First pass: print the table of pointers as symbolic references */
   for (int tex = 0; tex < texture_count; ++tex) {
      mali_ptr *PANDECODE_PTR_VAR(u, textures + tex * sizeof(mali_ptr));
      char *a = pointer_as_memory_reference(*u);
      pandecode_log("%s,\n", a);
      free(a);
   }

   /* Now, finally, descend down into the texture descriptor */
   for (unsigned tex = 0; tex < texture_count; ++tex) {
      mali_ptr *PANDECODE_PTR_VAR(u, textures + tex * sizeof(mali_ptr));
      GENX(pandecode_texture)(*u, tex);
   }
#endif
   pandecode_indent--;
   pandecode_log("\n");
}
|
||||
|
||||
static void
|
||||
pandecode_samplers(mali_ptr samplers, unsigned sampler_count)
|
||||
{
|
||||
pandecode_log("Samplers %" PRIx64 ":\n", samplers);
|
||||
pandecode_indent++;
|
||||
|
||||
for (int i = 0; i < sampler_count; ++i)
|
||||
DUMP_ADDR(SAMPLER, samplers + (pan_size(SAMPLER) * i), "Sampler %d:\n",
|
||||
i);
|
||||
|
||||
pandecode_indent--;
|
||||
pandecode_log("\n");
|
||||
}
|
||||
|
||||
static void
|
||||
pandecode_uniform_buffers(mali_ptr pubufs, int ubufs_count)
|
||||
{
|
||||
uint64_t *PANDECODE_PTR_VAR(ubufs, pubufs);
|
||||
|
||||
for (int i = 0; i < ubufs_count; i++) {
|
||||
mali_ptr addr = (ubufs[i] >> 10) << 2;
|
||||
unsigned size = addr ? (((ubufs[i] & ((1 << 10) - 1)) + 1) * 16) : 0;
|
||||
|
||||
pandecode_validate_buffer(addr, size);
|
||||
|
||||
char *ptr = pointer_as_memory_reference(addr);
|
||||
pandecode_log("ubuf_%d[%u] = %s;\n", i, size, ptr);
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
pandecode_log("\n");
|
||||
}
|
||||
|
||||
static void
|
||||
pandecode_uniforms(mali_ptr uniforms, unsigned uniform_count)
|
||||
{
|
||||
pandecode_validate_buffer(uniforms, uniform_count * 16);
|
||||
|
||||
char *ptr = pointer_as_memory_reference(uniforms);
|
||||
pandecode_log("vec4 uniforms[%u] = %s;\n", uniform_count, ptr);
|
||||
free(ptr);
|
||||
pandecode_log("\n");
|
||||
}
|
||||
|
||||
/*
 * Decode a Draw Call Descriptor: the renderer state (with shader
 * disassembly), blend descriptors, viewport, attribute/varying records and
 * buffers, uniform buffers, push uniforms, textures and samplers it points
 * to. Counts for the resource tables come from the renderer state.
 */
void
GENX(pandecode_dcd)(const struct MALI_DRAW *p, enum mali_job_type job_type,
                    unsigned gpu_id)
{
#if PAN_ARCH >= 5
   struct pandecode_fbd fbd_info = {.rt_count = 1};
#endif

   /* Pre-v6 tiler jobs embed an FBD pointer instead of local storage */
   if (PAN_ARCH >= 6 || (PAN_ARCH == 5 && job_type != MALI_JOB_TYPE_TILER)) {
#if PAN_ARCH >= 5
      /* Low bit is a tag — mask it off before dereferencing */
      DUMP_ADDR(LOCAL_STORAGE, p->thread_storage & ~1, "Local Storage:\n");
#endif
   } else {
#if PAN_ARCH <= 5
      GENX(pandecode_fbd)(p->fbd, false, gpu_id);
#endif
   }

   int varying_count = 0, attribute_count = 0, uniform_count = 0,
       uniform_buffer_count = 0;
   int texture_count = 0, sampler_count = 0;

   if (p->state) {
      uint32_t *cl =
         pandecode_fetch_gpu_mem(p->state, pan_size(RENDERER_STATE));

      pan_unpack(cl, RENDERER_STATE, state);

      /* Shader pointer is tagged in the low 4 bits */
      if (state.shader.shader & ~0xF)
         pandecode_shader_disassemble(state.shader.shader & ~0xF, gpu_id);

#if PAN_ARCH >= 6
      bool idvs = (job_type == MALI_JOB_TYPE_INDEXED_VERTEX);

      /* IDVS jobs carry a second (varying) shader */
      if (idvs && state.secondary_shader)
         pandecode_shader_disassemble(state.secondary_shader, gpu_id);
#endif
      DUMP_UNPACKED(RENDERER_STATE, state, "State:\n");
      pandecode_indent++;

      /* Save for dumps */
      attribute_count = state.shader.attribute_count;
      varying_count = state.shader.varying_count;
      texture_count = state.shader.texture_count;
      sampler_count = state.shader.sampler_count;
      uniform_buffer_count = state.properties.uniform_buffer_count;

#if PAN_ARCH >= 6
      uniform_count = state.preload.uniform_count;
#else
      uniform_count = state.properties.uniform_count;
#endif

#if PAN_ARCH == 4
      /* v4 keeps the blend shader in the renderer state itself */
      mali_ptr shader = state.blend_shader & ~0xF;
      if (state.multisample_misc.blend_shader && shader)
         pandecode_shader_disassemble(shader, gpu_id);
#endif
      pandecode_indent--;
      pandecode_log("\n");

      /* MRT blend fields are used whenever MFBD is used, with
       * per-RT descriptors */

#if PAN_ARCH >= 5
      if ((job_type == MALI_JOB_TYPE_TILER ||
           job_type == MALI_JOB_TYPE_FRAGMENT) &&
          (PAN_ARCH >= 6 || p->thread_storage & MALI_FBD_TAG_IS_MFBD)) {
         /* Blend descriptors immediately follow the renderer state */
         void *blend_base = ((void *)cl) + pan_size(RENDERER_STATE);

         for (unsigned i = 0; i < fbd_info.rt_count; i++) {
            mali_ptr shader =
               GENX(pandecode_blend)(blend_base, i, state.shader.shader);
            if (shader & ~0xF)
               pandecode_shader_disassemble(shader, gpu_id);
         }
      }
#endif
   } else
      pandecode_log("// XXX: missing shader descriptor\n");

   if (p->viewport) {
      DUMP_ADDR(VIEWPORT, p->viewport, "Viewport:\n");
      pandecode_log("\n");
   }

   unsigned max_attr_index = 0;

   if (p->attributes)
      max_attr_index =
         pandecode_attribute_meta(attribute_count, p->attributes, false);

   if (p->attribute_buffers)
      pandecode_attributes(p->attribute_buffers, max_attr_index, false,
                           job_type);

   if (p->varyings) {
      varying_count =
         pandecode_attribute_meta(varying_count, p->varyings, true);
   }

   if (p->varying_buffers)
      pandecode_attributes(p->varying_buffers, varying_count, true, job_type);

   if (p->uniform_buffers) {
      if (uniform_buffer_count)
         pandecode_uniform_buffers(p->uniform_buffers, uniform_buffer_count);
      else
         pandecode_log("// warn: UBOs specified but not referenced\n");
   } else if (uniform_buffer_count)
      pandecode_log("// XXX: UBOs referenced but not specified\n");

   /* We don't want to actually dump uniforms, but we do need to validate
    * that the counts we were given are sane */

   if (p->push_uniforms) {
      if (uniform_count)
         pandecode_uniforms(p->push_uniforms, uniform_count);
      else
         pandecode_log("// warn: Uniforms specified but not referenced\n");
   } else if (uniform_count)
      pandecode_log("// XXX: Uniforms referenced but not specified\n");

   if (p->textures)
      pandecode_textures(p->textures, texture_count);

   if (p->samplers)
      pandecode_samplers(p->samplers, sampler_count);
}
|
||||
|
||||
/* Decode a vertex/compute/geometry job: its draw descriptor, invocation
 * counts, and job parameters. */
static void
pandecode_vertex_compute_geometry_job(const struct MALI_JOB_HEADER *h,
                                      mali_ptr job, unsigned gpu_id)
{
   struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, job);
   pan_section_unpack(p, COMPUTE_JOB, DRAW, draw);
   /* Decode everything the DCD points to before dumping the payload */
   GENX(pandecode_dcd)(&draw, h->type, gpu_id);

   pandecode_log("Vertex Job Payload:\n");
   pandecode_indent++;
   pandecode_invocation(pan_section_ptr(p, COMPUTE_JOB, INVOCATION));
   DUMP_SECTION(COMPUTE_JOB, PARAMETERS, p, "Vertex Job Parameters:\n");
   DUMP_UNPACKED(DRAW, draw, "Draw:\n");
   pandecode_indent--;
   pandecode_log("\n");
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
pandecode_write_value_job(mali_ptr job)
|
||||
{
|
||||
struct mali_write_value_job_packed *PANDECODE_PTR_VAR(p, job);
|
||||
pan_section_unpack(p, WRITE_VALUE_JOB, PAYLOAD, u);
|
||||
DUMP_SECTION(WRITE_VALUE_JOB, PAYLOAD, p, "Write Value Payload:\n");
|
||||
pandecode_log("\n");
|
||||
}
|
||||
|
||||
static void
|
||||
pandecode_cache_flush_job(mali_ptr job)
|
||||
{
|
||||
struct mali_cache_flush_job_packed *PANDECODE_PTR_VAR(p, job);
|
||||
pan_section_unpack(p, CACHE_FLUSH_JOB, PAYLOAD, u);
|
||||
DUMP_SECTION(CACHE_FLUSH_JOB, PAYLOAD, p, "Cache Flush Payload:\n");
|
||||
pandecode_log("\n");
|
||||
}
|
||||
|
||||
/* Decode a tiler job: its draw descriptor, primitive state, and (v6+) the
 * tiler context it points to. */
static void
pandecode_tiler_job(const struct MALI_JOB_HEADER *h, mali_ptr job,
                    unsigned gpu_id)
{
   struct mali_tiler_job_packed *PANDECODE_PTR_VAR(p, job);
   pan_section_unpack(p, TILER_JOB, DRAW, draw);
   /* Decode everything the DCD references before the payload dump */
   GENX(pandecode_dcd)(&draw, h->type, gpu_id);
   pandecode_log("Tiler Job Payload:\n");
   pandecode_indent++;

#if PAN_ARCH <= 7
   /* v9+ replaces the packed invocation word with explicit counts below */
   pandecode_invocation(pan_section_ptr(p, TILER_JOB, INVOCATION));
#endif

   pandecode_primitive(pan_section_ptr(p, TILER_JOB, PRIMITIVE));
   DUMP_UNPACKED(DRAW, draw, "Draw:\n");

   DUMP_SECTION(TILER_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n");

#if PAN_ARCH >= 6
   pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr);
   GENX(pandecode_tiler)(tiler_ptr.address, gpu_id);

#if PAN_ARCH >= 9
   DUMP_SECTION(TILER_JOB, INSTANCE_COUNT, p, "Instance count:\n");
   DUMP_SECTION(TILER_JOB, VERTEX_COUNT, p, "Vertex count:\n");
   DUMP_SECTION(TILER_JOB, SCISSOR, p, "Scissor:\n");
   DUMP_SECTION(TILER_JOB, INDICES, p, "Indices:\n");
#else
   /* Result is unused — presumably the unpack exists to exercise/validate
    * the padding section; confirm against pan_section_unpack semantics */
   pan_section_unpack(p, TILER_JOB, PADDING, padding);
#endif

#endif
   pandecode_indent--;
   pandecode_log("\n");
}
|
||||
|
||||
/* Decode a fragment job: dump the framebuffer descriptor it references and
 * cross-check the tag bits embedded in the FBD pointer. */
static void
pandecode_fragment_job(mali_ptr job, unsigned gpu_id)
{
   struct mali_fragment_job_packed *PANDECODE_PTR_VAR(p, job);
   pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s);

   UNUSED struct pandecode_fbd info =
      GENX(pandecode_fbd)(s.framebuffer, true, gpu_id);

#if PAN_ARCH >= 5
   unsigned expected_tag = 0;

   /* Compute the tag for the tagged pointer. This contains the type of
    * FBD (MFBD/SFBD), and in the case of an MFBD, information about which
    * additional structures follow the MFBD header (an extra payload or
    * not, as well as a count of render targets) */

   expected_tag = MALI_FBD_TAG_IS_MFBD;
   if (info.has_extra)
      expected_tag |= MALI_FBD_TAG_HAS_ZS_RT;

   /* NOTE(review): MALI_FBD_TAG_IS_MFBD is OR'd in a second time here —
    * harmless, but redundant with the assignment above */
   expected_tag |= MALI_FBD_TAG_IS_MFBD | (MALI_POSITIVE(info.rt_count) << 2);
#endif

   DUMP_UNPACKED(FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n");

#if PAN_ARCH >= 5
   /* The FBD is a tagged pointer */

   unsigned tag = (s.framebuffer & MALI_FBD_TAG_MASK);

   if (tag != expected_tag)
      pandecode_log("// XXX: expected FBD tag %X but got %X\n", expected_tag,
                    tag);
#endif

   pandecode_log("\n");
}
|
||||
|
||||
#if PAN_ARCH == 6 || PAN_ARCH == 7
|
||||
static void
|
||||
pandecode_indexed_vertex_job(const struct MALI_JOB_HEADER *h, mali_ptr job,
|
||||
unsigned gpu_id)
|
||||
{
|
||||
struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(p, job);
|
||||
|
||||
pandecode_log("Vertex:\n");
|
||||
pan_section_unpack(p, INDEXED_VERTEX_JOB, VERTEX_DRAW, vert_draw);
|
||||
GENX(pandecode_dcd)(&vert_draw, h->type, gpu_id);
|
||||
DUMP_UNPACKED(DRAW, vert_draw, "Vertex Draw:\n");
|
||||
|
||||
pandecode_log("Fragment:\n");
|
||||
pan_section_unpack(p, INDEXED_VERTEX_JOB, FRAGMENT_DRAW, frag_draw);
|
||||
GENX(pandecode_dcd)(&frag_draw, MALI_JOB_TYPE_FRAGMENT, gpu_id);
|
||||
DUMP_UNPACKED(DRAW, frag_draw, "Fragment Draw:\n");
|
||||
|
||||
pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr);
|
||||
pandecode_log("Tiler Job Payload:\n");
|
||||
pandecode_indent++;
|
||||
GENX(pandecode_tiler)(tiler_ptr.address, gpu_id);
|
||||
pandecode_indent--;
|
||||
|
||||
pandecode_invocation(pan_section_ptr(p, INDEXED_VERTEX_JOB, INVOCATION));
|
||||
pandecode_primitive(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE));
|
||||
|
||||
DUMP_SECTION(INDEXED_VERTEX_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n");
|
||||
|
||||
pan_section_unpack(p, INDEXED_VERTEX_JOB, PADDING, padding);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if PAN_ARCH == 9
|
||||
/* Decode a v9 malloc-vertex job: dump all its sections, the tiler context,
 * the draw descriptor, and the position/varying shader environments. */
static void
pandecode_malloc_vertex_job(mali_ptr job, unsigned gpu_id)
{
   struct mali_malloc_vertex_job_packed *PANDECODE_PTR_VAR(p, job);

   DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE, p, "Primitive:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, INSTANCE_COUNT, p, "Instance count:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, ALLOCATION, p, "Allocation:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, TILER, p, "Tiler:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, SCISSOR, p, "Scissor:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n");
   DUMP_SECTION(MALLOC_VERTEX_JOB, INDICES, p, "Indices:\n");

   pan_section_unpack(p, MALLOC_VERTEX_JOB, DRAW, dcd);

   pan_section_unpack(p, MALLOC_VERTEX_JOB, TILER, tiler_ptr);
   pandecode_log("Tiler Job Payload:\n");
   pandecode_indent++;
   /* The tiler context pointer may legitimately be null */
   if (tiler_ptr.address)
      GENX(pandecode_tiler)(tiler_ptr.address, gpu_id);
   else
      pandecode_log("<omitted>\n");
   pandecode_indent--;

   /* Job type 0 passed since no specific type applies here */
   GENX(pandecode_dcd)(&dcd, 0, gpu_id);

   pan_section_unpack(p, MALLOC_VERTEX_JOB, POSITION, position);
   pan_section_unpack(p, MALLOC_VERTEX_JOB, VARYING, varying);
   GENX(pandecode_shader_environment)(&position, gpu_id);
   GENX(pandecode_shader_environment)(&varying, gpu_id);
}
|
||||
|
||||
/* Decode a v9 compute job: its payload is a shader environment. */
static void
pandecode_compute_job(mali_ptr job, unsigned gpu_id)
{
   struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, job);
   pan_section_unpack(p, COMPUTE_JOB, PAYLOAD, payload);

   GENX(pandecode_shader_environment)(&payload.compute, gpu_id);
}
|
||||
#endif
|
||||
|
||||
/*
 * Trace a job chain at a particular GPU address, interpreted for a particular
 * GPU using the job manager.
 *
 * Each job header links to the next job; headers already visited are tracked
 * in a pointer set so a malformed (cyclic) chain terminates instead of
 * looping forever.
 */
void
GENX(pandecode_jc)(mali_ptr jc_gpu_va, unsigned gpu_id)
{
   pandecode_dump_file_open();

   /* Set of job-header CPU pointers already decoded, for cycle detection */
   struct set *va_set = _mesa_pointer_set_create(NULL);
   struct set_entry *entry = NULL;

   mali_ptr next_job = 0;

   do {
      struct mali_job_header_packed *hdr =
         PANDECODE_PTR(jc_gpu_va, struct mali_job_header_packed);

      /* Bail before re-decoding a header we've already seen */
      entry = _mesa_set_search(va_set, hdr);
      if (entry != NULL) {
         fprintf(stdout, "Job list has a cycle\n");
         break;
      }

      pan_unpack(hdr, JOB_HEADER, h);
      next_job = h.next;

      DUMP_UNPACKED(JOB_HEADER, h, "Job Header (%" PRIx64 "):\n", jc_gpu_va);
      pandecode_log("\n");

      /* Dispatch on job type; unknown types get only the header dump */
      switch (h.type) {
      case MALI_JOB_TYPE_WRITE_VALUE:
         pandecode_write_value_job(jc_gpu_va);
         break;

      case MALI_JOB_TYPE_CACHE_FLUSH:
         pandecode_cache_flush_job(jc_gpu_va);
         break;

      case MALI_JOB_TYPE_TILER:
         pandecode_tiler_job(&h, jc_gpu_va, gpu_id);
         break;

#if PAN_ARCH <= 7
      case MALI_JOB_TYPE_VERTEX:
      case MALI_JOB_TYPE_COMPUTE:
         pandecode_vertex_compute_geometry_job(&h, jc_gpu_va, gpu_id);
         break;

#if PAN_ARCH >= 6
      case MALI_JOB_TYPE_INDEXED_VERTEX:
         pandecode_indexed_vertex_job(&h, jc_gpu_va, gpu_id);
         break;
#endif
#else
      case MALI_JOB_TYPE_COMPUTE:
         pandecode_compute_job(jc_gpu_va, gpu_id);
         break;

      case MALI_JOB_TYPE_MALLOC_VERTEX:
         pandecode_malloc_vertex_job(jc_gpu_va, gpu_id);
         break;
#endif

      case MALI_JOB_TYPE_FRAGMENT:
         pandecode_fragment_job(jc_gpu_va, gpu_id);
         break;

      default:
         break;
      }

      /* Track the latest visited job CPU VA to detect cycles */
      _mesa_set_add(va_set, hdr);
   } while ((jc_gpu_va = next_job));

   _mesa_set_destroy(va_set, NULL);

   fflush(pandecode_dump_stream);
   pandecode_map_read_write();
}
|
||||
|
||||
void
|
||||
GENX(pandecode_abort_on_fault)(mali_ptr jc_gpu_va)
|
||||
{
|
||||
mali_ptr next_job = 0;
|
||||
|
||||
do {
|
||||
pan_unpack(PANDECODE_PTR(jc_gpu_va, struct mali_job_header_packed),
|
||||
JOB_HEADER, h);
|
||||
next_job = h.next;
|
||||
|
||||
/* Ensure the job is marked COMPLETE */
|
||||
if (h.exception_status != 0x1) {
|
||||
fprintf(stderr, "Incomplete job or timeout\n");
|
||||
fflush(NULL);
|
||||
abort();
|
||||
}
|
||||
} while ((jc_gpu_va = next_job));
|
||||
|
||||
pandecode_map_read_write();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
@ -93,6 +93,9 @@ pan_arch(unsigned gpu_id)
|
|||
#elif (PAN_ARCH == 9)
|
||||
#define GENX(X) X##_v9
|
||||
#include "genxml/v9_pack.h"
|
||||
#elif (PAN_ARCH == 10)
|
||||
#define GENX(X) X##_v10
|
||||
#include "genxml/v10_pack.h"
|
||||
#else
|
||||
#error "Need to add suffixing macro for this architecture"
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@
|
|||
# SOFTWARE.
|
||||
|
||||
pan_packers = []
|
||||
foreach packer : ['common', 'v4', 'v5', 'v6', 'v7', 'v9']
|
||||
foreach packer : ['common', 'v4', 'v5', 'v6', 'v7', 'v9', 'v10']
|
||||
pan_packers += custom_target(
|
||||
packer + '_pack.h',
|
||||
input : ['gen_pack.py', packer + '.xml'],
|
||||
|
|
@ -37,10 +37,10 @@ idep_pan_packers = declare_dependency(
|
|||
|
||||
libpanfrost_decode_per_arch = []
|
||||
|
||||
foreach ver : ['4', '5', '6', '7', '9']
|
||||
foreach ver : ['4', '5', '6', '7', '9', '10']
|
||||
libpanfrost_decode_per_arch += static_library(
|
||||
'pandecode-arch-v' + ver,
|
||||
['decode.c', pan_packers],
|
||||
['decode.c', 'decode_jm.c', 'decode_csf.c', pan_packers],
|
||||
include_directories : [inc_include, inc_src, inc_panfrost],
|
||||
dependencies : [dep_libdrm, idep_pan_packers, idep_nir],
|
||||
c_args : [no_override_init_args, '-DPAN_ARCH=' + ver],
|
||||
|
|
|
|||
|
|
@ -55,6 +55,9 @@ void pandecode_inject_free(uint64_t gpu_va, unsigned sz);
|
|||
|
||||
void pandecode_jc(uint64_t jc_gpu_va, unsigned gpu_id);
|
||||
|
||||
void pandecode_cs(mali_ptr queue_gpu_va, uint32_t size, unsigned gpu_id,
|
||||
uint32_t *regs);
|
||||
|
||||
void pandecode_abort_on_fault(uint64_t jc_gpu_va, unsigned gpu_id);
|
||||
|
||||
#endif /* __MMAP_TRACE_H__ */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue