diff --git a/src/gallium/drivers/panfrost/pan_context.c b/src/gallium/drivers/panfrost/pan_context.c
index 15df6e945b3..6c381bce00c 100644
--- a/src/gallium/drivers/panfrost/pan_context.c
+++ b/src/gallium/drivers/panfrost/pan_context.c
@@ -114,7 +114,7 @@ panfrost_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence,
    }
 
    if (dev->debug & PAN_DBG_TRACE)
-      pandecode_next_frame();
+      pandecode_next_frame(dev->decode_ctx);
 }
 
 static void
diff --git a/src/gallium/drivers/panfrost/pan_job.c b/src/gallium/drivers/panfrost/pan_job.c
index 98b063d0a76..be9e6757a3b 100644
--- a/src/gallium/drivers/panfrost/pan_job.c
+++ b/src/gallium/drivers/panfrost/pan_job.c
@@ -685,14 +685,14 @@ panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
       drmSyncobjWait(dev->fd, &out_sync, 1, INT64_MAX, 0, NULL);
 
       if (dev->debug & PAN_DBG_TRACE)
-         pandecode_jc(submit.jc, dev->gpu_id);
+         pandecode_jc(dev->decode_ctx, submit.jc, dev->gpu_id);
 
       if (dev->debug & PAN_DBG_DUMP)
-         pandecode_dump_mappings();
+         pandecode_dump_mappings(dev->decode_ctx);
 
       /* Jobs won't be complete if blackhole rendering, that's ok */
       if (!ctx->is_noop && dev->debug & PAN_DBG_SYNC)
-         pandecode_abort_on_fault(submit.jc, dev->gpu_id);
+         pandecode_abort_on_fault(dev->decode_ctx, submit.jc, dev->gpu_id);
    }
 
    return 0;
diff --git a/src/gallium/drivers/panfrost/pan_resource.c b/src/gallium/drivers/panfrost/pan_resource.c
index a75d4532211..4e114f2ccbf 100644
--- a/src/gallium/drivers/panfrost/pan_resource.c
+++ b/src/gallium/drivers/panfrost/pan_resource.c
@@ -1004,7 +1004,8 @@ panfrost_ptr_map(struct pipe_context *pctx, struct pipe_resource *resource,
       panfrost_bo_mmap(bo);
 
       if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
-         pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL);
+         pandecode_inject_mmap(dev->decode_ctx, bo->ptr.gpu, bo->ptr.cpu, bo->size,
+                               NULL);
 
       /* Upgrade writes to uninitialized ranges to UNSYNCHRONIZED */
       if ((usage & PIPE_MAP_WRITE) && resource->target == PIPE_BUFFER &&
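/*
 * Note on the driver hunks above: every pandecode entry point now takes the
 * per-device `dev->decode_ctx` as its first argument. A minimal sketch of the
 * device-side lifetime this implies -- `example_device` and both helpers are
 * hypothetical; only pandecode_create_context(), pandecode_destroy_context()
 * (added in decode_common.c below) and the PAN_DBG_* flags come from the tree:
 */
#include "decode.h"

struct example_device {
   unsigned debug;                       /* PAN_DBG_* flags */
   struct pandecode_context *decode_ctx; /* one decoder per device */
};

static void
example_device_init(struct example_device *dev, bool trace_to_stderr)
{
   /* Each context owns its own mmap tree, dump stream, and lock, so two
    * devices can be traced concurrently without sharing global state. */
   if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC | PAN_DBG_DUMP))
      dev->decode_ctx = pandecode_create_context(trace_to_stderr);
}

static void
example_device_destroy(struct example_device *dev)
{
   if (dev->decode_ctx)
      pandecode_destroy_context(dev->decode_ctx);
}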
diff --git a/src/panfrost/lib/genxml/decode.c b/src/panfrost/lib/genxml/decode.c
index 7850bad8b09..b1344e04821 100644
--- a/src/panfrost/lib/genxml/decode.c
+++ b/src/panfrost/lib/genxml/decode.c
@@ -45,11 +45,12 @@
  * larger FBD
  */
 static void
-pandecode_midgard_tiler_descriptor(const struct mali_tiler_context_packed *tp,
+pandecode_midgard_tiler_descriptor(struct pandecode_context *ctx,
+                                   const struct mali_tiler_context_packed *tp,
                                    const struct mali_tiler_weights_packed *wp)
 {
    pan_unpack(tp, TILER_CONTEXT, t);
-   DUMP_UNPACKED(TILER_CONTEXT, t, "Tiler:\n");
+   DUMP_UNPACKED(ctx, TILER_CONTEXT, t, "Tiler:\n");
 
    /* We've never seen weights used in practice, but they exist */
    pan_unpack(wp, TILER_WEIGHTS, w);
@@ -65,55 +66,58 @@ pandecode_midgard_tiler_descriptor(const struct mali_tiler_context_packed *tp,
    nonzero_weights |= w.weight7 != 0x0;
 
    if (nonzero_weights)
-      DUMP_UNPACKED(TILER_WEIGHTS, w, "Tiler Weights:\n");
+      DUMP_UNPACKED(ctx, TILER_WEIGHTS, w, "Tiler Weights:\n");
 }
 #endif
 
 #if PAN_ARCH >= 5
 static void
-pandecode_render_target(uint64_t gpu_va, unsigned gpu_id,
+pandecode_render_target(struct pandecode_context *ctx, uint64_t gpu_va,
+                        unsigned gpu_id,
                         const struct MALI_FRAMEBUFFER_PARAMETERS *fb)
 {
-   pandecode_log("Color Render Targets @%" PRIx64 ":\n", gpu_va);
-   pandecode_indent++;
+   pandecode_log(ctx, "Color Render Targets @%" PRIx64 ":\n", gpu_va);
+   ctx->indent++;
 
    for (int i = 0; i < (fb->render_target_count); i++) {
       mali_ptr rt_va = gpu_va + i * pan_size(RENDER_TARGET);
       const struct mali_render_target_packed *PANDECODE_PTR_VAR(
-         rtp, (mali_ptr)rt_va);
-      DUMP_CL(RENDER_TARGET, rtp, "Color Render Target %d:\n", i);
+         ctx, rtp, (mali_ptr)rt_va);
+      DUMP_CL(ctx, RENDER_TARGET, rtp, "Color Render Target %d:\n", i);
    }
 
-   pandecode_indent--;
-   pandecode_log("\n");
+   ctx->indent--;
+   pandecode_log(ctx, "\n");
 }
 #endif
 
 #if PAN_ARCH >= 6
 static void
-pandecode_sample_locations(const void *fb)
+pandecode_sample_locations(struct pandecode_context *ctx, const void *fb)
 {
    pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);
 
-   const u16 *PANDECODE_PTR_VAR(samples, params.sample_locations);
+   const u16 *PANDECODE_PTR_VAR(ctx, samples, params.sample_locations);
 
-   pandecode_log("Sample locations @%" PRIx64 ":\n", params.sample_locations);
+   pandecode_log(ctx, "Sample locations @%" PRIx64 ":\n",
+                 params.sample_locations);
    for (int i = 0; i < 33; i++) {
-      pandecode_log("  (%d, %d),\n", samples[2 * i] - 128,
+      pandecode_log(ctx, "  (%d, %d),\n", samples[2 * i] - 128,
                     samples[2 * i + 1] - 128);
    }
 }
 #endif
 
 struct pandecode_fbd
-GENX(pandecode_fbd)(uint64_t gpu_va, bool is_fragment, unsigned gpu_id)
+GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va,
+                    bool is_fragment, unsigned gpu_id)
 {
-   const void *PANDECODE_PTR_VAR(fb, (mali_ptr)gpu_va);
+   const void *PANDECODE_PTR_VAR(ctx, fb, (mali_ptr)gpu_va);
    pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params);
-   DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
+   DUMP_UNPACKED(ctx, FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
 
 #if PAN_ARCH >= 6
-   pandecode_sample_locations(fb);
+   pandecode_sample_locations(ctx, fb);
 
    unsigned dcd_size = pan_size(DRAW);
    unsigned job_type_param = 0;
@@ -123,64 +127,64 @@ GENX(pandecode_fbd)(uint64_t gpu_va, bool is_fragment, unsigned gpu_id)
 #endif
 
    if (params.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
-      const void *PANDECODE_PTR_VAR(dcd,
+      const void *PANDECODE_PTR_VAR(ctx, dcd,
                                     params.frame_shader_dcds + (0 * dcd_size));
       pan_unpack(dcd, DRAW, draw);
-      pandecode_log("Pre frame 0 @%" PRIx64 " (mode=%d):\n",
+      pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n",
                     params.frame_shader_dcds, params.pre_frame_0);
-      GENX(pandecode_dcd)(&draw, job_type_param, gpu_id);
+      GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
    }
 
    if (params.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
-      const void *PANDECODE_PTR_VAR(dcd,
+      const void *PANDECODE_PTR_VAR(ctx, dcd,
                                     params.frame_shader_dcds + (1 * dcd_size));
       pan_unpack(dcd, DRAW, draw);
-      pandecode_log("Pre frame 1 @%" PRIx64 ":\n",
+      pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n",
                     params.frame_shader_dcds + (1 * dcd_size));
-      GENX(pandecode_dcd)(&draw, job_type_param, gpu_id);
+      GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
    }
 
    if (params.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) {
-      const void *PANDECODE_PTR_VAR(dcd,
+      const void *PANDECODE_PTR_VAR(ctx, dcd,
                                     params.frame_shader_dcds + (2 * dcd_size));
       pan_unpack(dcd, DRAW, draw);
-      pandecode_log("Post frame:\n");
-      GENX(pandecode_dcd)(&draw, job_type_param, gpu_id);
+      pandecode_log(ctx, "Post frame:\n");
+      GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id);
    }
 #else
-   DUMP_SECTION(FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n");
+   DUMP_SECTION(ctx, FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n");
 
    const void *t = pan_section_ptr(fb, FRAMEBUFFER, TILER);
    const void *w = pan_section_ptr(fb, FRAMEBUFFER, TILER_WEIGHTS);
-   pandecode_midgard_tiler_descriptor(t, w);
+   pandecode_midgard_tiler_descriptor(ctx, t, w);
 #endif
 
-   pandecode_log("Framebuffer @%" PRIx64 ":\n", gpu_va);
-   pandecode_indent++;
+   pandecode_log(ctx, "Framebuffer @%" PRIx64 ":\n", gpu_va);
+   ctx->indent++;
 
-   DUMP_UNPACKED(FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
+   DUMP_UNPACKED(ctx, FRAMEBUFFER_PARAMETERS, params, "Parameters:\n");
 
 #if PAN_ARCH >= 6
    if (params.tiler)
-      GENX(pandecode_tiler)(params.tiler, gpu_id);
+      GENX(pandecode_tiler)(ctx, params.tiler, gpu_id);
 #endif
 
-   pandecode_indent--;
-   pandecode_log("\n");
+   ctx->indent--;
+   pandecode_log(ctx, "\n");
 
 #if PAN_ARCH >= 5
    gpu_va += pan_size(FRAMEBUFFER);
 
    if (params.has_zs_crc_extension) {
       const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR(
-         zs_crc, (mali_ptr)gpu_va);
-      DUMP_CL(ZS_CRC_EXTENSION, zs_crc, "ZS CRC Extension:\n");
-      pandecode_log("\n");
+         ctx, zs_crc, (mali_ptr)gpu_va);
+      DUMP_CL(ctx, ZS_CRC_EXTENSION, zs_crc, "ZS CRC Extension:\n");
+      pandecode_log(ctx, "\n");
 
       gpu_va += pan_size(ZS_CRC_EXTENSION);
    }
 
    if (is_fragment)
-      pandecode_render_target(gpu_va, gpu_id, &params);
+      pandecode_render_target(ctx, gpu_va, gpu_id, &params);
 
    return (struct pandecode_fbd){
       .rt_count = params.render_target_count,
@@ -201,10 +205,11 @@ GENX(pandecode_fbd)(uint64_t gpu_va, bool is_fragment, unsigned gpu_id)
 
 #if PAN_ARCH >= 5
 mali_ptr
-GENX(pandecode_blend)(void *descs, int rt_no, mali_ptr frag_shader)
+GENX(pandecode_blend)(struct pandecode_context *ctx, void *descs, int rt_no,
+                      mali_ptr frag_shader)
 {
    pan_unpack(descs + (rt_no * pan_size(BLEND)), BLEND, b);
-   DUMP_UNPACKED(BLEND, b, "Blend RT %d:\n", rt_no);
+   DUMP_UNPACKED(ctx, BLEND, b, "Blend RT %d:\n", rt_no);
 #if PAN_ARCH >= 6
    if (b.internal.mode != MALI_BLEND_MODE_SHADER)
       return 0;
@@ -231,7 +236,8 @@ panfrost_is_yuv_format(uint32_t packed)
 }
 
 static void
-pandecode_texture_payload(mali_ptr payload, const struct MALI_TEXTURE *tex)
+pandecode_texture_payload(struct pandecode_context *ctx, mali_ptr payload,
+                          const struct MALI_TEXTURE *tex)
 {
    unsigned nr_samples =
       tex->dimension == MALI_TEXTURE_DIMENSION_3D ? 1 : tex->sample_count;
@@ -257,8 +263,8 @@ pandecode_texture_payload(mali_ptr payload, const struct MALI_TEXTURE *tex)
 #define PANDECODE_EMIT_TEX_PAYLOAD_DESC(T, msg)                                \
    for (int i = 0; i < bitmap_count; ++i) {                                    \
       uint64_t addr = payload + pan_size(T) * i;                               \
-      pan_unpack(PANDECODE_PTR(addr, void), T, s);                             \
-      DUMP_UNPACKED(T, s, msg " @%" PRIx64 ":\n", addr)                        \
+      pan_unpack(PANDECODE_PTR(ctx, addr, void), T, s);                        \
+      DUMP_UNPACKED(ctx, T, s, msg " @%" PRIx64 ":\n", addr)                   \
    }
 
 #if PAN_ARCH <= 5
@@ -277,7 +283,7 @@ pandecode_texture_payload(mali_ptr payload, const struct MALI_TEXTURE *tex)
          "Surface With Stride");
       break;
    default:
-      fprintf(pandecode_dump_stream, "Unknown surface descriptor type %X\n",
+      fprintf(ctx->dump_stream, "Unknown surface descriptor type %X\n",
               tex->surface_type);
       break;
    }
@@ -299,25 +305,26 @@ pandecode_texture_payload(mali_ptr payload, const struct MALI_TEXTURE *tex)
 
 #if PAN_ARCH <= 5
 void
-GENX(pandecode_texture)(mali_ptr u, unsigned tex)
+GENX(pandecode_texture)(struct pandecode_context *ctx, mali_ptr u, unsigned tex)
 {
-   const uint8_t *cl = pandecode_fetch_gpu_mem(u, pan_size(TEXTURE));
+   const uint8_t *cl = pandecode_fetch_gpu_mem(ctx, u, pan_size(TEXTURE));
 
    pan_unpack(cl, TEXTURE, temp);
-   DUMP_UNPACKED(TEXTURE, temp, "Texture:\n")
+   DUMP_UNPACKED(ctx, TEXTURE, temp, "Texture:\n")
 
-   pandecode_indent++;
-   pandecode_texture_payload(u + pan_size(TEXTURE), &temp);
-   pandecode_indent--;
+   ctx->indent++;
+   pandecode_texture_payload(ctx, u + pan_size(TEXTURE), &temp);
+   ctx->indent--;
 }
 #else
 void
-GENX(pandecode_texture)(const void *cl, unsigned tex)
+GENX(pandecode_texture)(struct pandecode_context *ctx, const void *cl,
+                        unsigned tex)
 {
    pan_unpack(cl, TEXTURE, temp);
-   DUMP_UNPACKED(TEXTURE, temp, "Texture:\n")
+   DUMP_UNPACKED(ctx, TEXTURE, temp, "Texture:\n")
 
-   pandecode_indent++;
+   ctx->indent++;
 
 #if PAN_ARCH >= 9
    int plane_count = temp.levels * temp.array_size;
@@ -327,66 +334,70 @@ GENX(pandecode_texture)(const void *cl, unsigned tex)
       plane_count *= 6;
 
    for (unsigned i = 0; i < plane_count; ++i)
-      DUMP_ADDR(PLANE, temp.surfaces + i * pan_size(PLANE), "Plane %u:\n", i);
+      DUMP_ADDR(ctx, PLANE, temp.surfaces + i * pan_size(PLANE), "Plane %u:\n",
+                i);
 #else
-   pandecode_texture_payload(temp.surfaces, &temp);
+   pandecode_texture_payload(ctx, temp.surfaces, &temp);
 #endif
 
-   pandecode_indent--;
+   ctx->indent--;
 }
 #endif
 
 #if PAN_ARCH >= 6
 void
-GENX(pandecode_tiler)(mali_ptr gpu_va, unsigned gpu_id)
+GENX(pandecode_tiler)(struct pandecode_context *ctx, mali_ptr gpu_va,
+                      unsigned gpu_id)
 {
-   pan_unpack(PANDECODE_PTR(gpu_va, void), TILER_CONTEXT, t);
+   pan_unpack(PANDECODE_PTR(ctx, gpu_va, void), TILER_CONTEXT, t);
 
    if (t.heap) {
-      pan_unpack(PANDECODE_PTR(t.heap, void), TILER_HEAP, h);
-      DUMP_UNPACKED(TILER_HEAP, h, "Tiler Heap:\n");
+      pan_unpack(PANDECODE_PTR(ctx, t.heap, void), TILER_HEAP, h);
+      DUMP_UNPACKED(ctx, TILER_HEAP, h, "Tiler Heap:\n");
    }
 
-   DUMP_UNPACKED(TILER_CONTEXT, t, "Tiler Context @%" PRIx64 ":\n", gpu_va);
+   DUMP_UNPACKED(ctx, TILER_CONTEXT, t, "Tiler Context @%" PRIx64 ":\n",
+                 gpu_va);
 }
 #endif
 
 #if PAN_ARCH >= 9
 void
-GENX(pandecode_fau)(mali_ptr addr, unsigned count, const char *name)
+GENX(pandecode_fau)(struct pandecode_context *ctx, mali_ptr addr,
+                    unsigned count, const char *name)
 {
    if (count == 0)
       return;
 
-   const uint32_t *PANDECODE_PTR_VAR(raw, addr);
+   const uint32_t *PANDECODE_PTR_VAR(ctx, raw, addr);
 
-   pandecode_validate_buffer(addr, count * 8);
+   pandecode_validate_buffer(ctx, addr, count * 8);
 
-   fprintf(pandecode_dump_stream, "%s @%" PRIx64 ":\n", name, addr);
+   fprintf(ctx->dump_stream, "%s @%" PRIx64 ":\n", name, addr);
 
    for (unsigned i = 0; i < count; ++i) {
-      fprintf(pandecode_dump_stream, "  %08X %08X\n", raw[2 * i],
-              raw[2 * i + 1]);
+      fprintf(ctx->dump_stream, "  %08X %08X\n", raw[2 * i], raw[2 * i + 1]);
   }
 
-   fprintf(pandecode_dump_stream, "\n");
+   fprintf(ctx->dump_stream, "\n");
 }
 
 mali_ptr
-GENX(pandecode_shader)(mali_ptr addr, const char *label, unsigned gpu_id)
+GENX(pandecode_shader)(struct pandecode_context *ctx, mali_ptr addr,
+                       const char *label, unsigned gpu_id)
 {
-   MAP_ADDR(SHADER_PROGRAM, addr, cl);
+   MAP_ADDR(ctx, SHADER_PROGRAM, addr, cl);
    pan_unpack(cl, SHADER_PROGRAM, desc);
 
    assert(desc.type == 8);
 
-   DUMP_UNPACKED(SHADER_PROGRAM, desc, "%s Shader @%" PRIx64 ":\n", label,
+   DUMP_UNPACKED(ctx, SHADER_PROGRAM, desc, "%s Shader @%" PRIx64 ":\n", label,
                  addr);
-   pandecode_shader_disassemble(desc.binary, gpu_id);
+   pandecode_shader_disassemble(ctx, desc.binary, gpu_id);
    return desc.binary;
 }
 
 static void
-pandecode_resources(mali_ptr addr, unsigned size)
+pandecode_resources(struct pandecode_context *ctx, mali_ptr addr, unsigned size)
 {
-   const uint8_t *cl = pandecode_fetch_gpu_mem(addr, size);
+   const uint8_t *cl = pandecode_fetch_gpu_mem(ctx, addr, size);
    assert((size % 0x20) == 0);
 
    for (unsigned i = 0; i < size; i += 0x20) {
@@ -394,96 +405,101 @@ pandecode_resources(mali_ptr addr, unsigned size)
       switch (type) {
       case MALI_DESCRIPTOR_TYPE_SAMPLER:
-         DUMP_CL(SAMPLER, cl + i, "Sampler @%" PRIx64 ":\n", addr + i);
+         DUMP_CL(ctx, SAMPLER, cl + i, "Sampler @%" PRIx64 ":\n", addr + i);
          break;
       case MALI_DESCRIPTOR_TYPE_TEXTURE:
-         pandecode_log("Texture @%" PRIx64 "\n", addr + i);
-         GENX(pandecode_texture)(cl + i, i);
+         pandecode_log(ctx, "Texture @%" PRIx64 "\n", addr + i);
+         GENX(pandecode_texture)(ctx, cl + i, i);
          break;
       case MALI_DESCRIPTOR_TYPE_ATTRIBUTE:
-         DUMP_CL(ATTRIBUTE, cl + i, "Attribute @%" PRIx64 ":\n", addr + i);
+         DUMP_CL(ctx, ATTRIBUTE, cl + i, "Attribute @%" PRIx64 ":\n", addr + i);
          break;
       case MALI_DESCRIPTOR_TYPE_BUFFER:
-         DUMP_CL(BUFFER, cl + i, "Buffer @%" PRIx64 ":\n", addr + i);
+         DUMP_CL(ctx, BUFFER, cl + i, "Buffer @%" PRIx64 ":\n", addr + i);
          break;
       default:
-         fprintf(pandecode_dump_stream, "Unknown descriptor type %X\n", type);
+         fprintf(ctx->dump_stream, "Unknown descriptor type %X\n", type);
         break;
      }
   }
 }
 
 void
-GENX(pandecode_resource_tables)(mali_ptr addr, const char *label)
+GENX(pandecode_resource_tables)(struct pandecode_context *ctx, mali_ptr addr,
+                                const char *label)
 {
    unsigned count = addr & 0x3F;
    addr = addr & ~0x3F;
 
    const uint8_t *cl =
-      pandecode_fetch_gpu_mem(addr, MALI_RESOURCE_LENGTH * count);
+      pandecode_fetch_gpu_mem(ctx, addr, MALI_RESOURCE_LENGTH * count);
 
    for (unsigned i = 0; i < count; ++i) {
       pan_unpack(cl + i * MALI_RESOURCE_LENGTH, RESOURCE, entry);
-      DUMP_UNPACKED(RESOURCE, entry, "Entry %u @%" PRIx64 ":\n", i,
+      DUMP_UNPACKED(ctx, RESOURCE, entry, "Entry %u @%" PRIx64 ":\n", i,
                     addr + i * MALI_RESOURCE_LENGTH);
 
-      pandecode_indent += 2;
+      ctx->indent += 2;
       if (entry.address)
-         pandecode_resources(entry.address, entry.size);
-      pandecode_indent -= 2;
+         pandecode_resources(ctx, entry.address, entry.size);
+      ctx->indent -= 2;
    }
 }
 
 void
-GENX(pandecode_depth_stencil)(mali_ptr addr)
+GENX(pandecode_depth_stencil)(struct pandecode_context *ctx, mali_ptr addr)
 {
-   MAP_ADDR(DEPTH_STENCIL, addr, cl);
+   MAP_ADDR(ctx, DEPTH_STENCIL, addr, cl);
    pan_unpack(cl, DEPTH_STENCIL, desc);
-   DUMP_UNPACKED(DEPTH_STENCIL, desc, "Depth/stencil");
+   DUMP_UNPACKED(ctx, DEPTH_STENCIL, desc, "Depth/stencil");
 }
 
 void
-GENX(pandecode_shader_environment)(const struct MALI_SHADER_ENVIRONMENT *p,
+GENX(pandecode_shader_environment)(struct pandecode_context *ctx,
+                                   const struct MALI_SHADER_ENVIRONMENT *p,
                                    unsigned gpu_id)
 {
    if (p->shader)
-      GENX(pandecode_shader)(p->shader, "Shader", gpu_id);
+      GENX(pandecode_shader)(ctx, p->shader, "Shader", gpu_id);
 
    if (p->resources)
-      GENX(pandecode_resource_tables)(p->resources, "Resources");
+      GENX(pandecode_resource_tables)(ctx, p->resources, "Resources");
 
    if (p->thread_storage)
-      DUMP_ADDR(LOCAL_STORAGE, p->thread_storage, "Local Storage:\n");
+      DUMP_ADDR(ctx, LOCAL_STORAGE, p->thread_storage, "Local Storage:\n");
 
    if (p->fau)
-      GENX(pandecode_fau)(p->fau, p->fau_count, "FAU");
+      GENX(pandecode_fau)(ctx, p->fau, p->fau_count, "FAU");
 }
 
 void
-GENX(pandecode_blend_descs)(mali_ptr blend, unsigned count,
-                            mali_ptr frag_shader, unsigned gpu_id)
+GENX(pandecode_blend_descs)(struct pandecode_context *ctx, mali_ptr blend,
+                            unsigned count, mali_ptr frag_shader,
+                            unsigned gpu_id)
 {
    for (unsigned i = 0; i < count; ++i) {
-      struct mali_blend_packed *PANDECODE_PTR_VAR(blend_descs, blend);
+      struct mali_blend_packed *PANDECODE_PTR_VAR(ctx, blend_descs, blend);
 
       mali_ptr blend_shader =
-         GENX(pandecode_blend)(blend_descs, i, frag_shader);
+         GENX(pandecode_blend)(ctx, blend_descs, i, frag_shader);
       if (blend_shader) {
-         fprintf(pandecode_dump_stream, "Blend shader %u @%" PRIx64 "", i,
+         fprintf(ctx->dump_stream, "Blend shader %u @%" PRIx64 "", i,
                  blend_shader);
-         pandecode_shader_disassemble(blend_shader, gpu_id);
+         pandecode_shader_disassemble(ctx, blend_shader, gpu_id);
      }
   }
 }
 
 void
-GENX(pandecode_dcd)(const struct MALI_DRAW *p, unsigned unused, unsigned gpu_id)
+GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p,
+                    unsigned unused, unsigned gpu_id)
 {
    mali_ptr frag_shader = 0;
 
-   GENX(pandecode_depth_stencil)(p->depth_stencil);
-   GENX(pandecode_blend_descs)(p->blend, p->blend_count, frag_shader, gpu_id);
-   GENX(pandecode_shader_environment)(&p->shader, gpu_id);
-   DUMP_UNPACKED(DRAW, *p, "Draw:\n");
+   GENX(pandecode_depth_stencil)(ctx, p->depth_stencil);
+   GENX(pandecode_blend_descs)
+   (ctx, p->blend, p->blend_count, frag_shader, gpu_id);
+   GENX(pandecode_shader_environment)(ctx, &p->shader, gpu_id);
+   DUMP_UNPACKED(ctx, DRAW, *p, "Draw:\n");
 }
 #endif
diff --git a/src/panfrost/lib/genxml/decode.h b/src/panfrost/lib/genxml/decode.h
index 517153314ba..98166c52c4d 100644
--- a/src/panfrost/lib/genxml/decode.h
+++ b/src/panfrost/lib/genxml/decode.h
@@ -28,13 +28,22 @@
 
 #include "genxml/gen_macros.h"
 #include "util/rb_tree.h"
+#include "util/simple_mtx.h"
+#include "util/u_dynarray.h"
 #include "wrap.h"
 
-extern FILE *pandecode_dump_stream;
-extern unsigned pandecode_indent;
+struct pandecode_context {
+   int id; /* only used for the filename */
+   FILE *dump_stream;
+   unsigned indent;
+   struct rb_tree mmap_tree;
+   struct util_dynarray ro_mappings;
+   int dump_frame_count;
+   simple_mtx_t lock;
+};
 
-void pandecode_dump_file_open(void);
+void pandecode_dump_file_open(struct pandecode_context *ctx);
 
 struct pandecode_mapped_memory {
    struct rb_node node;
@@ -45,21 +54,22 @@ struct pandecode_mapped_memory {
    char name[32];
 };
 
-char *pointer_as_memory_reference(uint64_t ptr);
+char *pointer_as_memory_reference(struct pandecode_context *ctx, uint64_t ptr);
 
 struct pandecode_mapped_memory *
-pandecode_find_mapped_gpu_mem_containing(uint64_t addr);
+pandecode_find_mapped_gpu_mem_containing(struct pandecode_context *ctx,
+                                         uint64_t addr);
 
-void pandecode_map_read_write(void);
+void pandecode_map_read_write(struct pandecode_context *ctx);
 
-void pandecode_dump_mappings(void);
+void pandecode_dump_mappings(struct pandecode_context *ctx);
 
 static inline void *
-__pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size, int line,
-                          const char *filename)
+__pandecode_fetch_gpu_mem(struct pandecode_context *ctx, uint64_t gpu_va,
+                          size_t size, int line, const char *filename)
 {
    const struct pandecode_mapped_memory *mem =
-      pandecode_find_mapped_gpu_mem_containing(gpu_va);
+      pandecode_find_mapped_gpu_mem_containing(ctx, gpu_va);
 
    if (!mem) {
       fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va,
@@ -72,97 +82,110 @@ __pandecode_fetch_gpu_mem(uint64_t gpu_va, size_t size, int line,
 
    return mem->addr + gpu_va - mem->gpu_va;
 }
 
-#define pandecode_fetch_gpu_mem(gpu_va, size)                                  \
-   __pandecode_fetch_gpu_mem(gpu_va, size, __LINE__, __FILE__)
+#define pandecode_fetch_gpu_mem(ctx, gpu_va, size)                             \
+   __pandecode_fetch_gpu_mem(ctx, gpu_va, size, __LINE__, __FILE__)
 
 /* Returns a validated pointer to mapped GPU memory with the given pointer type,
  * size automatically determined from the pointer type
  */
-#define PANDECODE_PTR(gpu_va, type)                                            \
-   ((type *)(__pandecode_fetch_gpu_mem(gpu_va, sizeof(type), __LINE__,         \
+#define PANDECODE_PTR(ctx, gpu_va, type)                                       \
+   ((type *)(__pandecode_fetch_gpu_mem(ctx, gpu_va, sizeof(type), __LINE__,    \
                                        __FILE__)))
 
 /* Usage: PANDECODE_PTR_VAR(name, gpu_va) */
-#define PANDECODE_PTR_VAR(name, gpu_va)                                        \
-   name = __pandecode_fetch_gpu_mem(gpu_va, sizeof(*name), __LINE__, __FILE__)
+#define PANDECODE_PTR_VAR(ctx, name, gpu_va)                                   \
+   name = __pandecode_fetch_gpu_mem(ctx, gpu_va, sizeof(*name), __LINE__,      \
+                                    __FILE__)
 
-void pandecode_validate_buffer(mali_ptr addr, size_t sz);
+void pandecode_validate_buffer(struct pandecode_context *ctx, mali_ptr addr,
+                               size_t sz);
 
 /* Forward declare for all supported gens to permit thunking */
-void pandecode_jc_v4(mali_ptr jc_gpu_va, unsigned gpu_id);
-void pandecode_jc_v5(mali_ptr jc_gpu_va, unsigned gpu_id);
-void pandecode_jc_v6(mali_ptr jc_gpu_va, unsigned gpu_id);
-void pandecode_jc_v7(mali_ptr jc_gpu_va, unsigned gpu_id);
-void pandecode_jc_v9(mali_ptr jc_gpu_va, unsigned gpu_id);
+void pandecode_jc_v4(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
+                     unsigned gpu_id);
+void pandecode_jc_v5(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
+                     unsigned gpu_id);
+void pandecode_jc_v6(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
+                     unsigned gpu_id);
+void pandecode_jc_v7(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
+                     unsigned gpu_id);
+void pandecode_jc_v9(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
+                     unsigned gpu_id);
 
-void pandecode_abort_on_fault_v4(mali_ptr jc_gpu_va);
-void pandecode_abort_on_fault_v5(mali_ptr jc_gpu_va);
-void pandecode_abort_on_fault_v6(mali_ptr jc_gpu_va);
-void pandecode_abort_on_fault_v7(mali_ptr jc_gpu_va);
-void pandecode_abort_on_fault_v9(mali_ptr jc_gpu_va);
+void pandecode_abort_on_fault_v4(struct pandecode_context *ctx,
+                                 mali_ptr jc_gpu_va);
+void pandecode_abort_on_fault_v5(struct pandecode_context *ctx,
+                                 mali_ptr jc_gpu_va);
+void pandecode_abort_on_fault_v6(struct pandecode_context *ctx,
+                                 mali_ptr jc_gpu_va);
+void pandecode_abort_on_fault_v7(struct pandecode_context *ctx,
+                                 mali_ptr jc_gpu_va);
+void pandecode_abort_on_fault_v9(struct pandecode_context *ctx,
+                                 mali_ptr jc_gpu_va);
 
-void pandecode_cs_v10(mali_ptr queue, uint32_t size, unsigned gpu_id,
-                      uint32_t *regs);
+void pandecode_cs_v10(struct pandecode_context *ctx, mali_ptr queue,
+                      uint32_t size, unsigned gpu_id, uint32_t *regs);
 
 /* Logging infrastructure */
 
 static void
-pandecode_make_indent(void)
+pandecode_make_indent(struct pandecode_context *ctx)
 {
-   for (unsigned i = 0; i < pandecode_indent; ++i)
-      fprintf(pandecode_dump_stream, "  ");
+   for (unsigned i = 0; i < ctx->indent; ++i)
+      fprintf(ctx->dump_stream, "  ");
 }
 
-static inline void PRINTFLIKE(1, 2) pandecode_log(const char *format, ...)
+static inline void PRINTFLIKE(2, 3)
+   pandecode_log(struct pandecode_context *ctx, const char *format, ...)
 {
    va_list ap;
 
-   pandecode_make_indent();
+   pandecode_make_indent(ctx);
    va_start(ap, format);
-   vfprintf(pandecode_dump_stream, format, ap);
+   vfprintf(ctx->dump_stream, format, ap);
    va_end(ap);
 }
 
 static inline void
-pandecode_log_cont(const char *format, ...)
+pandecode_log_cont(struct pandecode_context *ctx, const char *format, ...)
 {
    va_list ap;
 
    va_start(ap, format);
-   vfprintf(pandecode_dump_stream, format, ap);
+   vfprintf(ctx->dump_stream, format, ap);
    va_end(ap);
 }
 
 /* Convenience methods */
 
-#define DUMP_UNPACKED(T, var, ...)                                             \
+#define DUMP_UNPACKED(ctx, T, var, ...)                                        \
    {                                                                           \
-      pandecode_log(__VA_ARGS__);                                              \
-      pan_print(pandecode_dump_stream, T, var, (pandecode_indent + 1) * 2);    \
+      pandecode_log(ctx, __VA_ARGS__);                                         \
+      pan_print(ctx->dump_stream, T, var, (ctx->indent + 1) * 2);              \
    }
 
-#define DUMP_CL(T, cl, ...)                                                    \
+#define DUMP_CL(ctx, T, cl, ...)                                               \
    {                                                                           \
      pan_unpack(cl, T, temp);                                                  \
-     DUMP_UNPACKED(T, temp, __VA_ARGS__);                                      \
+     DUMP_UNPACKED(ctx, T, temp, __VA_ARGS__);                                 \
   }
 
-#define DUMP_SECTION(A, S, cl, ...)                                            \
+#define DUMP_SECTION(ctx, A, S, cl, ...)                                       \
    {                                                                           \
      pan_section_unpack(cl, A, S, temp);                                       \
-     pandecode_log(__VA_ARGS__);                                               \
-     pan_section_print(pandecode_dump_stream, A, S, temp,                      \
-                       (pandecode_indent + 1) * 2);                            \
+     pandecode_log(ctx, __VA_ARGS__);                                          \
+     pan_section_print(ctx->dump_stream, A, S, temp, (ctx->indent + 1) * 2);   \
   }
 
-#define MAP_ADDR(T, addr, cl)                                                  \
-   const uint8_t *cl = pandecode_fetch_gpu_mem(addr, pan_size(T));
+#define MAP_ADDR(ctx, T, addr, cl)                                             \
+   const uint8_t *cl = pandecode_fetch_gpu_mem(ctx, addr, pan_size(T));
 
-#define DUMP_ADDR(T, addr, ...)                                                \
+#define DUMP_ADDR(ctx, T, addr, ...)                                           \
    {                                                                           \
-      MAP_ADDR(T, addr, cl)                                                    \
-      DUMP_CL(T, cl, __VA_ARGS__);                                             \
+      MAP_ADDR(ctx, T, addr, cl)                                               \
+      DUMP_CL(ctx, T, cl, __VA_ARGS__);                                        \
   }
 
-void pandecode_shader_disassemble(mali_ptr shader_ptr, unsigned gpu_id);
+void pandecode_shader_disassemble(struct pandecode_context *ctx,
+                                  mali_ptr shader_ptr, unsigned gpu_id);
 
 #ifdef PAN_ARCH
 
@@ -172,46 +195,58 @@ struct pandecode_fbd {
    bool has_extra;
 };
 
-struct pandecode_fbd GENX(pandecode_fbd)(uint64_t gpu_va, bool is_fragment,
+struct pandecode_fbd GENX(pandecode_fbd)(struct pandecode_context *ctx,
+                                         uint64_t gpu_va, bool is_fragment,
                                          unsigned gpu_id);
 
 #if PAN_ARCH >= 9
-void GENX(pandecode_dcd)(const struct MALI_DRAW *p, unsigned unused,
+void GENX(pandecode_dcd)(struct pandecode_context *ctx,
+                         const struct MALI_DRAW *p, unsigned unused,
                          unsigned gpu_id);
 #else
-void GENX(pandecode_dcd)(const struct MALI_DRAW *p, enum mali_job_type job_type,
+void GENX(pandecode_dcd)(struct pandecode_context *ctx,
+                         const struct MALI_DRAW *p, enum mali_job_type job_type,
                          unsigned gpu_id);
 #endif
 
 #if PAN_ARCH <= 5
-void GENX(pandecode_texture)(mali_ptr u, unsigned tex);
+void GENX(pandecode_texture)(struct pandecode_context *ctx, mali_ptr u,
+                             unsigned tex);
 #else
-void GENX(pandecode_texture)(const void *cl, unsigned tex);
+void GENX(pandecode_texture)(struct pandecode_context *ctx, const void *cl,
+                             unsigned tex);
 #endif
 
 #if PAN_ARCH >= 5
-mali_ptr GENX(pandecode_blend)(void *descs, int rt_no, mali_ptr frag_shader);
+mali_ptr GENX(pandecode_blend)(struct pandecode_context *ctx, void *descs,
+                               int rt_no, mali_ptr frag_shader);
 #endif
 
 #if PAN_ARCH >= 6
-void GENX(pandecode_tiler)(mali_ptr gpu_va, unsigned gpu_id);
+void GENX(pandecode_tiler)(struct pandecode_context *ctx, mali_ptr gpu_va,
+                           unsigned gpu_id);
 #endif
 
 #if PAN_ARCH >= 9
-void GENX(pandecode_shader_environment)(const struct MALI_SHADER_ENVIRONMENT *p,
+void GENX(pandecode_shader_environment)(struct pandecode_context *ctx,
+                                        const struct MALI_SHADER_ENVIRONMENT *p,
                                         unsigned gpu_id);
 
-void GENX(pandecode_resource_tables)(mali_ptr addr, const char *label);
+void GENX(pandecode_resource_tables)(struct pandecode_context *ctx,
+                                     mali_ptr addr, const char *label);
 
-void GENX(pandecode_fau)(mali_ptr addr, unsigned count, const char *name);
+void GENX(pandecode_fau)(struct pandecode_context *ctx, mali_ptr addr,
+                         unsigned count, const char *name);
 
-mali_ptr GENX(pandecode_shader)(mali_ptr addr, const char *label,
-                                unsigned gpu_id);
+mali_ptr GENX(pandecode_shader)(struct pandecode_context *ctx, mali_ptr addr,
+                                const char *label, unsigned gpu_id);
 
-void GENX(pandecode_blend_descs)(mali_ptr blend, unsigned count,
-                                 mali_ptr frag_shader, unsigned gpu_id);
+void GENX(pandecode_blend_descs)(struct pandecode_context *ctx, mali_ptr blend,
+                                 unsigned count, mali_ptr frag_shader,
+                                 unsigned gpu_id);
 
-void GENX(pandecode_depth_stencil)(mali_ptr addr);
+void GENX(pandecode_depth_stencil)(struct pandecode_context *ctx,
+                                   mali_ptr addr);
 #endif
 
 #endif
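/*
 * One subtle point in the decode.h hunks above: because the format string
 * moved from parameter 1 to parameter 2, the PRINTFLIKE annotation on
 * pandecode_log() had to shift from (1, 2) to (2, 3), or -Wformat checking
 * would silently stop covering every call site. A self-contained illustration
 * of the same idiom (none of these names are from the patch):
 */
#include <stdarg.h>
#include <stdio.h>

struct log_sink {
   FILE *stream;
};

/* Parameter 2 is the format string; the variadic arguments start at 3 */
static void __attribute__((format(printf, 2, 3)))
sink_log(struct log_sink *sink, const char *fmt, ...)
{
   va_list ap;

   va_start(ap, fmt);
   vfprintf(sink->stream, fmt, ap);
   va_end(ap);
}

int
main(void)
{
   struct log_sink sink = {.stream = stdout};

   sink_log(&sink, "value: %d\n", 42);
   /* sink_log(&sink, "value: %d\n", "oops");  <- caught by -Wformat */
   return 0;
}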
diff --git a/src/panfrost/lib/genxml/decode_common.c b/src/panfrost/lib/genxml/decode_common.c
index 85c37e10b78..fc298316c71 100644
--- a/src/panfrost/lib/genxml/decode_common.c
+++ b/src/panfrost/lib/genxml/decode_common.c
@@ -31,9 +31,7 @@
 #include <sys/mman.h>
 
 #include "util/macros.h"
-#include "util/simple_mtx.h"
 #include "util/u_debug.h"
-#include "util/u_dynarray.h"
 #include "util/u_hexdump.h"
 
 #include "decode.h"
@@ -41,17 +39,9 @@
 #include "compiler/valhall/disassemble.h"
 #include "midgard/disassemble.h"
 
-FILE *pandecode_dump_stream;
-
-unsigned pandecode_indent;
-
-/* Memory handling */
-
-static struct rb_tree mmap_tree;
-
-static struct util_dynarray ro_mappings;
-
-static simple_mtx_t pandecode_lock = SIMPLE_MTX_INITIALIZER;
+/* Used to distinguish dumped files, otherwise we would have to print the ctx
+ * pointer, which is annoying for the user since it changes with every run */
+static int num_ctxs = 0;
 
 #define to_mapped_memory(x)                                                    \
    rb_node_data(struct pandecode_mapped_memory, x, node)
@@ -80,27 +70,31 @@ pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs)
 }
 
 static struct pandecode_mapped_memory *
-pandecode_find_mapped_gpu_mem_containing_rw(uint64_t addr)
+pandecode_find_mapped_gpu_mem_containing_rw(struct pandecode_context *ctx,
+                                            uint64_t addr)
 {
-   simple_mtx_assert_locked(&pandecode_lock);
+   simple_mtx_assert_locked(&ctx->lock);
 
-   struct rb_node *node = rb_tree_search(&mmap_tree, &addr, pandecode_cmp_key);
+   struct rb_node *node =
+      rb_tree_search(&ctx->mmap_tree, &addr, pandecode_cmp_key);
 
    return to_mapped_memory(node);
 }
 
 struct pandecode_mapped_memory *
-pandecode_find_mapped_gpu_mem_containing(uint64_t addr)
+pandecode_find_mapped_gpu_mem_containing(struct pandecode_context *ctx,
+                                         uint64_t addr)
 {
-   simple_mtx_assert_locked(&pandecode_lock);
+   simple_mtx_assert_locked(&ctx->lock);
 
    struct pandecode_mapped_memory *mem =
-      pandecode_find_mapped_gpu_mem_containing_rw(addr);
+      pandecode_find_mapped_gpu_mem_containing_rw(ctx, addr);
 
    if (mem && mem->addr && !mem->ro) {
       mprotect(mem->addr, mem->length, PROT_READ);
       mem->ro = true;
-      util_dynarray_append(&ro_mappings, struct pandecode_mapped_memory *, mem);
+      util_dynarray_append(&ctx->ro_mappings, struct pandecode_mapped_memory *,
+                           mem);
    }
 
    return mem;
@@ -112,20 +106,21 @@ pandecode_find_mapped_gpu_mem_containing(uint64_t addr)
  * detect GPU-side memory bugs by validating pointers.
  */
 void
-pandecode_validate_buffer(mali_ptr addr, size_t sz)
+pandecode_validate_buffer(struct pandecode_context *ctx, mali_ptr addr,
+                          size_t sz)
 {
    if (!addr) {
-      pandecode_log("// XXX: null pointer deref\n");
+      pandecode_log(ctx, "// XXX: null pointer deref\n");
       return;
    }
 
    /* Find a BO */
    struct pandecode_mapped_memory *bo =
-      pandecode_find_mapped_gpu_mem_containing(addr);
+      pandecode_find_mapped_gpu_mem_containing(ctx, addr);
 
    if (!bo) {
-      pandecode_log("// XXX: invalid memory dereference\n");
+      pandecode_log(ctx, "// XXX: invalid memory dereference\n");
       return;
    }
 
@@ -135,7 +130,8 @@ pandecode_validate_buffer(mali_ptr addr, size_t sz)
    unsigned total = offset + sz;
 
    if (total > bo->length) {
-      pandecode_log("// XXX: buffer overrun. "
+      pandecode_log(ctx,
+                    "// XXX: buffer overrun. "
                     "Chunk of size %zu at offset %d in buffer of size %zu. "
                     "Overrun by %zu bytes. \n",
                     sz, offset, bo->length, total - bo->length);
      return;
   }
 }
 
 void
-pandecode_map_read_write(void)
+pandecode_map_read_write(struct pandecode_context *ctx)
 {
-   simple_mtx_assert_locked(&pandecode_lock);
+   simple_mtx_assert_locked(&ctx->lock);
 
-   util_dynarray_foreach(&ro_mappings, struct pandecode_mapped_memory *, mem) {
+   util_dynarray_foreach(&ctx->ro_mappings, struct pandecode_mapped_memory *,
+                         mem) {
      (*mem)->ro = false;
      mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
   }
 
-   util_dynarray_clear(&ro_mappings);
+   util_dynarray_clear(&ctx->ro_mappings);
 }
 
 static void
-pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va,
+pandecode_add_name(struct pandecode_context *ctx,
+                   struct pandecode_mapped_memory *mem, uint64_t gpu_va,
                    const char *name)
 {
-   simple_mtx_assert_locked(&pandecode_lock);
+   simple_mtx_assert_locked(&ctx->lock);
 
    if (!name) {
       /* If we don't have a name, assign one */
@@ -172,19 +170,20 @@ pandecode_add_name(struct pandecode_mapped_memory *mem, uint64_t gpu_va,
 }
 
 void
-pandecode_inject_mmap(uint64_t gpu_va, void *cpu, unsigned sz, const char *name)
+pandecode_inject_mmap(struct pandecode_context *ctx, uint64_t gpu_va, void *cpu,
+                      unsigned sz, const char *name)
 {
-   simple_mtx_lock(&pandecode_lock);
+   simple_mtx_lock(&ctx->lock);
 
    /* First, search if we already mapped this and are just updating an address */
 
    struct pandecode_mapped_memory *existing =
-      pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
+      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);
 
    if (existing && existing->gpu_va == gpu_va) {
      existing->length = sz;
      existing->addr = cpu;
-      pandecode_add_name(existing, gpu_va, name);
+      pandecode_add_name(ctx, existing, gpu_va, name);
    } else {
      /* Otherwise, add a fresh mapping */
      struct pandecode_mapped_memory *mapped_mem = NULL;
@@ -193,45 +192,46 @@ pandecode_inject_mmap(uint64_t gpu_va, void *cpu, unsigned sz, const char *name)
      mapped_mem->gpu_va = gpu_va;
      mapped_mem->length = sz;
      mapped_mem->addr = cpu;
-     pandecode_add_name(mapped_mem, gpu_va, name);
+     pandecode_add_name(ctx, mapped_mem, gpu_va, name);
 
      /* Add it to the tree */
-     rb_tree_insert(&mmap_tree, &mapped_mem->node, pandecode_cmp);
+     rb_tree_insert(&ctx->mmap_tree, &mapped_mem->node, pandecode_cmp);
   }
 
-   simple_mtx_unlock(&pandecode_lock);
+   simple_mtx_unlock(&ctx->lock);
 }
 
 void
-pandecode_inject_free(uint64_t gpu_va, unsigned sz)
+pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va,
+                      unsigned sz)
 {
-   simple_mtx_lock(&pandecode_lock);
+   simple_mtx_lock(&ctx->lock);
 
    struct pandecode_mapped_memory *mem =
-      pandecode_find_mapped_gpu_mem_containing_rw(gpu_va);
+      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);
 
    if (mem) {
      assert(mem->gpu_va == gpu_va);
     assert(mem->length == sz);
 
-     rb_tree_remove(&mmap_tree, &mem->node);
+     rb_tree_remove(&ctx->mmap_tree, &mem->node);
      free(mem);
   }
 
-   simple_mtx_unlock(&pandecode_lock);
+   simple_mtx_unlock(&ctx->lock);
 }
 
 char *
-pointer_as_memory_reference(uint64_t ptr)
+pointer_as_memory_reference(struct pandecode_context *ctx, uint64_t ptr)
 {
-   simple_mtx_assert_locked(&pandecode_lock);
+   simple_mtx_assert_locked(&ctx->lock);
 
    struct pandecode_mapped_memory *mapped;
    char *out = malloc(128);
 
   /* Try to find the corresponding mapped zone */
 
-   mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr);
+   mapped = pandecode_find_mapped_gpu_mem_containing_rw(ctx, ptr);
 
   if (mapped) {
      snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va));
@@ -244,32 +244,25 @@ pointer_as_memory_reference(uint64_t ptr)
 
    return out;
 }
 
-static int pandecode_dump_frame_count = 0;
-
-static bool force_stderr = false;
-
 void
-pandecode_dump_file_open(void)
+pandecode_dump_file_open(struct pandecode_context *ctx)
 {
-   simple_mtx_assert_locked(&pandecode_lock);
-
-   if (pandecode_dump_stream)
-      return;
+   simple_mtx_assert_locked(&ctx->lock);
 
    /* This does a getenv every frame, so it is possible to use
    * setenv to change the base at runtime.
    */
    const char *dump_file_base =
      debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
-   if (force_stderr || !strcmp(dump_file_base, "stderr"))
-      pandecode_dump_stream = stdout; // stderr;
-   else {
+   if (!strcmp(dump_file_base, "stderr"))
+      ctx->dump_stream = stderr;
+   else if (!ctx->dump_stream) {
      char buffer[1024];
-      snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
-               pandecode_dump_frame_count);
+      snprintf(buffer, sizeof(buffer), "%s.ctx-%d.%04d", dump_file_base,
+               ctx->id, ctx->dump_frame_count);
      printf("pandecode: dump command stream to file %s\n", buffer);
-      pandecode_dump_stream = fopen(buffer, "w");
-      if (!pandecode_dump_stream)
+      ctx->dump_stream = fopen(buffer, "w");
+      if (!ctx->dump_stream)
        fprintf(stderr,
                "pandecode: failed to open command stream log file %s\n",
                buffer);
@@ -277,171 +270,191 @@ pandecode_dump_file_open(void)
   }
 }
 
 static void
-pandecode_dump_file_close(void)
+pandecode_dump_file_close(struct pandecode_context *ctx)
 {
-   simple_mtx_assert_locked(&pandecode_lock);
+   simple_mtx_assert_locked(&ctx->lock);
 
-   if (pandecode_dump_stream && pandecode_dump_stream != stderr) {
-      if (fclose(pandecode_dump_stream))
+   if (ctx->dump_stream && ctx->dump_stream != stderr) {
+      if (fclose(ctx->dump_stream))
        perror("pandecode: dump file");
 
-      pandecode_dump_stream = NULL;
+      ctx->dump_stream = NULL;
   }
 }
 
-void
-pandecode_initialize(bool to_stderr)
+struct pandecode_context *
+pandecode_create_context(bool to_stderr)
 {
-   force_stderr = to_stderr;
-   rb_tree_init(&mmap_tree);
-   util_dynarray_init(&ro_mappings, NULL);
+   struct pandecode_context *ctx = calloc(1, sizeof(*ctx));
+
+   /* Not thread safe, but we shouldn't ever hit this, and even if we do, the
+    * worst that could happen is having the files dumped with their filenames
+    * in a different order. */
+   ctx->id = num_ctxs++;
+
+   /* This will be initialized later and can be changed at run time through
+    * the PANDECODE_DUMP_FILE environment variable.
+    */
+   ctx->dump_stream = to_stderr ? stderr : NULL;
+
+   rb_tree_init(&ctx->mmap_tree);
+   util_dynarray_init(&ctx->ro_mappings, NULL);
+
+   simple_mtx_t mtx_init = SIMPLE_MTX_INITIALIZER;
+   memcpy(&ctx->lock, &mtx_init, sizeof(simple_mtx_t));
+
+   return ctx;
 }
 
 void
-pandecode_next_frame(void)
+pandecode_next_frame(struct pandecode_context *ctx)
 {
-   simple_mtx_lock(&pandecode_lock);
+   simple_mtx_lock(&ctx->lock);
 
-   pandecode_dump_file_close();
-   pandecode_dump_frame_count++;
+   pandecode_dump_file_close(ctx);
+   ctx->dump_frame_count++;
 
-   simple_mtx_unlock(&pandecode_lock);
+   simple_mtx_unlock(&ctx->lock);
 }
 
 void
-pandecode_close(void)
+pandecode_destroy_context(struct pandecode_context *ctx)
 {
-   simple_mtx_lock(&pandecode_lock);
+   simple_mtx_lock(&ctx->lock);
 
-   rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &mmap_tree, node) {
-      rb_tree_remove(&mmap_tree, &it->node);
+   rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &ctx->mmap_tree,
+                        node) {
+      rb_tree_remove(&ctx->mmap_tree, &it->node);
      free(it);
   }
 
-   util_dynarray_fini(&ro_mappings);
-   pandecode_dump_file_close();
+   util_dynarray_fini(&ctx->ro_mappings);
+   pandecode_dump_file_close(ctx);
 
-   simple_mtx_unlock(&pandecode_lock);
+   simple_mtx_unlock(&ctx->lock);
+
+   free(ctx);
 }
 
 void
-pandecode_dump_mappings(void)
+pandecode_dump_mappings(struct pandecode_context *ctx)
 {
-   simple_mtx_lock(&pandecode_lock);
+   simple_mtx_lock(&ctx->lock);
 
-   pandecode_dump_file_open();
+   pandecode_dump_file_open(ctx);
 
-   rb_tree_foreach(struct pandecode_mapped_memory, it, &mmap_tree, node) {
+   rb_tree_foreach(struct pandecode_mapped_memory, it, &ctx->mmap_tree, node) {
      if (!it->addr || !it->length)
        continue;
 
-      fprintf(pandecode_dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name,
+      fprintf(ctx->dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name,
              it->gpu_va);
 
-      u_hexdump(pandecode_dump_stream, it->addr, it->length, false);
-      fprintf(pandecode_dump_stream, "\n");
+      u_hexdump(ctx->dump_stream, it->addr, it->length, false);
+      fprintf(ctx->dump_stream, "\n");
   }
 
-   fflush(pandecode_dump_stream);
-   simple_mtx_unlock(&pandecode_lock);
+   fflush(ctx->dump_stream);
+   simple_mtx_unlock(&ctx->lock);
 }
 
 void
-pandecode_abort_on_fault(mali_ptr jc_gpu_va, unsigned gpu_id)
+pandecode_abort_on_fault(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
+                         unsigned gpu_id)
 {
-   simple_mtx_lock(&pandecode_lock);
+   simple_mtx_lock(&ctx->lock);
 
    switch (pan_arch(gpu_id)) {
    case 4:
-      pandecode_abort_on_fault_v4(jc_gpu_va);
+      pandecode_abort_on_fault_v4(ctx, jc_gpu_va);
      break;
    case 5:
-      pandecode_abort_on_fault_v5(jc_gpu_va);
+      pandecode_abort_on_fault_v5(ctx, jc_gpu_va);
      break;
    case 6:
-      pandecode_abort_on_fault_v6(jc_gpu_va);
+      pandecode_abort_on_fault_v6(ctx, jc_gpu_va);
      break;
    case 7:
-      pandecode_abort_on_fault_v7(jc_gpu_va);
+      pandecode_abort_on_fault_v7(ctx, jc_gpu_va);
      break;
    case 9:
-      pandecode_abort_on_fault_v9(jc_gpu_va);
+      pandecode_abort_on_fault_v9(ctx, jc_gpu_va);
      break;
    default:
      unreachable("Unsupported architecture");
   }
 
-   simple_mtx_unlock(&pandecode_lock);
+   simple_mtx_unlock(&ctx->lock);
 }
 
 void
-pandecode_jc(mali_ptr jc_gpu_va, unsigned gpu_id)
+pandecode_jc(struct pandecode_context *ctx, mali_ptr jc_gpu_va, unsigned gpu_id)
 {
-   simple_mtx_lock(&pandecode_lock);
+   simple_mtx_lock(&ctx->lock);
 
    switch (pan_arch(gpu_id)) {
    case 4:
-      pandecode_jc_v4(jc_gpu_va, gpu_id);
+      pandecode_jc_v4(ctx, jc_gpu_va, gpu_id);
      break;
    case 5:
-      pandecode_jc_v5(jc_gpu_va, gpu_id);
+      pandecode_jc_v5(ctx, jc_gpu_va, gpu_id);
      break;
    case 6:
-      pandecode_jc_v6(jc_gpu_va, gpu_id);
+      pandecode_jc_v6(ctx, jc_gpu_va, gpu_id);
      break;
    case 7:
-      pandecode_jc_v7(jc_gpu_va, gpu_id);
+      pandecode_jc_v7(ctx, jc_gpu_va, gpu_id);
      break;
    case 9:
-      pandecode_jc_v9(jc_gpu_va, gpu_id);
+      pandecode_jc_v9(ctx, jc_gpu_va, gpu_id);
      break;
    default:
      unreachable("Unsupported architecture");
   }
 
-   simple_mtx_unlock(&pandecode_lock);
+   simple_mtx_unlock(&ctx->lock);
 }
 
 void
-pandecode_cs(mali_ptr queue_gpu_va, uint32_t size, unsigned gpu_id,
-             uint32_t *regs)
+pandecode_cs(struct pandecode_context *ctx, mali_ptr queue_gpu_va,
+             uint32_t size, unsigned gpu_id, uint32_t *regs)
 {
-   simple_mtx_lock(&pandecode_lock);
+   simple_mtx_lock(&ctx->lock);
 
    switch (pan_arch(gpu_id)) {
    case 10:
-      pandecode_cs_v10(queue_gpu_va, size, gpu_id, regs);
+      pandecode_cs_v10(ctx, queue_gpu_va, size, gpu_id, regs);
      break;
    default:
      unreachable("Unsupported architecture");
   }
 
-   simple_mtx_unlock(&pandecode_lock);
+   simple_mtx_unlock(&ctx->lock);
 }
 
 void
-pandecode_shader_disassemble(mali_ptr shader_ptr, unsigned gpu_id)
+pandecode_shader_disassemble(struct pandecode_context *ctx, mali_ptr shader_ptr,
+                             unsigned gpu_id)
 {
-   uint8_t *PANDECODE_PTR_VAR(code, shader_ptr);
+   uint8_t *PANDECODE_PTR_VAR(ctx, code, shader_ptr);
 
   /* Compute maximum possible size */
    struct pandecode_mapped_memory *mem =
-      pandecode_find_mapped_gpu_mem_containing(shader_ptr);
+      pandecode_find_mapped_gpu_mem_containing(ctx, shader_ptr);
    size_t sz = mem->length - (shader_ptr - mem->gpu_va);
 
   /* Print some boilerplate to clearly denote the assembly (which doesn't
    * obey indentation rules), and actually do the disassembly! */
-   pandecode_log_cont("\nShader %p (GPU VA %" PRIx64 ") sz %" PRId64 "\n", code,
-                      shader_ptr, sz);
+   pandecode_log_cont(ctx, "\nShader %p (GPU VA %" PRIx64 ") sz %" PRId64 "\n",
+                      code, shader_ptr, sz);
 
    if (pan_arch(gpu_id) >= 9) {
-      disassemble_valhall(pandecode_dump_stream, (const uint64_t *)code, sz,
-                          true);
+      disassemble_valhall(ctx->dump_stream, (const uint64_t *)code, sz, true);
    } else if (pan_arch(gpu_id) >= 6)
-      disassemble_bifrost(pandecode_dump_stream, code, sz, false);
+      disassemble_bifrost(ctx->dump_stream, code, sz, false);
    else
-      disassemble_midgard(pandecode_dump_stream, code, sz, gpu_id, true);
+      disassemble_midgard(ctx->dump_stream, code, sz, gpu_id, true);
 
-   pandecode_log_cont("\n\n");
+   pandecode_log_cont(ctx, "\n\n");
 }
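/*
 * decode_common.c above now owns the whole context lifecycle. An end-to-end
 * usage sketch -- the BO pointer, addresses, and gpu_id are made up, and the
 * exact sequence a caller drives depends on the integration; the entry points
 * themselves are the ones reworked above. Dump files come out as
 * "<base>.ctx-<id>.<frame>", so output from different contexts and frames
 * stays separate:
 */
#include "decode.h"

static void
example_trace_frame(void *bo_cpu, unsigned bo_size, uint64_t bo_gpu_va,
                    uint64_t jc_gpu_va, unsigned gpu_id)
{
   struct pandecode_context *ctx = pandecode_create_context(false);

   /* Register the CPU mapping that backs this GPU VA range */
   pandecode_inject_mmap(ctx, bo_gpu_va, bo_cpu, bo_size, "example bo");

   pandecode_jc(ctx, jc_gpu_va, gpu_id); /* decode one job chain */
   pandecode_next_frame(ctx);            /* close and rotate the dump file */

   pandecode_inject_free(ctx, bo_gpu_va, bo_size);
   pandecode_destroy_context(ctx);
}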
diff --git a/src/panfrost/lib/genxml/decode_csf.c b/src/panfrost/lib/genxml/decode_csf.c
index 21be3ade8ef..667b9a8763d 100644
--- a/src/panfrost/lib/genxml/decode_csf.c
+++ b/src/panfrost/lib/genxml/decode_csf.c
@@ -55,21 +55,21 @@ struct queue_ctx {
 };
 
 static uint32_t
-cs_get_u32(struct queue_ctx *ctx, uint8_t reg)
+cs_get_u32(struct queue_ctx *qctx, uint8_t reg)
 {
-   assert(reg < ctx->nr_regs);
-   return ctx->regs[reg];
+   assert(reg < qctx->nr_regs);
+   return qctx->regs[reg];
 }
 
 static uint64_t
-cs_get_u64(struct queue_ctx *ctx, uint8_t reg)
+cs_get_u64(struct queue_ctx *qctx, uint8_t reg)
 {
-   return (((uint64_t)cs_get_u32(ctx, reg + 1)) << 32) | cs_get_u32(ctx, reg);
+   return (((uint64_t)cs_get_u32(qctx, reg + 1)) << 32) | cs_get_u32(qctx, reg);
 }
 
 static void
-pandecode_run_compute(FILE *fp, struct queue_ctx *ctx,
-                      struct MALI_CEU_RUN_COMPUTE *I)
+pandecode_run_compute(struct pandecode_context *ctx, FILE *fp,
+                      struct queue_ctx *qctx, struct MALI_CEU_RUN_COMPUTE *I)
 {
    const char *axes[4] = {"x_axis", "y_axis", "z_axis"};
 
@@ -78,39 +78,41 @@ pandecode_run_compute(FILE *fp, struct queue_ctx *ctx,
    */
    fprintf(fp, "RUN_COMPUTE.%s #%u\n", axes[I->task_axis], I->task_increment);
 
-   pandecode_indent++;
+   ctx->indent++;
 
    unsigned reg_srt = 0 + (I->srt_select * 2);
    unsigned reg_fau = 8 + (I->fau_select * 2);
    unsigned reg_spd = 16 + (I->spd_select * 2);
    unsigned reg_tsd = 24 + (I->tsd_select * 2);
 
-   GENX(pandecode_resource_tables)(cs_get_u64(ctx, reg_srt), "Resources");
+   GENX(pandecode_resource_tables)(ctx, cs_get_u64(qctx, reg_srt), "Resources");
 
-   mali_ptr fau = cs_get_u64(ctx, reg_fau);
+   mali_ptr fau = cs_get_u64(qctx, reg_fau);
 
    if (fau)
-      GENX(pandecode_fau)(fau & BITFIELD64_MASK(48), fau >> 56, "FAU");
+      GENX(pandecode_fau)(ctx, fau & BITFIELD64_MASK(48), fau >> 56, "FAU");
 
-   GENX(pandecode_shader)(cs_get_u64(ctx, reg_spd), "Shader", ctx->gpu_id);
+   GENX(pandecode_shader)
+   (ctx, cs_get_u64(qctx, reg_spd), "Shader", qctx->gpu_id);
 
-   DUMP_ADDR(LOCAL_STORAGE, cs_get_u64(ctx, reg_tsd),
-             "Local Storage @%" PRIx64 ":\n", cs_get_u64(ctx, reg_tsd));
+   DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, reg_tsd),
+             "Local Storage @%" PRIx64 ":\n", cs_get_u64(qctx, reg_tsd));
 
-   pandecode_log("Global attribute offset: %u\n", cs_get_u32(ctx, 32));
-   DUMP_CL(COMPUTE_SIZE_WORKGROUP, &ctx->regs[33], "Workgroup size\n");
-   pandecode_log("Job offset X: %u\n", cs_get_u32(ctx, 34));
-   pandecode_log("Job offset Y: %u\n", cs_get_u32(ctx, 35));
-   pandecode_log("Job offset Z: %u\n", cs_get_u32(ctx, 36));
-   pandecode_log("Job size X: %u\n", cs_get_u32(ctx, 37));
-   pandecode_log("Job size Y: %u\n", cs_get_u32(ctx, 38));
-   pandecode_log("Job size Z: %u\n", cs_get_u32(ctx, 39));
+   pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32));
+   DUMP_CL(ctx, COMPUTE_SIZE_WORKGROUP, &qctx->regs[33], "Workgroup size\n");
+   pandecode_log(ctx, "Job offset X: %u\n", cs_get_u32(qctx, 34));
+   pandecode_log(ctx, "Job offset Y: %u\n", cs_get_u32(qctx, 35));
+   pandecode_log(ctx, "Job offset Z: %u\n", cs_get_u32(qctx, 36));
+   pandecode_log(ctx, "Job size X: %u\n", cs_get_u32(qctx, 37));
+   pandecode_log(ctx, "Job size Y: %u\n", cs_get_u32(qctx, 38));
+   pandecode_log(ctx, "Job size Z: %u\n", cs_get_u32(qctx, 39));
 
-   pandecode_indent--;
+   ctx->indent--;
 }
 
 static void
-pandecode_run_idvs(FILE *fp, struct queue_ctx *ctx, struct MALI_CEU_RUN_IDVS *I)
+pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
+                   struct queue_ctx *qctx, struct MALI_CEU_RUN_IDVS *I)
 {
    /* Print the instruction. Ignore the selects and the flags override
    * since we'll print them implicitly later.
@@ -122,10 +124,10 @@ pandecode_run_idvs(FILE *fp, struct queue_ctx *ctx, struct MALI_CEU_RUN_IDVS *I)
 
    fprintf(fp, "\n");
 
-   pandecode_indent++;
+   ctx->indent++;
 
    /* Merge flag overrides with the register flags */
-   uint32_t tiler_flags_raw = cs_get_u64(ctx, 56);
+   uint32_t tiler_flags_raw = cs_get_u64(qctx, 56);
    tiler_flags_raw |= I->flags_override;
    pan_unpack(&tiler_flags_raw, PRIMITIVE_FLAGS, tiler_flags);
 
@@ -141,110 +143,113 @@ pandecode_run_idvs(FILE *fp, struct queue_ctx *ctx, struct MALI_CEU_RUN_IDVS *I)
    unsigned reg_frag_fau = 12;
    unsigned reg_frag_tsd = I->fragment_tsd_select ? 28 : 24;
 
-   uint64_t position_srt = cs_get_u64(ctx, reg_position_srt);
-   uint64_t vary_srt = cs_get_u64(ctx, reg_vary_srt);
-   uint64_t frag_srt = cs_get_u64(ctx, reg_frag_srt);
+   uint64_t position_srt = cs_get_u64(qctx, reg_position_srt);
+   uint64_t vary_srt = cs_get_u64(qctx, reg_vary_srt);
+   uint64_t frag_srt = cs_get_u64(qctx, reg_frag_srt);
 
-   GENX(pandecode_resource_tables)(position_srt, "Position resources");
-   GENX(pandecode_resource_tables)(vary_srt, "Varying resources");
-   GENX(pandecode_resource_tables)(frag_srt, "Fragment resources");
+   GENX(pandecode_resource_tables)(ctx, position_srt, "Position resources");
+   GENX(pandecode_resource_tables)(ctx, vary_srt, "Varying resources");
+   GENX(pandecode_resource_tables)(ctx, frag_srt, "Fragment resources");
 
-   mali_ptr position_fau = cs_get_u64(ctx, reg_position_fau);
-   mali_ptr vary_fau = cs_get_u64(ctx, reg_vary_fau);
-   mali_ptr fragment_fau = cs_get_u64(ctx, reg_frag_fau);
+   mali_ptr position_fau = cs_get_u64(qctx, reg_position_fau);
+   mali_ptr vary_fau = cs_get_u64(qctx, reg_vary_fau);
+   mali_ptr fragment_fau = cs_get_u64(qctx, reg_frag_fau);
 
    if (position_fau) {
      uint64_t lo = position_fau & BITFIELD64_MASK(48);
      uint64_t hi = position_fau >> 56;
 
-      GENX(pandecode_fau)(lo, hi, "Position FAU");
+      GENX(pandecode_fau)(ctx, lo, hi, "Position FAU");
   }
 
    if (vary_fau) {
      uint64_t lo = vary_fau & BITFIELD64_MASK(48);
      uint64_t hi = vary_fau >> 56;
 
-      GENX(pandecode_fau)(lo, hi, "Varying FAU");
+      GENX(pandecode_fau)(ctx, lo, hi, "Varying FAU");
   }
 
    if (fragment_fau) {
      uint64_t lo = fragment_fau & BITFIELD64_MASK(48);
      uint64_t hi = fragment_fau >> 56;
 
-      GENX(pandecode_fau)(lo, hi, "Fragment FAU");
+      GENX(pandecode_fau)(ctx, lo, hi, "Fragment FAU");
   }
 
-   GENX(pandecode_shader)(cs_get_u64(ctx, 16), "Position shader", ctx->gpu_id);
+   GENX(pandecode_shader)
+   (ctx, cs_get_u64(qctx, 16), "Position shader", qctx->gpu_id);
 
    if (tiler_flags.secondary_shader) {
-      uint64_t ptr = cs_get_u64(ctx, 18);
+      uint64_t ptr = cs_get_u64(qctx, 18);
 
-      GENX(pandecode_shader)(ptr, "Varying shader", ctx->gpu_id);
+      GENX(pandecode_shader)(ctx, ptr, "Varying shader", qctx->gpu_id);
   }
 
-   GENX(pandecode_shader)(cs_get_u64(ctx, 20), "Fragment shader", ctx->gpu_id);
+   GENX(pandecode_shader)
+   (ctx, cs_get_u64(qctx, 20), "Fragment shader", qctx->gpu_id);
 
-   DUMP_ADDR(LOCAL_STORAGE, cs_get_u64(ctx, 24),
+   DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, 24),
             "Position Local Storage @%" PRIx64 ":\n",
-             cs_get_u64(ctx, reg_position_tsd));
-   DUMP_ADDR(LOCAL_STORAGE, cs_get_u64(ctx, 24),
+             cs_get_u64(qctx, reg_position_tsd));
+   DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, 24),
             "Varying Local Storage @%" PRIx64 ":\n",
-             cs_get_u64(ctx, reg_vary_tsd));
-   DUMP_ADDR(LOCAL_STORAGE, cs_get_u64(ctx, 30),
+             cs_get_u64(qctx, reg_vary_tsd));
+   DUMP_ADDR(ctx, LOCAL_STORAGE, cs_get_u64(qctx, 30),
             "Fragment Local Storage @%" PRIx64 ":\n",
-             cs_get_u64(ctx, reg_frag_tsd));
+             cs_get_u64(qctx, reg_frag_tsd));
 
-   pandecode_log("Global attribute offset: %u\n", cs_get_u32(ctx, 32));
-   pandecode_log("Index count: %u\n", cs_get_u32(ctx, 33));
-   pandecode_log("Instance count: %u\n", cs_get_u32(ctx, 34));
+   pandecode_log(ctx, "Global attribute offset: %u\n", cs_get_u32(qctx, 32));
+   pandecode_log(ctx, "Index count: %u\n", cs_get_u32(qctx, 33));
+   pandecode_log(ctx, "Instance count: %u\n", cs_get_u32(qctx, 34));
 
    if (tiler_flags.index_type)
-      pandecode_log("Index offset: %u\n", cs_get_u32(ctx, 35));
+      pandecode_log(ctx, "Index offset: %u\n", cs_get_u32(qctx, 35));
 
-   pandecode_log("Vertex offset: %d\n", cs_get_u32(ctx, 36));
-   pandecode_log("Instance offset: %u\n", cs_get_u32(ctx, 37));
-   pandecode_log("Tiler DCD flags2: %X\n", cs_get_u32(ctx, 38));
+   pandecode_log(ctx, "Vertex offset: %d\n", cs_get_u32(qctx, 36));
+   pandecode_log(ctx, "Instance offset: %u\n", cs_get_u32(qctx, 37));
+   pandecode_log(ctx, "Tiler DCD flags2: %X\n", cs_get_u32(qctx, 38));
 
    if (tiler_flags.index_type)
-      pandecode_log("Index array size: %u\n", cs_get_u32(ctx, 39));
+      pandecode_log(ctx, "Index array size: %u\n", cs_get_u32(qctx, 39));
 
-   GENX(pandecode_tiler)(cs_get_u64(ctx, 40), ctx->gpu_id);
+   GENX(pandecode_tiler)(ctx, cs_get_u64(qctx, 40), qctx->gpu_id);
 
-   DUMP_CL(SCISSOR, &ctx->regs[42], "Scissor\n");
-   pandecode_log("Low depth clamp: %f\n", uif(cs_get_u32(ctx, 44)));
-   pandecode_log("High depth clamp: %f\n", uif(cs_get_u32(ctx, 45)));
-   pandecode_log("Occlusion: %" PRIx64 "\n", cs_get_u64(ctx, 46));
+   DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");
+   pandecode_log(ctx, "Low depth clamp: %f\n", uif(cs_get_u32(qctx, 44)));
+   pandecode_log(ctx, "High depth clamp: %f\n", uif(cs_get_u32(qctx, 45)));
+   pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", cs_get_u64(qctx, 46));
 
    if (tiler_flags.secondary_shader)
-      pandecode_log("Varying allocation: %u\n", cs_get_u32(ctx, 48));
+      pandecode_log(ctx, "Varying allocation: %u\n", cs_get_u32(qctx, 48));
 
-   mali_ptr blend = cs_get_u64(ctx, 50);
-   GENX(pandecode_blend_descs)(blend & ~7, blend & 7, 0, ctx->gpu_id);
+   mali_ptr blend = cs_get_u64(qctx, 50);
+   GENX(pandecode_blend_descs)(ctx, blend & ~7, blend & 7, 0, qctx->gpu_id);
 
-   DUMP_ADDR(DEPTH_STENCIL, cs_get_u64(ctx, 52), "Depth/stencil");
+   DUMP_ADDR(ctx, DEPTH_STENCIL, cs_get_u64(qctx, 52), "Depth/stencil");
 
    if (tiler_flags.index_type)
-      pandecode_log("Indices: %" PRIx64 "\n", cs_get_u64(ctx, 54));
+      pandecode_log(ctx, "Indices: %" PRIx64 "\n", cs_get_u64(qctx, 54));
 
-   DUMP_UNPACKED(PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
-   DUMP_CL(DCD_FLAGS_0, &ctx->regs[57], "DCD Flags 0\n");
-   DUMP_CL(DCD_FLAGS_1, &ctx->regs[58], "DCD Flags 1\n");
-   DUMP_CL(PRIMITIVE_SIZE, &ctx->regs[60], "Primitive size\n");
+   DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
+   DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[57], "DCD Flags 0\n");
+   DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[58], "DCD Flags 1\n");
+   DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[60], "Primitive size\n");
 
-   pandecode_indent--;
+   ctx->indent--;
 }
 
 static void
-pandecode_run_fragment(struct queue_ctx *ctx, struct MALI_CEU_RUN_FRAGMENT *I)
+pandecode_run_fragment(struct pandecode_context *ctx, struct queue_ctx *qctx,
+                       struct MALI_CEU_RUN_FRAGMENT *I)
 {
-   pandecode_indent++;
+   ctx->indent++;
 
-   DUMP_CL(SCISSOR, &ctx->regs[42], "Scissor\n");
+   DUMP_CL(ctx, SCISSOR, &qctx->regs[42], "Scissor\n");
 
    /* TODO: Tile enable map */
-   GENX(pandecode_fbd)(cs_get_u64(ctx, 40), true, ctx->gpu_id);
+   GENX(pandecode_fbd)(ctx, cs_get_u64(qctx, 40), true, qctx->gpu_id);
 
-   pandecode_indent--;
+   ctx->indent--;
 }
 
 static void
@@ -271,8 +276,9 @@ print_reg_tuple(unsigned base, uint16_t mask, FILE *fp)
 }
 
 static void
-disassemble_ceu_instr(uint64_t dword, unsigned indent, bool verbose, FILE *fp,
-                      struct queue_ctx *ctx)
+disassemble_ceu_instr(struct pandecode_context *ctx, uint64_t dword,
+                      unsigned indent, bool verbose, FILE *fp,
+                      struct queue_ctx *qctx)
 {
    if (verbose) {
      fprintf(fp, " ");
@@ -327,13 +333,13 @@ disassemble_ceu_instr(uint64_t dword, unsigned indent, bool verbose, FILE *fp,
 
    case MALI_CEU_OPCODE_RUN_COMPUTE: {
       pan_unpack(bytes, CEU_RUN_COMPUTE, I);
-      pandecode_run_compute(fp, ctx, &I);
+      pandecode_run_compute(ctx, fp, qctx, &I);
      break;
   }
 
    case MALI_CEU_OPCODE_RUN_IDVS: {
       pan_unpack(bytes, CEU_RUN_IDVS, I);
-      pandecode_run_idvs(fp, ctx, &I);
+      pandecode_run_idvs(ctx, fp, qctx, &I);
      break;
   }
 
@@ -341,7 +347,7 @@ disassemble_ceu_instr(uint64_t dword, unsigned indent, bool verbose, FILE *fp,
      pan_unpack(bytes, CEU_RUN_FRAGMENT, I);
      fprintf(fp, "RUN_FRAGMENT%s\n",
              I.enable_tem ? ".tile_enable_map_enable" : "");
-      pandecode_run_fragment(ctx, &I);
+      pandecode_run_fragment(ctx, qctx, &I);
      break;
   }
 
@@ -593,12 +599,12 @@ disassemble_ceu_instr(uint64_t dword, unsigned indent, bool verbose, FILE *fp,
 }
 
 static bool
-interpret_ceu_jump(struct queue_ctx *ctx, uint64_t reg_address,
-                   uint32_t reg_length)
+interpret_ceu_jump(struct pandecode_context *ctx, struct queue_ctx *qctx,
+                   uint64_t reg_address, uint32_t reg_length)
 {
-   uint32_t address_lo = ctx->regs[reg_address];
-   uint32_t address_hi = ctx->regs[reg_address + 1];
-   uint32_t length = ctx->regs[reg_length];
+   uint32_t address_lo = qctx->regs[reg_address];
+   uint32_t address_hi = qctx->regs[reg_address + 1];
+   uint32_t length = qctx->regs[reg_length];
 
    if (length % 8) {
      fprintf(stderr, "CS call alignment error\n");
@@ -607,10 +613,10 @@ interpret_ceu_jump(struct queue_ctx *ctx, uint64_t reg_address,
 
    /* Map the entire subqueue now */
    uint64_t address = ((uint64_t)address_hi << 32) | address_lo;
-   uint64_t *cs = pandecode_fetch_gpu_mem(address, length);
+   uint64_t *cs = pandecode_fetch_gpu_mem(ctx, address, length);
 
-   ctx->ip = cs;
-   ctx->end = cs + (length / 8);
+   qctx->ip = cs;
+   qctx->end = cs + (length / 8);
 
    /* Skip the usual IP update */
    return true;
@@ -624,34 +630,34 @@ interpret_ceu_jump(struct queue_ctx *ctx, uint64_t reg_address,
 * Returns true if execution should continue.
 */
 static bool
-interpret_ceu_instr(struct queue_ctx *ctx)
+interpret_ceu_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
 {
    /* Unpack the base so we get the opcode */
-   uint8_t *bytes = (uint8_t *)ctx->ip;
+   uint8_t *bytes = (uint8_t *)qctx->ip;
    pan_unpack(bytes, CEU_BASE, base);
 
-   assert(ctx->ip < ctx->end);
+   assert(qctx->ip < qctx->end);
 
    switch (base.opcode) {
    case MALI_CEU_OPCODE_MOVE: {
       pan_unpack(bytes, CEU_MOVE, I);
 
-      ctx->regs[I.destination + 0] = (uint32_t)I.immediate;
-      ctx->regs[I.destination + 1] = (uint32_t)(I.immediate >> 32);
+      qctx->regs[I.destination + 0] = (uint32_t)I.immediate;
+      qctx->regs[I.destination + 1] = (uint32_t)(I.immediate >> 32);
      break;
   }
 
    case MALI_CEU_OPCODE_MOVE32: {
       pan_unpack(bytes, CEU_MOVE32, I);
-      ctx->regs[I.destination] = I.immediate;
+      qctx->regs[I.destination] = I.immediate;
      break;
   }
 
    case MALI_CEU_OPCODE_ADD_IMMEDIATE32: {
       pan_unpack(bytes, CEU_ADD_IMMEDIATE32, I);
 
-      ctx->regs[I.destination] = ctx->regs[I.source] + I.immediate;
+      qctx->regs[I.destination] = qctx->regs[I.source] + I.immediate;
      break;
   }
 
@@ -659,46 +665,46 @@ interpret_ceu_instr(struct queue_ctx *ctx)
      pan_unpack(bytes, CEU_ADD_IMMEDIATE64, I);
 
      int64_t value =
-         (ctx->regs[I.source] | ((int64_t)ctx->regs[I.source + 1] << 32)) +
+         (qctx->regs[I.source] | ((int64_t)qctx->regs[I.source + 1] << 32)) +
         I.immediate;
 
-      ctx->regs[I.destination] = value;
-      ctx->regs[I.destination + 1] = value >> 32;
+      qctx->regs[I.destination] = value;
+      qctx->regs[I.destination + 1] = value >> 32;
      break;
   }
 
    case MALI_CEU_OPCODE_CALL: {
       pan_unpack(bytes, CEU_CALL, I);
 
-      if (ctx->call_stack_depth == MAX_CALL_STACK_DEPTH) {
+      if (qctx->call_stack_depth == MAX_CALL_STACK_DEPTH) {
        fprintf(stderr, "CS call stack overflow\n");
        return false;
     }
 
-      assert(ctx->call_stack_depth < MAX_CALL_STACK_DEPTH);
+      assert(qctx->call_stack_depth < MAX_CALL_STACK_DEPTH);
 
-      ctx->ip++;
+      qctx->ip++;
 
      /* Note: tail calls are not optimized in the hardware. */
-      assert(ctx->ip <= ctx->end);
+      assert(qctx->ip <= qctx->end);
 
-      unsigned depth = ctx->call_stack_depth++;
+      unsigned depth = qctx->call_stack_depth++;
 
-      ctx->call_stack[depth].lr = ctx->ip;
-      ctx->call_stack[depth].end = ctx->end;
+      qctx->call_stack[depth].lr = qctx->ip;
+      qctx->call_stack[depth].end = qctx->end;
 
-      return interpret_ceu_jump(ctx, I.address, I.length);
+      return interpret_ceu_jump(ctx, qctx, I.address, I.length);
   }
 
    case MALI_CEU_OPCODE_JUMP: {
       pan_unpack(bytes, CEU_CALL, I);
 
-      if (ctx->call_stack_depth == 0) {
+      if (qctx->call_stack_depth == 0) {
        fprintf(stderr, "Cannot jump from the entrypoint\n");
        return false;
     }
 
-      return interpret_ceu_jump(ctx, I.address, I.length);
+      return interpret_ceu_jump(ctx, qctx, I.address, I.length);
   }
 
    default:
@@ -708,35 +714,35 @@ interpret_ceu_instr(struct queue_ctx *ctx)
 
    /* Update IP first to point to the next instruction, so call doesn't
    * require special handling (even for tail calls).
    */
-   ctx->ip++;
+   qctx->ip++;
 
-   while (ctx->ip == ctx->end) {
+   while (qctx->ip == qctx->end) {
      /* Graceful termination */
-      if (ctx->call_stack_depth == 0)
+      if (qctx->call_stack_depth == 0)
        return false;
 
      /* Pop off the call stack */
-      unsigned old_depth = --ctx->call_stack_depth;
+      unsigned old_depth = --qctx->call_stack_depth;
 
-      ctx->ip = ctx->call_stack[old_depth].lr;
-      ctx->end = ctx->call_stack[old_depth].end;
+      qctx->ip = qctx->call_stack[old_depth].lr;
+      qctx->end = qctx->call_stack[old_depth].end;
   }
 
    return true;
 }
 
 void
-GENX(pandecode_cs)(mali_ptr queue, uint32_t size, unsigned gpu_id,
-                   uint32_t *regs)
+GENX(pandecode_cs)(struct pandecode_context *ctx, mali_ptr queue, uint32_t size,
+                   unsigned gpu_id, uint32_t *regs)
 {
-   pandecode_dump_file_open();
+   pandecode_dump_file_open(ctx);
 
-   uint64_t *cs = pandecode_fetch_gpu_mem(queue, size);
+   uint64_t *cs = pandecode_fetch_gpu_mem(ctx, queue, size);
 
    /* Mali-G610 has 96 registers. Other devices not yet supported, we can make
    * this configurable later when we encounter new Malis. */
-   struct queue_ctx ctx = {
+   struct queue_ctx qctx = {
      .nr_regs = 96,
      .regs = regs,
      .ip = cs,
@@ -746,12 +752,12 @@ GENX(pandecode_cs)(mali_ptr queue, uint32_t size, unsigned gpu_id,
 
    if (size) {
      do {
-         disassemble_ceu_instr(*(ctx.ip), 1 + ctx.call_stack_depth, true,
-                               pandecode_dump_stream, &ctx);
-      } while (interpret_ceu_instr(&ctx));
+         disassemble_ceu_instr(ctx, *(qctx.ip), 1 + qctx.call_stack_depth, true,
+                               ctx->dump_stream, &qctx);
+      } while (interpret_ceu_instr(ctx, &qctx));
   }
 
-   fflush(pandecode_dump_stream);
-   pandecode_map_read_write();
+   fflush(ctx->dump_stream);
+   pandecode_map_read_write(ctx);
 }
 #endif
TODO: verify @@ -50,41 +50,41 @@ pandecode_primitive(const void *p) * size */ if (!size) - pandecode_log("// XXX: index size missing\n"); + pandecode_log(ctx, "// XXX: index size missing\n"); else - pandecode_validate_buffer(primitive.indices, + pandecode_validate_buffer(ctx, primitive.indices, primitive.index_count * size); } else if (primitive.index_type) - pandecode_log("// XXX: unexpected index size\n"); + pandecode_log(ctx, "// XXX: unexpected index size\n"); #endif } #if PAN_ARCH <= 7 static void -pandecode_attributes(mali_ptr addr, int count, bool varying, - enum mali_job_type job_type) +pandecode_attributes(struct pandecode_context *ctx, mali_ptr addr, int count, + bool varying, enum mali_job_type job_type) { char *prefix = varying ? "Varying" : "Attribute"; assert(addr); if (!count) { - pandecode_log("// warn: No %s records\n", prefix); + pandecode_log(ctx, "// warn: No %s records\n", prefix); return; } - MAP_ADDR(ATTRIBUTE_BUFFER, addr, cl); + MAP_ADDR(ctx, ATTRIBUTE_BUFFER, addr, cl); for (int i = 0; i < count; ++i) { pan_unpack(cl + i * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER, temp); - DUMP_UNPACKED(ATTRIBUTE_BUFFER, temp, "%s:\n", prefix); + DUMP_UNPACKED(ctx, ATTRIBUTE_BUFFER, temp, "%s:\n", prefix); switch (temp.type) { case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR_WRITE_REDUCTION: case MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR: { pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER), ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2); - pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, - temp2, (pandecode_indent + 1) * 2); + pan_print(ctx->dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, temp2, + (ctx->indent + 1) * 2); i++; break; } @@ -92,8 +92,8 @@ pandecode_attributes(mali_ptr addr, int count, bool varying, case MALI_ATTRIBUTE_TYPE_3D_INTERLEAVED: { pan_unpack(cl + (i + 1) * pan_size(ATTRIBUTE_BUFFER_CONTINUATION_3D), ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2); - pan_print(pandecode_dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D, - temp2, (pandecode_indent + 1) * 2); + pan_print(ctx->dump_stream, ATTRIBUTE_BUFFER_CONTINUATION_3D, temp2, + (ctx->indent + 1) * 2); i++; break; } @@ -101,22 +101,24 @@ pandecode_attributes(mali_ptr addr, int count, bool varying, break; } } - pandecode_log("\n"); + pandecode_log(ctx, "\n"); } static unsigned -pandecode_attribute_meta(int count, mali_ptr attribute, bool varying) +pandecode_attribute_meta(struct pandecode_context *ctx, int count, + mali_ptr attribute, bool varying) { unsigned max = 0; for (int i = 0; i < count; ++i, attribute += pan_size(ATTRIBUTE)) { - MAP_ADDR(ATTRIBUTE, attribute, cl); + MAP_ADDR(ctx, ATTRIBUTE, attribute, cl); pan_unpack(cl, ATTRIBUTE, a); - DUMP_UNPACKED(ATTRIBUTE, a, "%s:\n", varying ? "Varying" : "Attribute"); + DUMP_UNPACKED(ctx, ATTRIBUTE, a, "%s:\n", + varying ? "Varying" : "Attribute"); max = MAX2(max, a.buffer_index); } - pandecode_log("\n"); + pandecode_log(ctx, "\n"); return MIN2(max + 1, 256); } @@ -134,7 +136,7 @@ bits(u32 word, u32 lo, u32 hi) } static void -pandecode_invocation(const void *i) +pandecode_invocation(struct pandecode_context *ctx, const void *i) { /* Decode invocation_count. See the comment before the definition of * invocation_count for an explanation. 
@@ -161,94 +163,98 @@ pandecode_invocation(const void *i) unsigned groups_z = bits(invocation.invocations, invocation.workgroups_z_shift, 32) + 1; - pandecode_log("Invocation (%d, %d, %d) x (%d, %d, %d)\n", size_x, size_y, - size_z, groups_x, groups_y, groups_z); + pandecode_log(ctx, "Invocation (%d, %d, %d) x (%d, %d, %d)\n", size_x, + size_y, size_z, groups_x, groups_y, groups_z); - DUMP_UNPACKED(INVOCATION, invocation, "Invocation:\n") + DUMP_UNPACKED(ctx, INVOCATION, invocation, "Invocation:\n") } static void -pandecode_textures(mali_ptr textures, unsigned texture_count) +pandecode_textures(struct pandecode_context *ctx, mali_ptr textures, + unsigned texture_count) { if (!textures) return; - pandecode_log("Textures %" PRIx64 ":\n", textures); - pandecode_indent++; + pandecode_log(ctx, "Textures %" PRIx64 ":\n", textures); + ctx->indent++; #if PAN_ARCH >= 6 const void *cl = - pandecode_fetch_gpu_mem(textures, pan_size(TEXTURE) * texture_count); + pandecode_fetch_gpu_mem(ctx, textures, pan_size(TEXTURE) * texture_count); for (unsigned tex = 0; tex < texture_count; ++tex) - GENX(pandecode_texture)(cl + pan_size(TEXTURE) * tex, tex); + GENX(pandecode_texture)(ctx, cl + pan_size(TEXTURE) * tex, tex); #else - mali_ptr *PANDECODE_PTR_VAR(u, textures); + mali_ptr *PANDECODE_PTR_VAR(ctx, u, textures); for (int tex = 0; tex < texture_count; ++tex) { - mali_ptr *PANDECODE_PTR_VAR(u, textures + tex * sizeof(mali_ptr)); - char *a = pointer_as_memory_reference(*u); - pandecode_log("%s,\n", a); + mali_ptr *PANDECODE_PTR_VAR(ctx, u, textures + tex * sizeof(mali_ptr)); + char *a = pointer_as_memory_reference(ctx, *u); + pandecode_log(ctx, "%s,\n", a); free(a); } /* Now, finally, descend down into the texture descriptor */ for (unsigned tex = 0; tex < texture_count; ++tex) { - mali_ptr *PANDECODE_PTR_VAR(u, textures + tex * sizeof(mali_ptr)); - GENX(pandecode_texture)(*u, tex); + mali_ptr *PANDECODE_PTR_VAR(ctx, u, textures + tex * sizeof(mali_ptr)); + GENX(pandecode_texture)(ctx, *u, tex); } #endif - pandecode_indent--; - pandecode_log("\n"); + ctx->indent--; + pandecode_log(ctx, "\n"); } static void -pandecode_samplers(mali_ptr samplers, unsigned sampler_count) +pandecode_samplers(struct pandecode_context *ctx, mali_ptr samplers, + unsigned sampler_count) { - pandecode_log("Samplers %" PRIx64 ":\n", samplers); - pandecode_indent++; + pandecode_log(ctx, "Samplers %" PRIx64 ":\n", samplers); + ctx->indent++; for (int i = 0; i < sampler_count; ++i) - DUMP_ADDR(SAMPLER, samplers + (pan_size(SAMPLER) * i), "Sampler %d:\n", - i); + DUMP_ADDR(ctx, SAMPLER, samplers + (pan_size(SAMPLER) * i), + "Sampler %d:\n", i); - pandecode_indent--; - pandecode_log("\n"); + ctx->indent--; + pandecode_log(ctx, "\n"); } static void -pandecode_uniform_buffers(mali_ptr pubufs, int ubufs_count) +pandecode_uniform_buffers(struct pandecode_context *ctx, mali_ptr pubufs, + int ubufs_count) { - uint64_t *PANDECODE_PTR_VAR(ubufs, pubufs); + uint64_t *PANDECODE_PTR_VAR(ctx, ubufs, pubufs); for (int i = 0; i < ubufs_count; i++) { mali_ptr addr = (ubufs[i] >> 10) << 2; unsigned size = addr ? 
(((ubufs[i] & ((1 << 10) - 1)) + 1) * 16) : 0; - pandecode_validate_buffer(addr, size); + pandecode_validate_buffer(ctx, addr, size); - char *ptr = pointer_as_memory_reference(addr); - pandecode_log("ubuf_%d[%u] = %s;\n", i, size, ptr); + char *ptr = pointer_as_memory_reference(ctx, addr); + pandecode_log(ctx, "ubuf_%d[%u] = %s;\n", i, size, ptr); free(ptr); } - pandecode_log("\n"); + pandecode_log(ctx, "\n"); } static void -pandecode_uniforms(mali_ptr uniforms, unsigned uniform_count) +pandecode_uniforms(struct pandecode_context *ctx, mali_ptr uniforms, + unsigned uniform_count) { - pandecode_validate_buffer(uniforms, uniform_count * 16); + pandecode_validate_buffer(ctx, uniforms, uniform_count * 16); - char *ptr = pointer_as_memory_reference(uniforms); - pandecode_log("vec4 uniforms[%u] = %s;\n", uniform_count, ptr); + char *ptr = pointer_as_memory_reference(ctx, uniforms); + pandecode_log(ctx, "vec4 uniforms[%u] = %s;\n", uniform_count, ptr); free(ptr); - pandecode_log("\n"); + pandecode_log(ctx, "\n"); } void -GENX(pandecode_dcd)(const struct MALI_DRAW *p, enum mali_job_type job_type, - unsigned gpu_id) +GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p, + enum mali_job_type job_type, unsigned gpu_id) { #if PAN_ARCH >= 5 struct pandecode_fbd fbd_info = {.rt_count = 1}; @@ -256,7 +262,7 @@ GENX(pandecode_dcd)(const struct MALI_DRAW *p, enum mali_job_type job_type, if (PAN_ARCH >= 6 || (PAN_ARCH == 5 && job_type != MALI_JOB_TYPE_TILER)) { #if PAN_ARCH >= 5 - DUMP_ADDR(LOCAL_STORAGE, p->thread_storage & ~1, "Local Storage:\n"); + DUMP_ADDR(ctx, LOCAL_STORAGE, p->thread_storage & ~1, "Local Storage:\n"); #endif } else { #if PAN_ARCH == 5 @@ -268,13 +274,12 @@ GENX(pandecode_dcd)(const struct MALI_DRAW *p, enum mali_job_type job_type, if (!ptr.type || ptr.zs_crc_extension_present || ptr.render_target_count != 1) { - fprintf(pandecode_dump_stream, - "Unexpected framebuffer pointer settings"); + fprintf(ctx->dump_stream, "Unexpected framebuffer pointer settings"); } - GENX(pandecode_fbd)(ptr.pointer, false, gpu_id); + GENX(pandecode_fbd)(ctx, ptr.pointer, false, gpu_id); #elif PAN_ARCH == 4 - GENX(pandecode_fbd)(p->fbd, false, gpu_id); + GENX(pandecode_fbd)(ctx, p->fbd, false, gpu_id); #endif } @@ -284,21 +289,21 @@ GENX(pandecode_dcd)(const struct MALI_DRAW *p, enum mali_job_type job_type, if (p->state) { uint32_t *cl = - pandecode_fetch_gpu_mem(p->state, pan_size(RENDERER_STATE)); + pandecode_fetch_gpu_mem(ctx, p->state, pan_size(RENDERER_STATE)); pan_unpack(cl, RENDERER_STATE, state); if (state.shader.shader & ~0xF) - pandecode_shader_disassemble(state.shader.shader & ~0xF, gpu_id); + pandecode_shader_disassemble(ctx, state.shader.shader & ~0xF, gpu_id); #if PAN_ARCH >= 6 bool idvs = (job_type == MALI_JOB_TYPE_INDEXED_VERTEX); if (idvs && state.secondary_shader) - pandecode_shader_disassemble(state.secondary_shader, gpu_id); + pandecode_shader_disassemble(ctx, state.secondary_shader, gpu_id); #endif - DUMP_UNPACKED(RENDERER_STATE, state, "State:\n"); - pandecode_indent++; + DUMP_UNPACKED(ctx, RENDERER_STATE, state, "State:\n"); + ctx->indent++; /* Save for dumps */ attribute_count = state.shader.attribute_count; @@ -316,10 +321,10 @@ GENX(pandecode_dcd)(const struct MALI_DRAW *p, enum mali_job_type job_type, #if PAN_ARCH == 4 mali_ptr shader = state.blend_shader & ~0xF; if (state.multisample_misc.blend_shader && shader) - pandecode_shader_disassemble(shader, gpu_id); + pandecode_shader_disassemble(ctx, shader, gpu_id); #endif - pandecode_indent--; - 
pandecode_log("\n"); + ctx->indent--; + pandecode_log(ctx, "\n"); /* MRT blend fields are used on v5+. Technically, they are optional on v5 * for backwards compatibility but we don't care about that. @@ -332,141 +337,146 @@ GENX(pandecode_dcd)(const struct MALI_DRAW *p, enum mali_job_type job_type, for (unsigned i = 0; i < fbd_info.rt_count; i++) { mali_ptr shader = - GENX(pandecode_blend)(blend_base, i, state.shader.shader); + GENX(pandecode_blend)(ctx, blend_base, i, state.shader.shader); if (shader & ~0xF) - pandecode_shader_disassemble(shader, gpu_id); + pandecode_shader_disassemble(ctx, shader, gpu_id); } } #endif } else - pandecode_log("// XXX: missing shader descriptor\n"); + pandecode_log(ctx, "// XXX: missing shader descriptor\n"); if (p->viewport) { - DUMP_ADDR(VIEWPORT, p->viewport, "Viewport:\n"); - pandecode_log("\n"); + DUMP_ADDR(ctx, VIEWPORT, p->viewport, "Viewport:\n"); + pandecode_log(ctx, "\n"); } unsigned max_attr_index = 0; if (p->attributes) max_attr_index = - pandecode_attribute_meta(attribute_count, p->attributes, false); + pandecode_attribute_meta(ctx, attribute_count, p->attributes, false); if (p->attribute_buffers) - pandecode_attributes(p->attribute_buffers, max_attr_index, false, + pandecode_attributes(ctx, p->attribute_buffers, max_attr_index, false, job_type); if (p->varyings) { varying_count = - pandecode_attribute_meta(varying_count, p->varyings, true); + pandecode_attribute_meta(ctx, varying_count, p->varyings, true); } if (p->varying_buffers) - pandecode_attributes(p->varying_buffers, varying_count, true, job_type); + pandecode_attributes(ctx, p->varying_buffers, varying_count, true, + job_type); if (p->uniform_buffers) { if (uniform_buffer_count) - pandecode_uniform_buffers(p->uniform_buffers, uniform_buffer_count); + pandecode_uniform_buffers(ctx, p->uniform_buffers, + uniform_buffer_count); else - pandecode_log("// warn: UBOs specified but not referenced\n"); + pandecode_log(ctx, "// warn: UBOs specified but not referenced\n"); } else if (uniform_buffer_count) - pandecode_log("// XXX: UBOs referenced but not specified\n"); + pandecode_log(ctx, "// XXX: UBOs referenced but not specified\n"); /* We don't want to actually dump uniforms, but we do need to validate * that the counts we were given are sane */ if (p->push_uniforms) { if (uniform_count) - pandecode_uniforms(p->push_uniforms, uniform_count); + pandecode_uniforms(ctx, p->push_uniforms, uniform_count); else - pandecode_log("// warn: Uniforms specified but not referenced\n"); + pandecode_log(ctx, "// warn: Uniforms specified but not referenced\n"); } else if (uniform_count) - pandecode_log("// XXX: Uniforms referenced but not specified\n"); + pandecode_log(ctx, "// XXX: Uniforms referenced but not specified\n"); if (p->textures) - pandecode_textures(p->textures, texture_count); + pandecode_textures(ctx, p->textures, texture_count); if (p->samplers) - pandecode_samplers(p->samplers, sampler_count); + pandecode_samplers(ctx, p->samplers, sampler_count); } static void -pandecode_vertex_compute_geometry_job(const struct MALI_JOB_HEADER *h, +pandecode_vertex_compute_geometry_job(struct pandecode_context *ctx, + const struct MALI_JOB_HEADER *h, mali_ptr job, unsigned gpu_id) { - struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_compute_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, COMPUTE_JOB, DRAW, draw); - GENX(pandecode_dcd)(&draw, h->type, gpu_id); + GENX(pandecode_dcd)(ctx, &draw, h->type, gpu_id); - pandecode_log("Vertex Job Payload:\n"); - 
pandecode_indent++; - pandecode_invocation(pan_section_ptr(p, COMPUTE_JOB, INVOCATION)); - DUMP_SECTION(COMPUTE_JOB, PARAMETERS, p, "Vertex Job Parameters:\n"); - DUMP_UNPACKED(DRAW, draw, "Draw:\n"); - pandecode_indent--; - pandecode_log("\n"); + pandecode_log(ctx, "Vertex Job Payload:\n"); + ctx->indent++; + pandecode_invocation(ctx, pan_section_ptr(p, COMPUTE_JOB, INVOCATION)); + DUMP_SECTION(ctx, COMPUTE_JOB, PARAMETERS, p, "Vertex Job Parameters:\n"); + DUMP_UNPACKED(ctx, DRAW, draw, "Draw:\n"); + ctx->indent--; + pandecode_log(ctx, "\n"); } #endif static void -pandecode_write_value_job(mali_ptr job) +pandecode_write_value_job(struct pandecode_context *ctx, mali_ptr job) { - struct mali_write_value_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_write_value_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, WRITE_VALUE_JOB, PAYLOAD, u); - DUMP_SECTION(WRITE_VALUE_JOB, PAYLOAD, p, "Write Value Payload:\n"); - pandecode_log("\n"); + DUMP_SECTION(ctx, WRITE_VALUE_JOB, PAYLOAD, p, "Write Value Payload:\n"); + pandecode_log(ctx, "\n"); } static void -pandecode_cache_flush_job(mali_ptr job) +pandecode_cache_flush_job(struct pandecode_context *ctx, mali_ptr job) { - struct mali_cache_flush_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_cache_flush_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, CACHE_FLUSH_JOB, PAYLOAD, u); - DUMP_SECTION(CACHE_FLUSH_JOB, PAYLOAD, p, "Cache Flush Payload:\n"); - pandecode_log("\n"); + DUMP_SECTION(ctx, CACHE_FLUSH_JOB, PAYLOAD, p, "Cache Flush Payload:\n"); + pandecode_log(ctx, "\n"); } static void -pandecode_tiler_job(const struct MALI_JOB_HEADER *h, mali_ptr job, +pandecode_tiler_job(struct pandecode_context *ctx, + const struct MALI_JOB_HEADER *h, mali_ptr job, unsigned gpu_id) { - struct mali_tiler_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_tiler_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, TILER_JOB, DRAW, draw); - GENX(pandecode_dcd)(&draw, h->type, gpu_id); - pandecode_log("Tiler Job Payload:\n"); - pandecode_indent++; + GENX(pandecode_dcd)(ctx, &draw, h->type, gpu_id); + pandecode_log(ctx, "Tiler Job Payload:\n"); + ctx->indent++; #if PAN_ARCH <= 7 - pandecode_invocation(pan_section_ptr(p, TILER_JOB, INVOCATION)); + pandecode_invocation(ctx, pan_section_ptr(p, TILER_JOB, INVOCATION)); #endif - pandecode_primitive(pan_section_ptr(p, TILER_JOB, PRIMITIVE)); - DUMP_UNPACKED(DRAW, draw, "Draw:\n"); + pandecode_primitive(ctx, pan_section_ptr(p, TILER_JOB, PRIMITIVE)); + DUMP_UNPACKED(ctx, DRAW, draw, "Draw:\n"); - DUMP_SECTION(TILER_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n"); + DUMP_SECTION(ctx, TILER_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n"); #if PAN_ARCH >= 6 pan_section_unpack(p, TILER_JOB, TILER, tiler_ptr); - GENX(pandecode_tiler)(tiler_ptr.address, gpu_id); + GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id); #if PAN_ARCH >= 9 - DUMP_SECTION(TILER_JOB, INSTANCE_COUNT, p, "Instance count:\n"); - DUMP_SECTION(TILER_JOB, VERTEX_COUNT, p, "Vertex count:\n"); - DUMP_SECTION(TILER_JOB, SCISSOR, p, "Scissor:\n"); - DUMP_SECTION(TILER_JOB, INDICES, p, "Indices:\n"); + DUMP_SECTION(ctx, TILER_JOB, INSTANCE_COUNT, p, "Instance count:\n"); + DUMP_SECTION(ctx, TILER_JOB, VERTEX_COUNT, p, "Vertex count:\n"); + DUMP_SECTION(ctx, TILER_JOB, SCISSOR, p, "Scissor:\n"); + DUMP_SECTION(ctx, TILER_JOB, INDICES, p, "Indices:\n"); #else pan_section_unpack(p, TILER_JOB, PADDING, padding); #endif #endif - pandecode_indent--; - pandecode_log("\n"); + ctx->indent--; + pandecode_log(ctx, 
"\n"); } static void -pandecode_fragment_job(mali_ptr job, unsigned gpu_id) +pandecode_fragment_job(struct pandecode_context *ctx, mali_ptr job, + unsigned gpu_id) { - struct mali_fragment_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_fragment_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, FRAGMENT_JOB, PAYLOAD, s); uint64_t fbd_pointer; @@ -483,47 +493,50 @@ pandecode_fragment_job(mali_ptr job, unsigned gpu_id) #endif UNUSED struct pandecode_fbd info = - GENX(pandecode_fbd)(fbd_pointer, true, gpu_id); + GENX(pandecode_fbd)(ctx, fbd_pointer, true, gpu_id); #if PAN_ARCH >= 5 if (!ptr.type || ptr.zs_crc_extension_present != info.has_extra || ptr.render_target_count != info.rt_count) { - pandecode_log("invalid FBD tag\n"); + pandecode_log(ctx, "invalid FBD tag\n"); } #endif - DUMP_UNPACKED(FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n"); + DUMP_UNPACKED(ctx, FRAGMENT_JOB_PAYLOAD, s, "Fragment Job Payload:\n"); - pandecode_log("\n"); + pandecode_log(ctx, "\n"); } #if PAN_ARCH == 6 || PAN_ARCH == 7 static void -pandecode_indexed_vertex_job(const struct MALI_JOB_HEADER *h, mali_ptr job, +pandecode_indexed_vertex_job(struct pandecode_context *ctx, + const struct MALI_JOB_HEADER *h, mali_ptr job, unsigned gpu_id) { - struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_indexed_vertex_job_packed *PANDECODE_PTR_VAR(ctx, p, job); - pandecode_log("Vertex:\n"); + pandecode_log(ctx, "Vertex:\n"); pan_section_unpack(p, INDEXED_VERTEX_JOB, VERTEX_DRAW, vert_draw); - GENX(pandecode_dcd)(&vert_draw, h->type, gpu_id); - DUMP_UNPACKED(DRAW, vert_draw, "Vertex Draw:\n"); + GENX(pandecode_dcd)(ctx, &vert_draw, h->type, gpu_id); + DUMP_UNPACKED(ctx, DRAW, vert_draw, "Vertex Draw:\n"); - pandecode_log("Fragment:\n"); + pandecode_log(ctx, "Fragment:\n"); pan_section_unpack(p, INDEXED_VERTEX_JOB, FRAGMENT_DRAW, frag_draw); - GENX(pandecode_dcd)(&frag_draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); - DUMP_UNPACKED(DRAW, frag_draw, "Fragment Draw:\n"); + GENX(pandecode_dcd)(ctx, &frag_draw, MALI_JOB_TYPE_FRAGMENT, gpu_id); + DUMP_UNPACKED(ctx, DRAW, frag_draw, "Fragment Draw:\n"); pan_section_unpack(p, INDEXED_VERTEX_JOB, TILER, tiler_ptr); - pandecode_log("Tiler Job Payload:\n"); - pandecode_indent++; - GENX(pandecode_tiler)(tiler_ptr.address, gpu_id); - pandecode_indent--; + pandecode_log(ctx, "Tiler Job Payload:\n"); + ctx->indent++; + GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id); + ctx->indent--; - pandecode_invocation(pan_section_ptr(p, INDEXED_VERTEX_JOB, INVOCATION)); - pandecode_primitive(pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE)); + pandecode_invocation(ctx, + pan_section_ptr(p, INDEXED_VERTEX_JOB, INVOCATION)); + pandecode_primitive(ctx, pan_section_ptr(p, INDEXED_VERTEX_JOB, PRIMITIVE)); - DUMP_SECTION(INDEXED_VERTEX_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n"); + DUMP_SECTION(ctx, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE, p, + "Primitive Size:\n"); pan_section_unpack(p, INDEXED_VERTEX_JOB, PADDING, padding); } @@ -531,45 +544,47 @@ pandecode_indexed_vertex_job(const struct MALI_JOB_HEADER *h, mali_ptr job, #if PAN_ARCH == 9 static void -pandecode_malloc_vertex_job(mali_ptr job, unsigned gpu_id) +pandecode_malloc_vertex_job(struct pandecode_context *ctx, mali_ptr job, + unsigned gpu_id) { - struct mali_malloc_vertex_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_malloc_vertex_job_packed *PANDECODE_PTR_VAR(ctx, p, job); - DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE, p, "Primitive:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, INSTANCE_COUNT, p, 
"Instance count:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, ALLOCATION, p, "Allocation:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, TILER, p, "Tiler:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, SCISSOR, p, "Scissor:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n"); - DUMP_SECTION(MALLOC_VERTEX_JOB, INDICES, p, "Indices:\n"); + DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, PRIMITIVE, p, "Primitive:\n"); + DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, INSTANCE_COUNT, p, "Instance count:\n"); + DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, ALLOCATION, p, "Allocation:\n"); + DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, TILER, p, "Tiler:\n"); + DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, SCISSOR, p, "Scissor:\n"); + DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, PRIMITIVE_SIZE, p, "Primitive Size:\n"); + DUMP_SECTION(ctx, MALLOC_VERTEX_JOB, INDICES, p, "Indices:\n"); pan_section_unpack(p, MALLOC_VERTEX_JOB, DRAW, dcd); pan_section_unpack(p, MALLOC_VERTEX_JOB, TILER, tiler_ptr); - pandecode_log("Tiler Job Payload:\n"); - pandecode_indent++; + pandecode_log(ctx, "Tiler Job Payload:\n"); + ctx->indent++; if (tiler_ptr.address) - GENX(pandecode_tiler)(tiler_ptr.address, gpu_id); + GENX(pandecode_tiler)(ctx, tiler_ptr.address, gpu_id); else - pandecode_log("\n"); - pandecode_indent--; + pandecode_log(ctx, "\n"); + ctx->indent--; - GENX(pandecode_dcd)(&dcd, 0, gpu_id); + GENX(pandecode_dcd)(ctx, &dcd, 0, gpu_id); pan_section_unpack(p, MALLOC_VERTEX_JOB, POSITION, position); pan_section_unpack(p, MALLOC_VERTEX_JOB, VARYING, varying); - GENX(pandecode_shader_environment)(&position, gpu_id); - GENX(pandecode_shader_environment)(&varying, gpu_id); + GENX(pandecode_shader_environment)(ctx, &position, gpu_id); + GENX(pandecode_shader_environment)(ctx, &varying, gpu_id); } static void -pandecode_compute_job(mali_ptr job, unsigned gpu_id) +pandecode_compute_job(struct pandecode_context *ctx, mali_ptr job, + unsigned gpu_id) { - struct mali_compute_job_packed *PANDECODE_PTR_VAR(p, job); + struct mali_compute_job_packed *PANDECODE_PTR_VAR(ctx, p, job); pan_section_unpack(p, COMPUTE_JOB, PAYLOAD, payload); - GENX(pandecode_shader_environment)(&payload.compute, gpu_id); - DUMP_SECTION(COMPUTE_JOB, PAYLOAD, p, "Compute"); + GENX(pandecode_shader_environment)(ctx, &payload.compute, gpu_id); + DUMP_SECTION(ctx, COMPUTE_JOB, PAYLOAD, p, "Compute"); } #endif @@ -578,9 +593,10 @@ pandecode_compute_job(mali_ptr job, unsigned gpu_id) * GPU using the job manager. 
*/ void -GENX(pandecode_jc)(mali_ptr jc_gpu_va, unsigned gpu_id) +GENX(pandecode_jc)(struct pandecode_context *ctx, mali_ptr jc_gpu_va, + unsigned gpu_id) { - pandecode_dump_file_open(); + pandecode_dump_file_open(ctx); struct set *va_set = _mesa_pointer_set_create(NULL); struct set_entry *entry = NULL; @@ -589,7 +605,7 @@ GENX(pandecode_jc)(mali_ptr jc_gpu_va, unsigned gpu_id) do { struct mali_job_header_packed *hdr = - PANDECODE_PTR(jc_gpu_va, struct mali_job_header_packed); + PANDECODE_PTR(ctx, jc_gpu_va, struct mali_job_header_packed); entry = _mesa_set_search(va_set, hdr); if (entry != NULL) { @@ -600,45 +616,46 @@ GENX(pandecode_jc)(mali_ptr jc_gpu_va, unsigned gpu_id) pan_unpack(hdr, JOB_HEADER, h); next_job = h.next; - DUMP_UNPACKED(JOB_HEADER, h, "Job Header (%" PRIx64 "):\n", jc_gpu_va); - pandecode_log("\n"); + DUMP_UNPACKED(ctx, JOB_HEADER, h, "Job Header (%" PRIx64 "):\n", + jc_gpu_va); + pandecode_log(ctx, "\n"); switch (h.type) { case MALI_JOB_TYPE_WRITE_VALUE: - pandecode_write_value_job(jc_gpu_va); + pandecode_write_value_job(ctx, jc_gpu_va); break; case MALI_JOB_TYPE_CACHE_FLUSH: - pandecode_cache_flush_job(jc_gpu_va); + pandecode_cache_flush_job(ctx, jc_gpu_va); break; case MALI_JOB_TYPE_TILER: - pandecode_tiler_job(&h, jc_gpu_va, gpu_id); + pandecode_tiler_job(ctx, &h, jc_gpu_va, gpu_id); break; #if PAN_ARCH <= 7 case MALI_JOB_TYPE_VERTEX: case MALI_JOB_TYPE_COMPUTE: - pandecode_vertex_compute_geometry_job(&h, jc_gpu_va, gpu_id); + pandecode_vertex_compute_geometry_job(ctx, &h, jc_gpu_va, gpu_id); break; #if PAN_ARCH >= 6 case MALI_JOB_TYPE_INDEXED_VERTEX: - pandecode_indexed_vertex_job(&h, jc_gpu_va, gpu_id); + pandecode_indexed_vertex_job(ctx, &h, jc_gpu_va, gpu_id); break; #endif #else case MALI_JOB_TYPE_COMPUTE: - pandecode_compute_job(jc_gpu_va, gpu_id); + pandecode_compute_job(ctx, jc_gpu_va, gpu_id); break; case MALI_JOB_TYPE_MALLOC_VERTEX: - pandecode_malloc_vertex_job(jc_gpu_va, gpu_id); + pandecode_malloc_vertex_job(ctx, jc_gpu_va, gpu_id); break; #endif case MALI_JOB_TYPE_FRAGMENT: - pandecode_fragment_job(jc_gpu_va, gpu_id); + pandecode_fragment_job(ctx, jc_gpu_va, gpu_id); break; default: @@ -651,17 +668,18 @@ GENX(pandecode_jc)(mali_ptr jc_gpu_va, unsigned gpu_id) _mesa_set_destroy(va_set, NULL); - fflush(pandecode_dump_stream); - pandecode_map_read_write(); + fflush(ctx->dump_stream); + pandecode_map_read_write(ctx); } void -GENX(pandecode_abort_on_fault)(mali_ptr jc_gpu_va) +GENX(pandecode_abort_on_fault)(struct pandecode_context *ctx, + mali_ptr jc_gpu_va) { mali_ptr next_job = 0; do { - pan_unpack(PANDECODE_PTR(jc_gpu_va, struct mali_job_header_packed), + pan_unpack(PANDECODE_PTR(ctx, jc_gpu_va, struct mali_job_header_packed), JOB_HEADER, h); next_job = h.next; @@ -673,7 +691,7 @@ GENX(pandecode_abort_on_fault)(mali_ptr jc_gpu_va) } } while ((jc_gpu_va = next_job)); - pandecode_map_read_write(); + pandecode_map_read_write(ctx); } #endif diff --git a/src/panfrost/lib/pan_bo.c b/src/panfrost/lib/pan_bo.c index 0466ac1164f..b884cff4488 100644 --- a/src/panfrost/lib/pan_bo.c +++ b/src/panfrost/lib/pan_bo.c @@ -405,9 +405,11 @@ panfrost_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags, if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) { if (flags & PAN_BO_INVISIBLE) - pandecode_inject_mmap(bo->ptr.gpu, NULL, bo->size, NULL); + pandecode_inject_mmap(dev->decode_ctx, bo->ptr.gpu, NULL, bo->size, + NULL); else if (!(flags & PAN_BO_DELAY_MMAP)) - pandecode_inject_mmap(bo->ptr.gpu, bo->ptr.cpu, bo->size, NULL); + 
pandecode_inject_mmap(dev->decode_ctx, bo->ptr.gpu, bo->ptr.cpu, + bo->size, NULL); } return bo; @@ -444,7 +446,7 @@ panfrost_bo_unreference(struct panfrost_bo *bo) panfrost_bo_munmap(bo); if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) - pandecode_inject_free(bo->ptr.gpu, bo->size); + pandecode_inject_free(dev->decode_ctx, bo->ptr.gpu, bo->size); /* Rather than freeing the BO now, we'll cache the BO for later * allocations if we're allowed to. diff --git a/src/panfrost/lib/pan_device.h b/src/panfrost/lib/pan_device.h index 3b85f3526c5..8ad35a9d6b4 100644 --- a/src/panfrost/lib/pan_device.h +++ b/src/panfrost/lib/pan_device.h @@ -130,6 +130,9 @@ struct panfrost_device { int fd; + /* For pandecode */ + struct pandecode_context *decode_ctx; + /* Properties of the GPU in use */ unsigned arch; unsigned gpu_id; diff --git a/src/panfrost/lib/pan_props.c b/src/panfrost/lib/pan_props.c index b072fce5894..46d61c9f553 100644 --- a/src/panfrost/lib/pan_props.c +++ b/src/panfrost/lib/pan_props.c @@ -286,7 +286,7 @@ panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev) /* Initialize pandecode before we start allocating */ if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) - pandecode_initialize(!(dev->debug & PAN_DBG_TRACE)); + dev->decode_ctx = pandecode_create_context(!(dev->debug & PAN_DBG_TRACE)); /* Tiler heap is internally required by the tiler, which can only be * active for a single job chain at once, so a single heap can be diff --git a/src/panfrost/lib/wrap.h b/src/panfrost/lib/wrap.h index 531f6065182..c1e61332203 100644 --- a/src/panfrost/lib/wrap.h +++ b/src/panfrost/lib/wrap.h @@ -42,22 +42,29 @@ * included in-tree. */ -void pandecode_initialize(bool to_stderr); +// TODO: update panwrap -void pandecode_next_frame(void); +struct pandecode_context; -void pandecode_close(void); +struct pandecode_context *pandecode_create_context(bool to_stderr); -void pandecode_inject_mmap(uint64_t gpu_va, void *cpu, unsigned sz, - const char *name); +void pandecode_next_frame(struct pandecode_context *ctx); -void pandecode_inject_free(uint64_t gpu_va, unsigned sz); +void pandecode_destroy_context(struct pandecode_context *ctx); -void pandecode_jc(uint64_t jc_gpu_va, unsigned gpu_id); +void pandecode_inject_mmap(struct pandecode_context *ctx, uint64_t gpu_va, + void *cpu, unsigned sz, const char *name); -void pandecode_cs(mali_ptr queue_gpu_va, uint32_t size, unsigned gpu_id, - uint32_t *regs); +void pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va, + unsigned sz); -void pandecode_abort_on_fault(uint64_t jc_gpu_va, unsigned gpu_id); +void pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va, + unsigned gpu_id); + +void pandecode_cs(struct pandecode_context *ctx, mali_ptr queue_gpu_va, + uint32_t size, unsigned gpu_id, uint32_t *regs); + +void pandecode_abort_on_fault(struct pandecode_context *ctx, uint64_t jc_gpu_va, + unsigned gpu_id); #endif /* __MMAP_TRACE_H__ */ diff --git a/src/panfrost/tools/panfrostdump.c b/src/panfrost/tools/panfrostdump.c index d204866573a..987663dd652 100644 --- a/src/panfrost/tools/panfrostdump.c +++ b/src/panfrost/tools/panfrostdump.c @@ -254,7 +254,7 @@ main(int argc, char *argv[]) i = j = k = 0; atexit(cleanup); - pandecode_initialize(false); + struct pandecode_context *ctx = pandecode_create_context(false); hdr_fp = fopen(argv[optind], "r"); if (!hdr_fp) { @@ -374,7 +374,7 @@ main(int argc, char *argv[]) fclose(bodump); - pandecode_inject_mmap(doh.bomap.iova, bos[j], doh.file_size, + pandecode_inject_mmap(ctx, doh.bomap.iova, 
                            bos[j], doh.file_size,
                            NULL);
       } else {
@@ -397,8 +397,8 @@ main(int argc, char *argv[])
    if (doh.type != PANFROSTDUMP_BUF_TRAILER)
       fprintf(stderr, "Trailing header isn't right\n");
 
-   pandecode_jc(jc, gpu_id);
-   pandecode_close();
+   pandecode_jc(ctx, jc, gpu_id);
+   pandecode_destroy_context(ctx);
 
    fclose(data_fp);
    fclose(hdr_fp);
diff --git a/src/panfrost/vulkan/panvk_vX_device.c b/src/panfrost/vulkan/panvk_vX_device.c
index e37983d843b..433b7bae9fd 100644
--- a/src/panfrost/vulkan/panvk_vX_device.c
+++ b/src/panfrost/vulkan/panvk_vX_device.c
@@ -77,10 +77,11 @@ panvk_queue_submit_batch(struct panvk_queue *queue, struct panvk_batch *batch,
       }
 
       if (debug & PANVK_DEBUG_TRACE)
-         pandecode_jc(batch->scoreboard.first_job, pdev->gpu_id);
+         pandecode_jc(pdev->decode_ctx, batch->scoreboard.first_job,
+                      pdev->gpu_id);
 
       if (debug & PANVK_DEBUG_DUMP)
-         pandecode_dump_mappings();
+         pandecode_dump_mappings(pdev->decode_ctx);
    }
 
    if (batch->fragment_job) {
@@ -109,14 +110,14 @@ panvk_queue_submit_batch(struct panvk_queue *queue, struct panvk_batch *batch,
       }
 
       if (debug & PANVK_DEBUG_TRACE)
-         pandecode_jc(batch->fragment_job, pdev->gpu_id);
+         pandecode_jc(pdev->decode_ctx, batch->fragment_job, pdev->gpu_id);
 
       if (debug & PANVK_DEBUG_DUMP)
-         pandecode_dump_mappings();
+         pandecode_dump_mappings(pdev->decode_ctx);
    }
 
    if (debug & PANVK_DEBUG_TRACE)
-      pandecode_next_frame();
+      pandecode_next_frame(pdev->decode_ctx);
 
    batch->issued = true;
 }
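
As a usage reference, here is a minimal sketch of how the reworked, context-based entry points from wrap.h compose after this change. trace_one_frame() and its parameters are illustrative only, not part of the patch; real callers keep the context in struct panfrost_device (pan_props.c) and drive it from the BO and submit paths (pan_bo.c, pan_job.c).

/* Sketch only: prototypes copied from wrap.h above so the example is
 * self-contained; trace_one_frame() and its arguments are hypothetical. */
#include <stdbool.h>
#include <stdint.h>

struct pandecode_context;

struct pandecode_context *pandecode_create_context(bool to_stderr);
void pandecode_inject_mmap(struct pandecode_context *ctx, uint64_t gpu_va,
                           void *cpu, unsigned sz, const char *name);
void pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va,
                           unsigned sz);
void pandecode_jc(struct pandecode_context *ctx, uint64_t jc_gpu_va,
                  unsigned gpu_id);
void pandecode_next_frame(struct pandecode_context *ctx);
void pandecode_destroy_context(struct pandecode_context *ctx);

static void
trace_one_frame(void *bo_cpu, uint64_t bo_gpu, unsigned bo_size,
                uint64_t first_job, unsigned gpu_id)
{
   /* One context per device. Per pan_props.c, to_stderr is true when only
    * PAN_DBG_SYNC (not PAN_DBG_TRACE) is set. */
   struct pandecode_context *ctx = pandecode_create_context(false);

   /* Register every GPU-visible mapping before decoding anything. */
   pandecode_inject_mmap(ctx, bo_gpu, bo_cpu, bo_size, NULL);

   /* Decode the job chain, then mark the frame boundary. */
   pandecode_jc(ctx, first_job, gpu_id);
   pandecode_next_frame(ctx);

   pandecode_inject_free(ctx, bo_gpu, bo_size);
   pandecode_destroy_context(ctx);
}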
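
Separately, the CS interpreter changes above (interpret_ceu_instr() / interpret_ceu_jump()) all lean on one register-pair convention: a 64-bit value occupies two consecutive 32-bit CS registers, low word first. The helpers below are a hypothetical restatement of that convention for illustration, not code from the patch.

#include <stdint.h>

/* Read a 64-bit value from the pair regs[idx], regs[idx + 1], matching
 * interpret_ceu_jump(): address = ((uint64_t)address_hi << 32) | address_lo. */
static uint64_t
cs_reg_read64(const uint32_t *regs, unsigned idx)
{
   return ((uint64_t)regs[idx + 1] << 32) | regs[idx];
}

/* Write a 64-bit value, matching the MALI_CEU_OPCODE_MOVE case: the
 * destination register gets the low half, destination + 1 the high half. */
static void
cs_reg_write64(uint32_t *regs, unsigned idx, uint64_t value)
{
   regs[idx + 0] = (uint32_t)value;
   regs[idx + 1] = (uint32_t)(value >> 32);
}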