pan/genxml: Build libpanfrost_decode for v12

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Lars-Ivar Hesselberg Simonsen <lars-ivar.simonsen@arm.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34032>
This commit is contained in:
Mary Guillemard 2025-01-30 11:56:19 +00:00
parent ece01443e1
commit 811525b543
7 changed files with 527 additions and 9 deletions

View file

@ -748,8 +748,15 @@ cs_instr_is_asynchronous(enum mali_cs_opcode opcode, uint16_t wait_mask)
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
case MALI_CS_OPCODE_RUN_FRAGMENT: case MALI_CS_OPCODE_RUN_FRAGMENT:
case MALI_CS_OPCODE_RUN_FULLSCREEN: case MALI_CS_OPCODE_RUN_FULLSCREEN:
#if PAN_ARCH >= 12
case MALI_CS_OPCODE_RUN_IDVS2:
#else
case MALI_CS_OPCODE_RUN_IDVS: case MALI_CS_OPCODE_RUN_IDVS:
#if PAN_ARCH == 10
case MALI_CS_OPCODE_RUN_TILING: case MALI_CS_OPCODE_RUN_TILING:
#endif
#endif
/* Always asynchronous. */ /* Always asynchronous. */
return true; return true;
@ -761,6 +768,9 @@ cs_instr_is_asynchronous(enum mali_cs_opcode opcode, uint16_t wait_mask)
case MALI_CS_OPCODE_STORE_STATE: case MALI_CS_OPCODE_STORE_STATE:
case MALI_CS_OPCODE_TRACE_POINT: case MALI_CS_OPCODE_TRACE_POINT:
case MALI_CS_OPCODE_HEAP_OPERATION: case MALI_CS_OPCODE_HEAP_OPERATION:
#if PAN_ARCH >= 11
case MALI_CS_OPCODE_SHARED_SB_INC:
#endif
/* Asynchronous only if wait_mask != 0. */ /* Asynchronous only if wait_mask != 0. */
return wait_mask != 0; return wait_mask != 0;
@ -1158,6 +1168,7 @@ cs_run_compute(struct cs_builder *b, unsigned task_increment,
} }
} }
#if PAN_ARCH == 10
static inline void static inline void
cs_run_tiling(struct cs_builder *b, uint32_t flags_override, bool progress_inc, cs_run_tiling(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
struct cs_shader_res_sel res_sel) struct cs_shader_res_sel res_sel)
@ -1171,7 +1182,29 @@ cs_run_tiling(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
I.fau_select = res_sel.fau; I.fau_select = res_sel.fau;
} }
} }
#endif
#if PAN_ARCH >= 12
/*
 * Emit a RUN_IDVS2 instruction (v12+ replacement for RUN_IDVS).
 *
 * Pass an undefined index (CS_INDEX_UNDEF) as draw_id when no draw-ID
 * register should be consumed by the job.
 */
static inline void
cs_run_idvs2(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
             bool malloc_enable, struct cs_index draw_id,
             enum mali_idvs_shading_mode vertex_shading_mode)
{
   cs_emit(b, RUN_IDVS2, I) {
      I.flags_override = flags_override;
      I.progress_increment = progress_inc;
      I.malloc_enable = malloc_enable;
      I.vertex_shading_mode = vertex_shading_mode;

      /* The draw-ID register is only read by the HW when explicitly
       * enabled, so leave I.draw_id untouched otherwise. */
      const bool has_draw_id = draw_id.type != CS_INDEX_UNDEF;

      I.draw_id_register_enable = has_draw_id;
      if (has_draw_id)
         I.draw_id = cs_src32(b, draw_id);
   }
}
#else
static inline void static inline void
cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool progress_inc, cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
bool malloc_enable, struct cs_shader_res_sel varying_sel, bool malloc_enable, struct cs_shader_res_sel varying_sel,
@ -1205,6 +1238,7 @@ cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool progress_inc,
I.fragment_tsd_select = frag_sel.tsd == 2; I.fragment_tsd_select = frag_sel.tsd == 2;
} }
} }
#endif
static inline void static inline void
cs_run_fragment(struct cs_builder *b, bool enable_tem, cs_run_fragment(struct cs_builder *b, bool enable_tem,
@ -1354,6 +1388,7 @@ cs_store64(struct cs_builder *b, struct cs_index data, struct cs_index address,
cs_store(b, data, address, BITFIELD_MASK(2), offset); cs_store(b, data, address, BITFIELD_MASK(2), offset);
} }
#if PAN_ARCH < 11
/* /*
* Select which scoreboard entry will track endpoint tasks and other tasks * Select which scoreboard entry will track endpoint tasks and other tasks
* respectively. Pass to cs_wait to wait later. * respectively. Pass to cs_wait to wait later.
@ -1374,6 +1409,38 @@ cs_set_scoreboard_entry(struct cs_builder *b, unsigned ep, unsigned other)
if (unlikely(b->conf.ls_tracker)) if (unlikely(b->conf.ls_tracker))
assert(b->conf.ls_tracker->sb_slot == other); assert(b->conf.ls_tracker->sb_slot == other);
} }
#else
/*
 * Emit a SET_STATE_IMM32 instruction, programming a CS state slot with an
 * immediate 32-bit value.
 */
static inline void
cs_set_state_imm32(struct cs_builder *b, enum mali_cs_set_state_type state,
                   unsigned value)
{
   cs_emit(b, SET_STATE_IMM32, I) {
      I.state = state;
      I.value = value;
   }

   /* We assume the load/store scoreboard entry is static to keep things
    * simple, so when re-selecting the "other" slot, check it still matches
    * the slot the load/store tracker was set up with. */
   if (state == MALI_CS_SET_STATE_TYPE_SB_SEL_OTHER) {
      if (unlikely(b->conf.ls_tracker))
         assert(b->conf.ls_tracker->sb_slot == value);
   }
}
#endif
/*
 * Select which scoreboard entry will track endpoint tasks.
 * On v10 this also resets the "other" scoreboard selection to SB0
 * (cs_set_scoreboard_entry() programs both slots); on v11+ only the
 * endpoint selection is touched, via SET_STATE_IMM32.
 * Pass the same entry to cs_wait to wait on it later.
 */
static inline void
cs_select_sb_entries_for_async_ops(struct cs_builder *b, unsigned ep)
{
#if PAN_ARCH == 10
   cs_set_scoreboard_entry(b, ep, 0);
#else
   cs_set_state_imm32(b, MALI_CS_SET_STATE_TYPE_SB_SEL_ENDPOINT, ep);
#endif
}
static inline void static inline void
cs_progress_wait(struct cs_builder *b, unsigned queue, struct cs_index ref) cs_progress_wait(struct cs_builder *b, unsigned queue, struct cs_index ref)
@ -1967,6 +2034,51 @@ cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
cs_wait_slot(b, ctx->ls_sb_slot, false); cs_wait_slot(b, ctx->ls_sb_slot, false);
} }
#if PAN_ARCH >= 12
/* Trace record written back by cs_trace_run_idvs2() for each RUN_IDVS2:
 * the instruction pointer, the optional draw ID, and a snapshot of shader
 * registers r0-r65.  The layout must match the cs_store*() offsets used by
 * the tracer. */
struct cs_run_idvs2_trace {
   uint64_t ip;      /* GPU address of the traced RUN_IDVS2 instruction. */
   uint32_t draw_id; /* Only written when a draw-ID register is enabled. */
   uint32_t pad;     /* Keeps sr[] at a fixed 16-byte offset. */
   uint32_t sr[66];  /* Shader register file snapshot (r0-r65). */
} __attribute__((aligned(64))); /* 64-byte aligned — presumably to keep
                                 * records cache-line aligned; confirm. */
/*
 * Emit a RUN_IDVS2, optionally wrapped with tracing instructions.
 *
 * When tracing is disabled this degenerates to a plain cs_run_idvs2().
 * Otherwise a cs_run_idvs2_trace record (IP, draw ID, r0-r65) is stored to
 * the trace buffer.  scratch_regs provides the contiguous registers used as
 * scratch: reg/reg+1 hold the trace-buffer address (set up by
 * cs_trace_preamble()), reg+2/reg+3 hold the captured IP.
 */
static inline void
cs_trace_run_idvs2(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
                   struct cs_index scratch_regs, uint32_t flags_override,
                   bool progress_inc, bool malloc_enable,
                   struct cs_index draw_id,
                   enum mali_idvs_shading_mode vertex_shading_mode)
{
   /* Fast path: no tracing, just emit the run instruction. */
   if (likely(!ctx->enabled)) {
      cs_run_idvs2(b, flags_override, progress_inc, malloc_enable, draw_id,
                   vertex_shading_mode);
      return;
   }
   struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
   struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);
   cs_trace_preamble(b, ctx, scratch_regs, sizeof(struct cs_run_idvs2_trace));
   /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP
    * won't point to the right instruction. */
   cs_load_ip_to(b, data);
   cs_run_idvs2(b, flags_override, progress_inc, malloc_enable, draw_id,
                vertex_shading_mode);
   /* Record the captured IP, then the draw ID (if any) and the full shader
    * register file (r0-r63 in four 16-register stores, then r64-r65). */
   cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_idvs2, ip));
   if (draw_id.type != CS_INDEX_UNDEF)
      cs_store32(b, draw_id, tracebuf_addr,
                 cs_trace_field_offset(run_idvs2, draw_id));
   for (unsigned i = 0; i < 64; i += 16)
      cs_store(b, cs_reg_tuple(b, i, 16), tracebuf_addr, BITFIELD_MASK(16),
               cs_trace_field_offset(run_idvs2, sr[i]));
   cs_store(b, cs_reg_tuple(b, 64, 2), tracebuf_addr, BITFIELD_MASK(2),
            cs_trace_field_offset(run_idvs2, sr[64]));
   /* Make sure all trace stores landed before continuing. */
   cs_wait_slot(b, ctx->ls_sb_slot, false);
}
#else
struct cs_run_idvs_trace { struct cs_run_idvs_trace {
uint64_t ip; uint64_t ip;
uint32_t draw_id; uint32_t draw_id;
@ -2011,6 +2123,7 @@ cs_trace_run_idvs(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
cs_trace_field_offset(run_idvs, sr[48])); cs_trace_field_offset(run_idvs, sr[48]));
cs_wait_slot(b, ctx->ls_sb_slot, false); cs_wait_slot(b, ctx->ls_sb_slot, false);
} }
#endif
struct cs_run_compute_trace { struct cs_run_compute_trace {
uint64_t ip; uint64_t ip;

View file

@ -470,6 +470,7 @@ GENX(pandecode_depth_stencil)(struct pandecode_context *ctx, uint64_t addr)
DUMP_UNPACKED(ctx, DEPTH_STENCIL, desc, "Depth/stencil"); DUMP_UNPACKED(ctx, DEPTH_STENCIL, desc, "Depth/stencil");
} }
#if PAN_ARCH < 12
void void
GENX(pandecode_shader_environment)(struct pandecode_context *ctx, GENX(pandecode_shader_environment)(struct pandecode_context *ctx,
const struct MALI_SHADER_ENVIRONMENT *p, const struct MALI_SHADER_ENVIRONMENT *p,
@ -487,6 +488,7 @@ GENX(pandecode_shader_environment)(struct pandecode_context *ctx,
if (p->fau) if (p->fau)
GENX(pandecode_fau)(ctx, p->fau, p->fau_count, "FAU"); GENX(pandecode_fau)(ctx, p->fau, p->fau_count, "FAU");
} }
#endif
void void
GENX(pandecode_blend_descs)(struct pandecode_context *ctx, uint64_t blend, GENX(pandecode_blend_descs)(struct pandecode_context *ctx, uint64_t blend,
@ -515,7 +517,35 @@ GENX(pandecode_dcd)(struct pandecode_context *ctx, const struct MALI_DRAW *p,
GENX(pandecode_depth_stencil)(ctx, p->depth_stencil); GENX(pandecode_depth_stencil)(ctx, p->depth_stencil);
GENX(pandecode_blend_descs) GENX(pandecode_blend_descs)
(ctx, p->blend, p->blend_count, frag_shader, gpu_id); (ctx, p->blend, p->blend_count, frag_shader, gpu_id);
#if PAN_ARCH >= 12
if (p->vertex_shader)
GENX(pandecode_shader)(ctx, p->vertex_shader, "Vertex Shader", gpu_id);
if (p->vertex_resources)
GENX(pandecode_resource_tables)(ctx, p->vertex_resources,
"Vertex Resources");
if (p->vertex_fau.pointer)
GENX(pandecode_fau)(ctx, p->vertex_fau.pointer, p->vertex_fau.count,
"Vertex FAU");
if (p->fragment_shader)
GENX(pandecode_shader)(ctx, p->fragment_shader, "Fragment Shader",
gpu_id);
if (p->fragment_resources)
GENX(pandecode_resource_tables)(ctx, p->fragment_resources,
"Fragment Resources");
if (p->fragment_fau.pointer)
GENX(pandecode_fau)(ctx, p->fragment_fau.pointer, p->fragment_fau.count,
"Fragment FAU");
if (p->thread_storage)
DUMP_ADDR(ctx, LOCAL_STORAGE, p->thread_storage, "Local Storage:\n");
#else
GENX(pandecode_shader_environment)(ctx, &p->shader, gpu_id); GENX(pandecode_shader_environment)(ctx, &p->shader, gpu_id);
#endif
DUMP_UNPACKED(ctx, DRAW, *p, "Draw:\n"); DUMP_UNPACKED(ctx, DRAW, *p, "Draw:\n");
} }
#endif #endif

View file

@ -137,6 +137,13 @@ void pandecode_cs_binary_v10(struct pandecode_context *ctx, uint64_t bin,
void pandecode_cs_trace_v10(struct pandecode_context *ctx, uint64_t trace, void pandecode_cs_trace_v10(struct pandecode_context *ctx, uint64_t trace,
uint32_t trace_size, unsigned gpu_id); uint32_t trace_size, unsigned gpu_id);
void pandecode_interpret_cs_v12(struct pandecode_context *ctx, uint64_t queue,
uint32_t size, unsigned gpu_id, uint32_t *regs);
void pandecode_cs_binary_v12(struct pandecode_context *ctx, uint64_t bin,
uint32_t bin_size, unsigned gpu_id);
void pandecode_cs_trace_v12(struct pandecode_context *ctx, uint64_t trace,
uint32_t trace_size, unsigned gpu_id);
/* Logging infrastructure */ /* Logging infrastructure */
static void static void
pandecode_make_indent(struct pandecode_context *ctx) pandecode_make_indent(struct pandecode_context *ctx)
@ -256,9 +263,11 @@ void GENX(pandecode_tiler)(struct pandecode_context *ctx, uint64_t gpu_va,
#endif #endif
#if PAN_ARCH >= 9 #if PAN_ARCH >= 9
#if PAN_ARCH < 12
void GENX(pandecode_shader_environment)(struct pandecode_context *ctx, void GENX(pandecode_shader_environment)(struct pandecode_context *ctx,
const struct MALI_SHADER_ENVIRONMENT *p, const struct MALI_SHADER_ENVIRONMENT *p,
unsigned gpu_id); unsigned gpu_id);
#endif
void GENX(pandecode_resource_tables)(struct pandecode_context *ctx, void GENX(pandecode_resource_tables)(struct pandecode_context *ctx,
uint64_t addr, const char *label); uint64_t addr, const char *label);

View file

@ -426,6 +426,9 @@ pandecode_interpret_cs(struct pandecode_context *ctx, uint64_t queue_gpu_va,
case 10: case 10:
pandecode_interpret_cs_v10(ctx, queue_gpu_va, size, gpu_id, regs); pandecode_interpret_cs_v10(ctx, queue_gpu_va, size, gpu_id, regs);
break; break;
case 12:
pandecode_interpret_cs_v12(ctx, queue_gpu_va, size, gpu_id, regs);
break;
default: default:
unreachable("Unsupported architecture"); unreachable("Unsupported architecture");
} }
@ -443,6 +446,9 @@ pandecode_cs_binary(struct pandecode_context *ctx, uint64_t bin_gpu_va,
case 10: case 10:
pandecode_cs_binary_v10(ctx, bin_gpu_va, size, gpu_id); pandecode_cs_binary_v10(ctx, bin_gpu_va, size, gpu_id);
break; break;
case 12:
pandecode_cs_binary_v12(ctx, bin_gpu_va, size, gpu_id);
break;
default: default:
unreachable("Unsupported architecture"); unreachable("Unsupported architecture");
} }
@ -460,6 +466,9 @@ pandecode_cs_trace(struct pandecode_context *ctx, uint64_t trace_gpu_va,
case 10: case 10:
pandecode_cs_trace_v10(ctx, trace_gpu_va, size, gpu_id); pandecode_cs_trace_v10(ctx, trace_gpu_va, size, gpu_id);
break; break;
case 12:
pandecode_cs_trace_v12(ctx, trace_gpu_va, size, gpu_id);
break;
default: default:
unreachable("Unsupported architecture"); unreachable("Unsupported architecture");
} }

View file

@ -141,6 +141,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
break; break;
} }
#if PAN_ARCH == 10
case MALI_CS_OPCODE_RUN_TILING: { case MALI_CS_OPCODE_RUN_TILING: {
cs_unpack(instr, CS_RUN_TILING, I); cs_unpack(instr, CS_RUN_TILING, I);
fprintf(fp, "RUN_TILING%s.srt%d.spd%d.tsd%d.fau%d", fprintf(fp, "RUN_TILING%s.srt%d.spd%d.tsd%d.fau%d",
@ -148,7 +149,9 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
I.spd_select, I.tsd_select, I.fau_select); I.spd_select, I.tsd_select, I.fau_select);
break; break;
} }
#endif
#if PAN_ARCH < 12
case MALI_CS_OPCODE_RUN_IDVS: { case MALI_CS_OPCODE_RUN_IDVS: {
cs_unpack(instr, CS_RUN_IDVS, I); cs_unpack(instr, CS_RUN_IDVS, I);
fprintf( fprintf(
@ -162,6 +165,130 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
I.flags_override); I.flags_override);
break; break;
} }
#else
case MALI_CS_OPCODE_RUN_IDVS2: {
cs_unpack(instr, CS_RUN_IDVS2, I);
const char *vertex_shading_str[] = {
".early",
".deferred",
".INVALID",
".INVALID",
};
fprintf(fp, "RUN_IDVS2%s%s%s%s r%u, #%x",
I.progress_increment ? ".progress_inc" : "",
I.malloc_enable ? "" : ".no_malloc",
I.draw_id_register_enable ? ".draw_id_enable" : "",
vertex_shading_str[I.vertex_shading_mode], I.draw_id,
I.flags_override);
break;
}
#endif
#if PAN_ARCH >= 11
case MALI_CS_OPCODE_LOGIC_OP32: {
cs_unpack(instr, CS_LOGIC_OP32, I);
const char *mode_name[] = {
".CLEAR", ".AND", ".AND_A_NB", ".MOV_A", ".AND_NA_B", ".MOV_B",
".XOR", ".OR", ".NOR", ".XNOR", ".NOT_B", ".OR_A_NB",
".NOT_A", ".OR_NA_B", ".NAND", ".SET",
};
const char *index_name[] = {
".direct",
".index",
};
fprintf(fp, "LOGIC_OP32%s r%u, r%u, r%u%s", mode_name[I.mode],
I.destination, I.source_0, I.source_1, index_name[I.index]);
break;
}
case MALI_CS_OPCODE_NEXT_SB_ENTRY: {
cs_unpack(instr, CS_NEXT_SB_ENTRY, I);
const char *sb_type_name[] = {
".no_change", ".endpoint", ".other", ".deferred",
".INVALID", ".INVALID", ".INVALID", ".INVALID",
".INVALID", ".INVALID", ".INVALID", ".INVALID",
".INVALID", ".INVALID", ".INVALID", ".INVALID",
};
const char *format_name[] = {".index", ".mask"};
fprintf(fp, "NEXT_SB_ENTR%s%s r%u", sb_type_name[I.sb_type],
format_name[I.format], I.destination);
break;
}
case MALI_CS_OPCODE_SET_STATE: {
cs_unpack(instr, CS_SET_STATE, I);
const char *state_name[] = {
".sb_sel_endpoint", ".sb_sel_other", ".sb_sel_deferred", ".INVALID",
".INVALID", ".INVALID", ".INVALID", ".INVALID",
".sb_mask_stream", ".sb_mask_wait",
};
const char *state =
I.state <= sizeof(state_name) ? state_name[I.state] : ".INVALID";
fprintf(fp, "SET_STATE%s r%u", state, I.source);
break;
}
case MALI_CS_OPCODE_SET_STATE_IMM32: {
cs_unpack(instr, CS_SET_STATE_IMM32, I);
const char *state_name[] = {
".sb_sel_endpoint", ".sb_sel_other", ".sb_sel_deferred", ".INVALID",
".INVALID", ".INVALID", ".INVALID", ".INVALID",
".sb_mask_stream", ".sb_mask_wait",
};
const char *state =
I.state <= sizeof(state_name) ? state_name[I.state] : ".INVALID";
fprintf(fp, "SET_STATE_IMM32%s #%u", state, I.value);
break;
}
case MALI_CS_OPCODE_SHARED_SB_INC: {
cs_unpack(instr, CS_SHARED_SB_INC, I);
const char *progress_increment_name[] = {
".no_increment",
".increment",
};
const char *defer_mode_name[] = {
".defer_immediate",
".defer_indirect",
};
fprintf(fp, "SHARED_SB_INC%s%s #%u, #%u",
progress_increment_name[I.progress_increment],
defer_mode_name[I.defer_mode], I.sb_mask, I.shared_entry);
break;
}
case MALI_CS_OPCODE_SHARED_SB_DEC: {
cs_unpack(instr, CS_SHARED_SB_DEC, I);
const char *progress_increment_name[] = {
".no_increment",
".increment",
};
fprintf(fp, "SHARED_SB_DEC%s #%u",
progress_increment_name[I.progress_increment], I.shared_entry);
break;
}
#endif
case MALI_CS_OPCODE_RUN_FRAGMENT: { case MALI_CS_OPCODE_RUN_FRAGMENT: {
static const char *tile_order[] = { static const char *tile_order[] = {
@ -203,6 +330,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
break; break;
} }
#if PAN_ARCH < 13
case MALI_CS_OPCODE_ADD_IMMEDIATE32: { case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
cs_unpack(instr, CS_ADD_IMM32, I); cs_unpack(instr, CS_ADD_IMM32, I);
@ -226,6 +354,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr)
I.source_0); I.source_0);
break; break;
} }
#endif
case MALI_CS_OPCODE_LOAD_MULTIPLE: { case MALI_CS_OPCODE_LOAD_MULTIPLE: {
cs_unpack(instr, CS_LOAD_MULTIPLE, I); cs_unpack(instr, CS_LOAD_MULTIPLE, I);
@ -536,6 +665,7 @@ pandecode_run_compute_indirect(struct pandecode_context *ctx, FILE *fp,
ctx->indent--; ctx->indent--;
} }
#if PAN_ARCH == 10
static void static void
pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp, pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CS_RUN_TILING *I) struct queue_ctx *qctx, struct MALI_CS_RUN_TILING *I)
@ -617,7 +747,137 @@ pandecode_run_tiling(struct pandecode_context *ctx, FILE *fp,
ctx->indent--; ctx->indent--;
} }
#endif
#if PAN_ARCH >= 12
/*
 * Decode a RUN_IDVS2 (v12+) instruction: dump every descriptor and piece of
 * draw state referenced through the IDVS shader registers of the current
 * queue context.
 *
 * Fixes vs. previous revision:
 *  - MALI_IDVS_SR_OQ holds a 64-bit pointer (it is printed with PRIx64
 *    below), so read it with cs_get_u64() instead of cs_get_u32(), which
 *    silently truncated the upper half.
 *  - "tilder" -> "tiler" local-variable typo.
 */
static void
pandecode_run_idvs2(struct pandecode_context *ctx, FILE *fp,
                    struct queue_ctx *qctx, struct MALI_CS_RUN_IDVS2 *I)
{
   /* Register contents are unreliable inside an exception handler, so
    * there is nothing meaningful to decode. */
   if (qctx->in_exception_handler)
      return;
   ctx->indent++;
   uint64_t vert_srt = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_SRT);
   uint64_t frag_srt = cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SRT);
   uint64_t vert_fau = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_FAU);
   uint64_t fragment_fau = cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_FAU);
   uint64_t vertex_spd = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_SPD);
   uint64_t fragment_spd = cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_SPD);
   uint64_t vertex_tsd = cs_get_u64(qctx, MALI_IDVS_SR_VERTEX_TSD);
   uint64_t fragment_tsd = cs_get_u64(qctx, MALI_IDVS_SR_FRAGMENT_TSD);
   uint32_t global_attribute_offset =
      cs_get_u32(qctx, MALI_IDVS_SR_GLOBAL_ATTRIBUTE_OFFSET);
   uint32_t index_count = cs_get_u32(qctx, MALI_IDVS_SR_INDEX_COUNT);
   uint32_t instance_count = cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_COUNT);
   uint32_t index_offset = cs_get_u32(qctx, MALI_IDVS_SR_INDEX_OFFSET);
   uint32_t vertex_offset = cs_get_u32(qctx, MALI_IDVS_SR_VERTEX_OFFSET);
   uint32_t instance_offset = cs_get_u32(qctx, MALI_IDVS_SR_INSTANCE_OFFSET);
   uint64_t tiler_descriptor_pointer =
      cs_get_u64(qctx, MALI_IDVS_SR_TILER_CTX);
   uint64_t vertex_index_array_pointer =
      cs_get_u64(qctx, MALI_IDVS_SR_INDEX_BUFFER);
   uint32_t index_array_size = cs_get_u32(qctx, MALI_IDVS_SR_INDEX_BUFFER_SIZE);
   /* Only the low 16 bits of the VARY_SIZE register hold the per-vertex
    * varying size. */
   uint32_t varying_size = cs_get_u32(qctx, MALI_IDVS_SR_VARY_SIZE) & 0xffff;
   uint64_t zsd_pointer = cs_get_u64(qctx, MALI_IDVS_SR_ZSD);
   uint64_t blend = cs_get_u64(qctx, MALI_IDVS_SR_BLEND_DESC);
   uint32_t raw_tiler_flags = cs_get_u32(qctx, MALI_IDVS_SR_TILER_FLAGS);
   /* OQ is a pointer: read the full 64 bits. */
   uint64_t occlusion_pointer = cs_get_u64(qctx, MALI_IDVS_SR_OQ);
   /* Merge flag overrides with the register flags */
   struct mali_primitive_flags_packed tiler_flags_packed = {
      .opaque[0] = raw_tiler_flags | I->flags_override,
   };
   pan_unpack(&tiler_flags_packed, PRIMITIVE_FLAGS, tiler_flags);
   if (vert_srt)
      GENX(pandecode_resource_tables)(ctx, vert_srt, "Vertex resources");
   if (frag_srt)
      GENX(pandecode_resource_tables)(ctx, frag_srt, "Fragment resources");
   /* FAU registers pack a 48-bit pointer and an 8-bit word count in the
    * top byte. */
   if (vert_fau) {
      uint64_t lo = vert_fau & BITFIELD64_MASK(48);
      uint64_t hi = vert_fau >> 56;
      GENX(pandecode_fau)(ctx, lo, hi, "Vertex FAU");
   }
   if (fragment_fau) {
      uint64_t lo = fragment_fau & BITFIELD64_MASK(48);
      uint64_t hi = fragment_fau >> 56;
      GENX(pandecode_fau)(ctx, lo, hi, "Fragment FAU");
   }
   if (vertex_spd) {
      GENX(pandecode_shader)
      (ctx, vertex_spd, "Vertex shader", qctx->gpu_id);
   }
   if (fragment_spd) {
      GENX(pandecode_shader)
      (ctx, fragment_spd, "Fragment shader", qctx->gpu_id);
   }
   DUMP_ADDR(ctx, LOCAL_STORAGE, vertex_tsd,
             "Vertex Local Storage @%" PRIx64 ":\n", vertex_tsd);
   DUMP_ADDR(ctx, LOCAL_STORAGE, fragment_tsd,
             "Fragment Local Storage @%" PRIx64 ":\n", fragment_tsd);
   pandecode_log(ctx, "Global attribute offset: %u\n", global_attribute_offset);
   pandecode_log(ctx, "Index count: %u\n", index_count);
   pandecode_log(ctx, "Instance count: %u\n", instance_count);
   /* The index offset is only meaningful for indexed draws. */
   if (tiler_flags.index_type)
      pandecode_log(ctx, "Index offset: %u\n", index_offset);
   pandecode_log(ctx, "Vertex offset: %u\n", vertex_offset);
   pandecode_log(ctx, "Instance offset: %u\n", instance_offset);
   GENX(pandecode_tiler)(ctx, tiler_descriptor_pointer, qctx->gpu_id);
   /* If this is true, then the scissor is actually a pointer to an
    * array of boxes; bottom 56 bits are the pointer and top 8 are
    * the length */
   assert(!tiler_flags.scissor_array_enable);
   struct mali_viewport_packed viewport_packed = {
      .opaque[0] = cs_get_u32(qctx, MALI_IDVS_SR_VIEWPORT_HIGH),
      .opaque[1] = cs_get_u32(qctx, MALI_IDVS_SR_VIEWPORT_HIGH + 1),
      .opaque[2] = cs_get_u32(qctx, MALI_IDVS_SR_VIEWPORT_LOW),
      .opaque[3] = cs_get_u32(qctx, MALI_IDVS_SR_VIEWPORT_LOW + 1),
   };
   DUMP_CL(ctx, VIEWPORT, &viewport_packed, "Viewport\n");
   DUMP_CL(ctx, SCISSOR, &qctx->regs[MALI_IDVS_SR_SCISSOR_BOX], "Scissor\n");
   pandecode_log(ctx, "Per-vertex varying size: %u\n", varying_size);
   DUMP_ADDR(ctx, DEPTH_STENCIL, zsd_pointer, "Depth/stencil");
   /* The blend register packs the descriptor pointer (low bits masked) and
    * the descriptor count in the bottom 4 bits. */
   GENX(pandecode_blend_descs)(ctx, blend & ~15, blend & 15, 0, qctx->gpu_id);
   if (tiler_flags.index_type) {
      pandecode_log(ctx, "Indices: %" PRIx64 "\n", vertex_index_array_pointer);
      pandecode_log(ctx, "Index array size: %u\n", index_array_size);
   }
   DUMP_UNPACKED(ctx, PRIMITIVE_FLAGS, tiler_flags, "Primitive flags\n");
   DUMP_CL(ctx, DCD_FLAGS_0, &qctx->regs[MALI_IDVS_SR_DCD0], "DCD Flags 0\n");
   DUMP_CL(ctx, DCD_FLAGS_1, &qctx->regs[MALI_IDVS_SR_DCD1], "DCD Flags 1\n");
   DUMP_CL(ctx, DCD_FLAGS_2, &qctx->regs[MALI_IDVS_SR_DCD2], "DCD Flags 2\n");
   DUMP_CL(ctx, PRIMITIVE_SIZE, &qctx->regs[MALI_IDVS_SR_PRIMITIVE_SIZE],
           "Primitive size\n");
   DUMP_CL(ctx, PRIMITIVE_FLAGS_2, &qctx->regs[MALI_IDVS_SR_TILER_FLAGS2],
           "Tiler flags 2\n");
   pandecode_log(ctx, "Occlusion: %" PRIx64 "\n", occlusion_pointer);
   ctx->indent--;
}
#else
static void static void
pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
struct queue_ctx *qctx, struct MALI_CS_RUN_IDVS *I) struct queue_ctx *qctx, struct MALI_CS_RUN_IDVS *I)
@ -767,6 +1027,7 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp,
ctx->indent--; ctx->indent--;
} }
#endif
static void static void
pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp, pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp,
@ -910,17 +1171,27 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
break; break;
} }
#if PAN_ARCH == 10
case MALI_CS_OPCODE_RUN_TILING: { case MALI_CS_OPCODE_RUN_TILING: {
cs_unpack(bytes, CS_RUN_TILING, I); cs_unpack(bytes, CS_RUN_TILING, I);
pandecode_run_tiling(ctx, fp, qctx, &I); pandecode_run_tiling(ctx, fp, qctx, &I);
break; break;
} }
#endif
#if PAN_ARCH >= 12
case MALI_CS_OPCODE_RUN_IDVS2: {
cs_unpack(bytes, CS_RUN_IDVS2, I);
pandecode_run_idvs2(ctx, fp, qctx, &I);
break;
}
#else
case MALI_CS_OPCODE_RUN_IDVS: { case MALI_CS_OPCODE_RUN_IDVS: {
cs_unpack(bytes, CS_RUN_IDVS, I); cs_unpack(bytes, CS_RUN_IDVS, I);
pandecode_run_idvs(ctx, fp, qctx, &I); pandecode_run_idvs(ctx, fp, qctx, &I);
break; break;
} }
#endif
case MALI_CS_OPCODE_RUN_FRAGMENT: { case MALI_CS_OPCODE_RUN_FRAGMENT: {
cs_unpack(bytes, CS_RUN_FRAGMENT, I); cs_unpack(bytes, CS_RUN_FRAGMENT, I);
@ -971,6 +1242,38 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx)
break; break;
} }
#if PAN_ARCH >= 11
case MALI_CS_OPCODE_LOGIC_OP32: {
cs_unpack(bytes, CS_LOGIC_OP32, I);
uint32_t *dest = &qctx->regs[I.destination];
uint32_t source_0 = qctx->regs[I.source_0];
uint32_t source_1 = qctx->regs[I.source_1];
uint32_t mode_0 = I.mode & 1;
uint32_t mode_1 = (I.mode >> 1) & 1;
uint32_t mode_2 = (I.mode >> 2) & 1;
uint32_t mode_3 = (I.mode >> 3) & 1;
if (I.index == MALI_CS_LOGIC_OP_INDEX_INDEX)
source_1 = (1 << source_1);
uint32_t result = 0;
for (int i = 0; i < 32; i++) {
uint32_t a_n = (source_0 >> i) & 1;
uint32_t b_n = (source_1 >> i) & 1;
uint32_t tmp = 0;
tmp |= mode_0 & a_n & b_n;
tmp |= mode_1 & a_n & ~b_n;
tmp |= mode_2 & ~a_n & b_n;
tmp |= mode_3 & ~a_n & ~b_n;
result |= tmp << i;
}
*dest = result;
break;
}
#endif
case MALI_CS_OPCODE_ADD_IMMEDIATE32: { case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
cs_unpack(bytes, CS_ADD_IMM32, I); cs_unpack(bytes, CS_ADD_IMM32, I);
@ -1089,11 +1392,9 @@ GENX(pandecode_interpret_cs)(struct pandecode_context *ctx, uint64_t queue,
uint64_t *cs = pandecode_fetch_gpu_mem(ctx, queue, size); uint64_t *cs = pandecode_fetch_gpu_mem(ctx, queue, size);
/* Mali-G610 has 96 registers. Other devices not yet supported, we can make /* v10 has 96 registers. v12+ have 128. */
* this configurable later when we encounter new Malis.
*/
struct queue_ctx qctx = { struct queue_ctx qctx = {
.nr_regs = 96, .nr_regs = PAN_ARCH >= 12 ? 96 : 128,
.regs = regs, .regs = regs,
.ip = cs, .ip = cs,
.end = cs + (size / 8), .end = cs + (size / 8),
@ -1200,6 +1501,39 @@ record_indirect_branch_target(struct cs_code_cfg *cfg,
break; break;
} }
#if PAN_ARCH >= 11
case MALI_CS_OPCODE_LOGIC_OP32: {
cs_unpack(instr, CS_LOGIC_OP32, I);
uint32_t *dest = &reg_file.u32[I.destination];
uint32_t source_0 = reg_file.u32[I.source_0];
uint32_t source_1 = reg_file.u32[I.source_1];
uint32_t mode_0 = I.mode & 1;
uint32_t mode_1 = (I.mode >> 1) & 1;
uint32_t mode_2 = (I.mode >> 2) & 1;
uint32_t mode_3 = (I.mode >> 3) & 1;
if (I.index == MALI_CS_LOGIC_OP_INDEX_INDEX)
source_1 = (1 << source_1);
uint32_t result = 0;
for (int i = 0; i < 32; i++) {
uint32_t a_n = (source_0 >> i) & 1;
uint32_t b_n = (source_1 >> i) & 1;
uint32_t tmp = 0;
tmp |= mode_0 & a_n & b_n;
tmp |= mode_1 & a_n & ~b_n;
tmp |= mode_2 & ~a_n & b_n;
tmp |= mode_3 & ~a_n & ~b_n;
result |= tmp << i;
}
*dest = result;
break;
}
#endif
case MALI_CS_OPCODE_ADD_IMMEDIATE32: { case MALI_CS_OPCODE_ADD_IMMEDIATE32: {
cs_unpack(instr, CS_ADD_IMM32, I); cs_unpack(instr, CS_ADD_IMM32, I);
reg_file.u32[I.destination] = reg_file.u32[I.source] + I.immediate; reg_file.u32[I.destination] = reg_file.u32[I.source] + I.immediate;
@ -1551,7 +1885,11 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin,
break; break;
} }
#if PAN_ARCH >= 12
case MALI_CS_OPCODE_RUN_IDVS2:
#else
case MALI_CS_OPCODE_RUN_IDVS: case MALI_CS_OPCODE_RUN_IDVS:
#endif
case MALI_CS_OPCODE_RUN_FRAGMENT: case MALI_CS_OPCODE_RUN_FRAGMENT:
case MALI_CS_OPCODE_RUN_COMPUTE: case MALI_CS_OPCODE_RUN_COMPUTE:
case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
@ -1610,11 +1948,9 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
uint64_t *instr = pandecode_fetch_gpu_mem(ctx, *ip, sizeof(*instr)); uint64_t *instr = pandecode_fetch_gpu_mem(ctx, *ip, sizeof(*instr));
/* Mali-G610 has 96 registers. Other devices not yet supported, we can /* v10 has 96 registers. v12+ have 128. */
* make this configurable later when we encounter new Malis.
*/
struct queue_ctx qctx = { struct queue_ctx qctx = {
.nr_regs = 96, .nr_regs = PAN_ARCH >= 12 ? 96 : 128,
.regs = regs, .regs = regs,
.ip = instr, .ip = instr,
.end = instr + 1, .end = instr + 1,
@ -1628,6 +1964,23 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
cs_unpack(instr, CS_BASE, base); cs_unpack(instr, CS_BASE, base);
switch (base.opcode) { switch (base.opcode) {
#if PAN_ARCH >= 12
case MALI_CS_OPCODE_RUN_IDVS2: {
struct cs_run_idvs2_trace *idvs_trace = trace_data;
assert(trace_size >= sizeof(idvs_trace));
cs_unpack(instr, CS_RUN_IDVS2, I);
memcpy(regs, idvs_trace->sr, sizeof(idvs_trace->sr));
if (I.draw_id_register_enable)
regs[I.draw_id] = idvs_trace->draw_id;
pandecode_run_idvs2(ctx, ctx->dump_stream, &qctx, &I);
trace_data = idvs_trace + 1;
trace_size -= sizeof(*idvs_trace);
break;
}
#else
case MALI_CS_OPCODE_RUN_IDVS: { case MALI_CS_OPCODE_RUN_IDVS: {
struct cs_run_idvs_trace *idvs_trace = trace_data; struct cs_run_idvs_trace *idvs_trace = trace_data;
@ -1643,6 +1996,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
trace_size -= sizeof(*idvs_trace); trace_size -= sizeof(*idvs_trace);
break; break;
} }
#endif
case MALI_CS_OPCODE_RUN_FRAGMENT: { case MALI_CS_OPCODE_RUN_FRAGMENT: {
struct cs_run_fragment_trace *frag_trace = trace_data; struct cs_run_fragment_trace *frag_trace = trace_data;

View file

@ -20,7 +20,7 @@ idep_pan_packers = declare_dependency(
libpanfrost_decode_per_arch = [] libpanfrost_decode_per_arch = []
foreach ver : ['4', '5', '6', '7', '9', '10'] foreach ver : ['4', '5', '6', '7', '9', '10', '12']
libpanfrost_decode_per_arch += static_library( libpanfrost_decode_per_arch += static_library(
'pandecode-arch-v' + ver, 'pandecode-arch-v' + ver,
['decode.c', 'decode_jm.c', 'decode_csf.c', pan_packers], ['decode.c', 'decode_jm.c', 'decode_csf.c', pan_packers],

View file

@ -67,6 +67,8 @@ extern const struct pan_blendable_format
panfrost_blendable_formats_v9[PIPE_FORMAT_COUNT]; panfrost_blendable_formats_v9[PIPE_FORMAT_COUNT];
extern const struct pan_blendable_format extern const struct pan_blendable_format
panfrost_blendable_formats_v10[PIPE_FORMAT_COUNT]; panfrost_blendable_formats_v10[PIPE_FORMAT_COUNT];
extern const struct pan_blendable_format
panfrost_blendable_formats_v12[PIPE_FORMAT_COUNT];
static inline const struct pan_blendable_format * static inline const struct pan_blendable_format *
panfrost_blendable_format_table(unsigned arch) panfrost_blendable_format_table(unsigned arch)
@ -92,6 +94,7 @@ extern const struct panfrost_format panfrost_pipe_format_v6[PIPE_FORMAT_COUNT];
extern const struct panfrost_format panfrost_pipe_format_v7[PIPE_FORMAT_COUNT]; extern const struct panfrost_format panfrost_pipe_format_v7[PIPE_FORMAT_COUNT];
extern const struct panfrost_format panfrost_pipe_format_v9[PIPE_FORMAT_COUNT]; extern const struct panfrost_format panfrost_pipe_format_v9[PIPE_FORMAT_COUNT];
extern const struct panfrost_format panfrost_pipe_format_v10[PIPE_FORMAT_COUNT]; extern const struct panfrost_format panfrost_pipe_format_v10[PIPE_FORMAT_COUNT];
extern const struct panfrost_format panfrost_pipe_format_v12[PIPE_FORMAT_COUNT];
static inline const struct panfrost_format * static inline const struct panfrost_format *
panfrost_format_table(unsigned arch) panfrost_format_table(unsigned arch)