diff --git a/src/panfrost/genxml/cs_builder.h b/src/panfrost/genxml/cs_builder.h
index 537122eef5f..acdbb3a3e30 100644
--- a/src/panfrost/genxml/cs_builder.h
+++ b/src/panfrost/genxml/cs_builder.h
@@ -2512,6 +2512,69 @@ cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
    cs_flush_stores(b);
 }
 
+#if PAN_ARCH >= 13
+#define CS_RUN_FULLSCREEN_SR_MASK                                              \
+   (BITFIELD64_RANGE(40, 4) | BITFIELD64_RANGE(56, 4) | BITFIELD64_RANGE(61, 3))
+#define CS_RUN_FULLSCREEN_SR_COUNT 11 /* 4 + 4 + 3 */
+#elif PAN_ARCH >= 11
+#define CS_RUN_FULLSCREEN_SR_MASK                                              \
+   (BITFIELD64_RANGE(40, 4) | BITFIELD64_BIT(56) | BITFIELD64_RANGE(61, 3))
+#define CS_RUN_FULLSCREEN_SR_COUNT 8 /* 4 + 1 + 3 */
+#else
+#define CS_RUN_FULLSCREEN_SR_MASK                                              \
+   (BITFIELD64_RANGE(40, 4) | BITFIELD64_BIT(56))
+#define CS_RUN_FULLSCREEN_SR_COUNT 5 /* 4 + 1 */
+#endif /* mask bit N selects register N; COUNT = bits set in the mask */
+
+struct cs_run_fullscreen_trace {
+   uint64_t ip;  /* value loaded by cs_load_ip_to() just before the run */
+   uint64_t dcd; /* 64-bit value of the dcd register passed to the run */
+   uint32_t sr[CS_RUN_FULLSCREEN_SR_COUNT]; /* one entry per SR mask bit */
+} __attribute__((aligned(64)));
+/* Call cs_run_fullscreen() and, when tracing is enabled, log a trace entry. */
+static inline void
+cs_trace_run_fullscreen(struct cs_builder *b, const struct cs_tracing_ctx *ctx,
+                        struct cs_index scratch_regs, uint32_t flags_override,
+                        struct cs_index dcd)
+{
+   if (likely(!ctx->enabled)) { /* tracing off: plain run, nothing recorded */
+      cs_run_fullscreen(b, flags_override, dcd);
+      return;
+   }
+
+   struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg);
+   struct cs_index data = cs_reg64(b, scratch_regs.reg + 2);
+   /* tracebuf_addr is not written here; presumably the preamble sets it up. */
+   cs_trace_preamble(b, ctx, scratch_regs,
+                     sizeof(struct cs_run_fullscreen_trace));
+
+   /* cs_run_xx() must immediately follow cs_load_ip_to(), otherwise the IP
+    * won't point to the right instruction. */
+   cs_load_ip_to(b, data);
+   cs_run_fullscreen(b, flags_override, dcd);
+   cs_store64(b, data, tracebuf_addr,
+              cs_trace_field_offset(run_fullscreen, ip));
+   /* Also log the dcd register; the decoder restores regs[I.dcd] from it. */
+   cs_store64(b, dcd, tracebuf_addr,
+              cs_trace_field_offset(run_fullscreen, dcd));
+   /* Store the SR-mask registers, one 16-register window at a time. */
+   ASSERTED unsigned sr_count = 0;
+   unsigned sr_offset = cs_trace_field_offset(run_fullscreen, sr);
+   for (unsigned i = 0; i < 64; i += 16) {
+      unsigned mask = (CS_RUN_FULLSCREEN_SR_MASK >> i) & BITFIELD_MASK(16);
+      if (!mask) /* no traced register in this window */
+         continue;
+      /* NOTE(review): assumes masked cs_store() packs the regs -- confirm. */
+      cs_store(b, cs_reg_tuple(b, i, util_last_bit(mask)),
+               tracebuf_addr, mask, sr_offset);
+      sr_offset += util_bitcount(mask) * sizeof(uint32_t);
+      sr_count += util_bitcount(mask);
+   }
+   assert(sr_count == CS_RUN_FULLSCREEN_SR_COUNT);
+
+   cs_flush_stores(b);
+}
+
 #if PAN_ARCH >= 12
 struct cs_run_idvs2_trace {
    uint64_t ip;
diff --git a/src/panfrost/genxml/decode_csf.c b/src/panfrost/genxml/decode_csf.c
index 453ea850b78..0b7dce24982 100644
--- a/src/panfrost/genxml/decode_csf.c
+++ b/src/panfrost/genxml/decode_csf.c
@@ -2450,6 +2450,7 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin,
       case MALI_CS_OPCODE_RUN_IDVS:
 #endif
       case MALI_CS_OPCODE_RUN_FRAGMENT:
+      case MALI_CS_OPCODE_RUN_FULLSCREEN:
       case MALI_CS_OPCODE_RUN_COMPUTE:
       case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT:
          fprintf(ctx->dump_stream, " // tracepoint_%" PRIx64,
@@ -2569,6 +2570,22 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace,
          break;
       }
 
+      case MALI_CS_OPCODE_RUN_FULLSCREEN: {
+         struct cs_run_fullscreen_trace *fs_trace = trace_data;
+
+         assert(trace_size >= sizeof(*fs_trace));
+         cs_unpack(instr, CS_RUN_FULLSCREEN, I);
+         regs[I.dcd + 0] = (uint32_t)(fs_trace->dcd);       /* low 32 bits */
+         regs[I.dcd + 1] = (uint32_t)(fs_trace->dcd >> 32); /* high 32 bits */
+         uint32_t sr_idx = 0; /* next unread entry of fs_trace->sr[] */
+         u_foreach_bit64(b, CS_RUN_FULLSCREEN_SR_MASK)
+            regs[b] = fs_trace->sr[sr_idx++];
+         pandecode_run_fullscreen(ctx, ctx->dump_stream, &qctx, &I);
+         trace_data = fs_trace + 1; /* step past this record */
+         trace_size -= sizeof(*fs_trace);
+         break;
+      }
+
       case MALI_CS_OPCODE_RUN_COMPUTE: {
          struct cs_run_compute_trace *comp_trace = trace_data;
 