From 8c744c5dc0464b55a7680d48b20d51152243949f Mon Sep 17 00:00:00 2001 From: Marc Alcala Prieto Date: Fri, 17 Apr 2026 11:16:05 +0200 Subject: [PATCH] pan/genxml: Implement RUN_FRAGMENT2 Add support for emitting and decoding RUN_FRAGMENT2 instructions. Some existing decoding logic from decode.c is modified to be reusable by the new RUN_FRAGMENT2 decoding logic. Reviewed-by: Lars-Ivar Hesselberg Simonsen --- src/panfrost/genxml/cs_builder.h | 69 +++++++++++++++ src/panfrost/genxml/decode.c | 111 +++++++++++++----------- src/panfrost/genxml/decode.h | 18 ++++ src/panfrost/genxml/decode_csf.c | 143 +++++++++++++++++++++++++++++++ 4 files changed, 293 insertions(+), 48 deletions(-) diff --git a/src/panfrost/genxml/cs_builder.h b/src/panfrost/genxml/cs_builder.h index a109f4d113b..ae0653a1f84 100644 --- a/src/panfrost/genxml/cs_builder.h +++ b/src/panfrost/genxml/cs_builder.h @@ -824,7 +824,11 @@ cs_instr_is_asynchronous(enum mali_cs_opcode opcode, uint16_t wait_mask) case MALI_CS_OPCODE_STORE_MULTIPLE: case MALI_CS_OPCODE_RUN_COMPUTE: case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: +#if PAN_ARCH >= 14 + case MALI_CS_OPCODE_RUN_FRAGMENT2: +#else case MALI_CS_OPCODE_RUN_FRAGMENT: +#endif case MALI_CS_OPCODE_RUN_FULLSCREEN: #if PAN_ARCH >= 12 case MALI_CS_OPCODE_RUN_IDVS2: @@ -1614,6 +1618,22 @@ cs_run_idvs(struct cs_builder *b, uint32_t flags_override, bool malloc_enable, } #endif +#if PAN_ARCH >= 14 +static inline void +cs_run_fragment2(struct cs_builder *b, bool enable_tem, + enum mali_tile_render_order tile_order) +{ + /* Staging regs */ + cs_flush_loads(b); + + b->req_resource_mask |= CS_FRAG_RES; + + cs_emit(b, RUN_FRAGMENT2, I) { + I.enable_tem = enable_tem; + I.tile_order = tile_order; + } +} +#else static inline void cs_run_fragment(struct cs_builder *b, bool enable_tem, enum mali_tile_render_order tile_order) @@ -1628,6 +1648,7 @@ cs_run_fragment(struct cs_builder *b, bool enable_tem, I.tile_order = tile_order; } } +#endif static inline void cs_run_fullscreen(struct 
cs_builder *b, uint32_t flags_override, @@ -2469,6 +2490,53 @@ cs_trace_preamble(struct cs_builder *b, const struct cs_tracing_ctx *ctx, (int16_t)(offsetof(struct cs_##__type##_trace, __field) - \ sizeof(struct cs_##__type##_trace)) +#if PAN_ARCH >= 14 +#define CS_RUN_FRAGMENT2_SR_COUNT 56 +#define CS_RUN_FRAGMENT2_SR_MASK BITFIELD64_RANGE(0, CS_RUN_FRAGMENT2_SR_COUNT) +struct cs_run_fragment2_trace { + uint64_t ip; + uint32_t sr[CS_RUN_FRAGMENT2_SR_COUNT]; +} __attribute__((aligned(64))); + +static inline void +cs_trace_run_fragment2(struct cs_builder *b, const struct cs_tracing_ctx *ctx, + struct cs_index scratch_regs, bool enable_tem, + enum mali_tile_render_order tile_order) +{ + if (likely(!ctx->enabled)) { + cs_run_fragment2(b, enable_tem, tile_order); + return; + } + + struct cs_index tracebuf_addr = cs_reg64(b, scratch_regs.reg); + struct cs_index data = cs_reg64(b, scratch_regs.reg + 2); + + cs_trace_preamble(b, ctx, scratch_regs, + sizeof(struct cs_run_fragment2_trace)); + + /* cs_run_xx() must immediately follow cs_load_ip_to() otherwise the IP + * won't point to the right instruction. 
*/ + cs_load_ip_to(b, data); + cs_run_fragment2(b, enable_tem, tile_order); + cs_store64(b, data, tracebuf_addr, cs_trace_field_offset(run_fragment2, ip)); + + ASSERTED unsigned sr_count = 0; + unsigned sr_offset = cs_trace_field_offset(run_fragment2, sr); + for (unsigned i = 0; i < CS_RUN_FRAGMENT2_SR_COUNT; i += 16) { + unsigned mask = (CS_RUN_FRAGMENT2_SR_MASK >> i) & BITFIELD_MASK(16); + if (!mask) + continue; + + cs_store(b, cs_reg_tuple(b, i, util_last_bit(mask)), tracebuf_addr, mask, + sr_offset); + sr_offset += util_bitcount(mask) * sizeof(uint32_t); + sr_count += util_bitcount(mask); + } + assert(sr_count == CS_RUN_FRAGMENT2_SR_COUNT); + + cs_flush_stores(b); +} +#else struct cs_run_fragment_trace { uint64_t ip; uint32_t sr[7]; @@ -2500,6 +2568,7 @@ cs_trace_run_fragment(struct cs_builder *b, const struct cs_tracing_ctx *ctx, cs_trace_field_offset(run_fragment, sr)); cs_flush_stores(b); } +#endif #if PAN_ARCH >= 13 #define CS_RUN_FULLSCREEN_SR_MASK \ diff --git a/src/panfrost/genxml/decode.c b/src/panfrost/genxml/decode.c index 38a2e696e4d..bda0431d33e 100644 --- a/src/panfrost/genxml/decode.c +++ b/src/panfrost/genxml/decode.c @@ -152,22 +152,22 @@ pandecode_rt(struct pandecode_context *ctx, unsigned index, uint64_t gpu_va) } -static void -pandecode_rts(struct pandecode_context *ctx, uint64_t gpu_va, - const struct MALI_FRAMEBUFFER_PARAMETERS *fb) +void +GENX(pandecode_rts)(struct pandecode_context *ctx, uint64_t gpu_va, + uint32_t render_target_count) { pandecode_log(ctx, "Color Render Targets @%" PRIx64 ":\n", gpu_va); ctx->indent++; - for (int i = 0; i < (fb->render_target_count); i++) + for (int i = 0; i < render_target_count; i++) pandecode_rt(ctx, i, gpu_va); ctx->indent--; pandecode_log(ctx, "\n"); } -static void -pandecode_zs_crc_ext(struct pandecode_context *ctx, uint64_t gpu_va) +void +GENX(pandecode_zs_crc_ext)(struct pandecode_context *ctx, uint64_t gpu_va) { const struct mali_zs_crc_extension_packed *PANDECODE_PTR_VAR( ctx, zs_crc_packed, 
(uint64_t)gpu_va); @@ -223,22 +223,65 @@ pandecode_zs_crc_ext(struct pandecode_context *ctx, uint64_t gpu_va) #if PAN_ARCH >= 6 -static void -pandecode_sample_locations(struct pandecode_context *ctx, const void *fb) +void +GENX(pandecode_frame_shader_dcds)(struct pandecode_context *ctx, + uint64_t dcd_pointer, unsigned pre_frame_0, + unsigned pre_frame_1, unsigned post_frame, + unsigned job_type_param, uint64_t gpu_id) { - pan_section_unpack(fb, FRAMEBUFFER, PARAMETERS, params); + const unsigned dcd_size = pan_size(DRAW); - const uint16_t *PANDECODE_PTR_VAR(ctx, samples, params.sample_locations); + if (pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { + const struct mali_draw_packed *PANDECODE_PTR_VAR( + ctx, dcd, dcd_pointer + (0 * dcd_size)); + pan_unpack(dcd, DRAW, draw) + ; + pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n", dcd_pointer, + pre_frame_0); + ctx->indent++; + GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id); + ctx->indent--; + } - pandecode_log(ctx, "Sample locations @%" PRIx64 ":\n", - params.sample_locations); + if (pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { + const struct mali_draw_packed *PANDECODE_PTR_VAR( + ctx, dcd, dcd_pointer + (1 * dcd_size)); + pan_unpack(dcd, DRAW, draw) + ; + pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n", + dcd_pointer + (1 * dcd_size)); + ctx->indent++; + GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id); + ctx->indent--; + } + + if (post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { + const struct mali_draw_packed *PANDECODE_PTR_VAR( + ctx, dcd, dcd_pointer + (2 * dcd_size)); + pan_unpack(dcd, DRAW, draw) + ; + pandecode_log(ctx, "Post frame:\n"); + ctx->indent++; + GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id); + ctx->indent--; + } +} + +void +GENX(pandecode_sample_locations)(struct pandecode_context *ctx, + uint64_t sample_locations) +{ + const uint16_t *PANDECODE_PTR_VAR(ctx, samples, sample_locations); + + pandecode_log(ctx, "Sample locations @%" PRIx64 
":\n", sample_locations); for (int i = 0; i < 33; i++) { pandecode_log(ctx, " (%d, %d),\n", samples[2 * i] - 128, samples[2 * i + 1] - 128); } } -#endif +#endif /* PAN_ARCH >= 6 */ +#if PAN_ARCH < 14 struct pandecode_fbd GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, bool is_fragment, uint64_t gpu_id) @@ -248,46 +291,17 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, DUMP_UNPACKED(ctx, FRAMEBUFFER_PARAMETERS, params, "Parameters:\n"); #if PAN_ARCH >= 6 - pandecode_sample_locations(ctx, fb); + GENX(pandecode_sample_locations)(ctx, params.sample_locations); - unsigned dcd_size = pan_size(DRAW); unsigned job_type_param = 0; #if PAN_ARCH <= 9 job_type_param = MALI_JOB_TYPE_FRAGMENT; #endif - if (params.pre_frame_0 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { - const struct mali_draw_packed *PANDECODE_PTR_VAR( - ctx, dcd, params.frame_shader_dcds + (0 * dcd_size)); - pan_unpack(dcd, DRAW, draw); - pandecode_log(ctx, "Pre frame 0 @%" PRIx64 " (mode=%d):\n", - params.frame_shader_dcds, params.pre_frame_0); - ctx->indent++; - GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id); - ctx->indent--; - } - - if (params.pre_frame_1 != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { - const struct mali_draw_packed *PANDECODE_PTR_VAR( - ctx, dcd, params.frame_shader_dcds + (1 * dcd_size)); - pan_unpack(dcd, DRAW, draw); - pandecode_log(ctx, "Pre frame 1 @%" PRIx64 ":\n", - params.frame_shader_dcds + (1 * dcd_size)); - ctx->indent++; - GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id); - ctx->indent--; - } - - if (params.post_frame != MALI_PRE_POST_FRAME_SHADER_MODE_NEVER) { - const struct mali_draw_packed *PANDECODE_PTR_VAR( - ctx, dcd, params.frame_shader_dcds + (2 * dcd_size)); - pan_unpack(dcd, DRAW, draw); - pandecode_log(ctx, "Post frame:\n"); - ctx->indent++; - GENX(pandecode_dcd)(ctx, &draw, job_type_param, gpu_id); - ctx->indent--; - } + GENX(pandecode_frame_shader_dcds) + (ctx, params.frame_shader_dcds, params.pre_frame_0, 
params.pre_frame_1, + params.post_frame, job_type_param, gpu_id); #else DUMP_SECTION(ctx, FRAMEBUFFER, LOCAL_STORAGE, fb, "Local Storage:\n"); @@ -312,13 +326,13 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, gpu_va += pan_size(FRAMEBUFFER); if (params.has_zs_crc_extension) { - pandecode_zs_crc_ext(ctx, gpu_va); + GENX(pandecode_zs_crc_ext)(ctx, gpu_va); gpu_va += pan_size(ZS_CRC_EXTENSION); } if (is_fragment) - pandecode_rts(ctx, gpu_va, &params); + GENX(pandecode_rts)(ctx, gpu_va, params.render_target_count); return (struct pandecode_fbd){ .rt_count = params.render_target_count, @@ -336,6 +350,7 @@ GENX(pandecode_fbd)(struct pandecode_context *ctx, uint64_t gpu_va, }; #endif } +#endif /* PAN_ARCH < 14 */ #if PAN_ARCH >= 5 uint64_t diff --git a/src/panfrost/genxml/decode.h b/src/panfrost/genxml/decode.h index f7d83ca5525..bc9f743f9b7 100644 --- a/src/panfrost/genxml/decode.h +++ b/src/panfrost/genxml/decode.h @@ -275,4 +275,22 @@ void GENX(pandecode_depth_stencil)(struct pandecode_context *ctx, #endif +#if PAN_ARCH >= 6 +void GENX(pandecode_sample_locations)(struct pandecode_context *ctx, + uint64_t sample_locations); + +void + GENX(pandecode_frame_shader_dcds)(struct pandecode_context *ctx, + uint64_t dcd_pointer, unsigned pre_frame_0, + unsigned pre_frame_1, unsigned post_frame, + unsigned job_type_param, uint64_t gpu_id); +#endif + +#if PAN_ARCH >= 5 +void GENX(pandecode_rts)(struct pandecode_context *ctx, uint64_t gpu_va, + uint32_t render_target_count); + +void GENX(pandecode_zs_crc_ext)(struct pandecode_context *ctx, uint64_t gpu_va); +#endif + #endif /* __MMAP_TRACE_H__ */ diff --git a/src/panfrost/genxml/decode_csf.c b/src/panfrost/genxml/decode_csf.c index ca3b4807950..efb8be00544 100644 --- a/src/panfrost/genxml/decode_csf.c +++ b/src/panfrost/genxml/decode_csf.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2022-2023 Collabora, Ltd. + * Copyright (C) 2026 Arm Ltd.
* SPDX-License-Identifier: MIT */ @@ -343,6 +344,23 @@ print_cs_instr(FILE *fp, const uint64_t *instr) } #endif +#if PAN_ARCH >= 14 + case MALI_CS_OPCODE_RUN_FRAGMENT2: { + static const char *tile_order[] = { + "zorder", "horizontal", "vertical", "unknown", + "unknown", "rev_horizontal", "rev_vertical", "unknown", + "unknown", "unknown", "unknown", "unknown", + "unknown", "unknown", "unknown", "unknown", + }; + + cs_unpack(instr, CS_RUN_FRAGMENT2, I); + + fprintf(fp, "RUN_FRAGMENT2%s.tile_order=%s", + I.enable_tem ? ".tile_enable_map_enable" : "", + tile_order[I.tile_order]); + break; + } +#else case MALI_CS_OPCODE_RUN_FRAGMENT: { static const char *tile_order[] = { "zorder", "horizontal", "vertical", "unknown", @@ -350,6 +368,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr) "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", }; + cs_unpack(instr, CS_RUN_FRAGMENT, I); fprintf(fp, "RUN_FRAGMENT%s%s.tile_order=%s", @@ -358,6 +377,7 @@ print_cs_instr(FILE *fp, const uint64_t *instr) tile_order[I.tile_order]); break; } +#endif case MALI_CS_OPCODE_RUN_FULLSCREEN: { cs_unpack(instr, CS_RUN_FULLSCREEN, I); @@ -1097,6 +1117,101 @@ pandecode_run_idvs(struct pandecode_context *ctx, FILE *fp, } #endif +#if PAN_ARCH >= 14 +static void +pandecode_run_fragment2(struct pandecode_context *ctx, FILE *fp, + struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT2 *I) +{ + if (qctx->in_exception_handler) + return; + + ctx->indent++; + + pandecode_log(ctx, "Iter trace ID0: %" PRIu32 "\n", + cs_get_u32(qctx, MALI_FRAGMENT_SR_ITER_TRACE_ID0)); + pandecode_log(ctx, "Iter trace ID1: %" PRIu32 "\n", + cs_get_u32(qctx, MALI_FRAGMENT_SR_ITER_TRACE_ID1)); + pandecode_log(ctx, "TEM pointer: %" PRIx64 "\n", + cs_get_u64(qctx, MALI_FRAGMENT_SR_TEM_POINTER)); + pandecode_log(ctx, "TEM row stride: %" PRIu32 "\n", + cs_get_u32(qctx, MALI_FRAGMENT_SR_TEM_ROW_STRIDE)); + + for (unsigned i = 0; i < 11; ++i) { + const unsigned reg = 
MALI_FRAGMENT_SR_IRD_BUFFER_POINTER_0 + (i * 2); + pandecode_log(ctx, "IRD buffer pointer %u: %" PRIx64 "\n", i, + cs_get_u64(qctx, reg)); + } + + DUMP_CL(ctx, FRAGMENT_FLAGS_3, &qctx->regs[MALI_FRAGMENT_SR_FLAGS_3], + "Flags 3:\n"); + DUMP_CL(ctx, FRAGMENT_BOUNDING_BOX, + &qctx->regs[MALI_FRAGMENT_SR_BOUNDING_BOX], "Bounding Box:\n"); + DUMP_CL(ctx, FRAME_SIZE, &qctx->regs[MALI_FRAGMENT_SR_FRAME_SIZE], + "Frame size:\n"); + + pan_unpack((const struct mali_fragment_flags_0_packed *)&qctx + ->regs[MALI_FRAGMENT_SR_FLAGS_0], + FRAGMENT_FLAGS_0, flags0_unpacked) + ; + DUMP_UNPACKED(ctx, FRAGMENT_FLAGS_0, flags0_unpacked, "Flags 0:\n"); + + pan_unpack((const struct mali_fragment_flags_1_packed *)&qctx + ->regs[MALI_FRAGMENT_SR_FLAGS_1], + FRAGMENT_FLAGS_1, flags1_unpacked) + ; + DUMP_UNPACKED(ctx, FRAGMENT_FLAGS_1, flags1_unpacked, "Flags 1:\n"); + + DUMP_CL(ctx, FRAGMENT_FLAGS_2, &qctx->regs[MALI_FRAGMENT_SR_FLAGS_2], + "Flags 2:\n"); + pandecode_log(ctx, "Z clear: %f\n", + uif(cs_get_u32(qctx, MALI_FRAGMENT_SR_Z_CLEAR))); + + const uint64_t tiler_pointer = + cs_get_u64(qctx, MALI_FRAGMENT_SR_TILER_DESCRIPTOR_POINTER); + pandecode_log(ctx, "Tiler descriptor pointer: 0x%" PRIx64 "\n", + tiler_pointer); + + const uint64_t rtd_pointer = cs_get_u64(qctx, MALI_FRAGMENT_SR_RTD_POINTER); + pandecode_log(ctx, "RTD pointer: 0x%" PRIx64 "\n", rtd_pointer); + + const uint64_t dbd_pointer = cs_get_u64(qctx, MALI_FRAGMENT_SR_DBD_POINTER); + pandecode_log(ctx, "DBD pointer: 0x%" PRIx64 "\n", dbd_pointer); + + pandecode_log(ctx, "Frame argument: %" PRIx64 "\n", + cs_get_u64(qctx, MALI_FRAGMENT_SR_FRAME_ARG)); + + const uint64_t sample_locations = + cs_get_u64(qctx, MALI_FRAGMENT_SR_SAMPLE_POSITION_ARRAY_POINTER); + pandecode_log(ctx, "Sample locations: 0x%" PRIx64 "\n", sample_locations); + + const uint64_t dcd_pointer = + cs_get_u64(qctx, MALI_FRAGMENT_SR_FRAME_SHADER_DCD_POINTER); + pandecode_log(ctx, "Frame shader DCD pointer: 0x%" PRIx64 "\n", dcd_pointer); + + DUMP_CL(ctx, 
VRS_IMAGE, &qctx->regs[MALI_FRAGMENT_SR_VRS_IMAGE], + "VRS image:\n"); + + GENX(pandecode_sample_locations) + (ctx, sample_locations); + + const unsigned job_type_param = 0; + GENX(pandecode_frame_shader_dcds) + (ctx, dcd_pointer, flags0_unpacked.pre_frame_0, flags0_unpacked.pre_frame_1, + flags0_unpacked.post_frame, job_type_param, qctx->gpu_id); + + if (tiler_pointer) + GENX(pandecode_tiler)(ctx, tiler_pointer); + + if (dbd_pointer) + GENX(pandecode_zs_crc_ext)(ctx, dbd_pointer); + + if (rtd_pointer) + GENX(pandecode_rts) + (ctx, rtd_pointer, flags1_unpacked.render_target_count); + + ctx->indent--; +} +#else static void pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp, struct queue_ctx *qctx, struct MALI_CS_RUN_FRAGMENT *I) @@ -1115,6 +1230,7 @@ pandecode_run_fragment(struct pandecode_context *ctx, FILE *fp, ctx->indent--; } +#endif /* PAN_ARCH >= 14 */ static void pandecode_run_fullscreen(struct pandecode_context *ctx, FILE *fp, @@ -1261,11 +1377,19 @@ interpret_cs_instr(struct pandecode_context *ctx, struct queue_ctx *qctx) } #endif +#if PAN_ARCH >= 14 + case MALI_CS_OPCODE_RUN_FRAGMENT2: { + cs_unpack(bytes, CS_RUN_FRAGMENT2, I); + pandecode_run_fragment2(ctx, fp, qctx, &I); + break; + } +#else case MALI_CS_OPCODE_RUN_FRAGMENT: { cs_unpack(bytes, CS_RUN_FRAGMENT, I); pandecode_run_fragment(ctx, fp, qctx, &I); break; } +#endif case MALI_CS_OPCODE_RUN_FULLSCREEN: { cs_unpack(bytes, CS_RUN_FULLSCREEN, I); @@ -2430,7 +2554,12 @@ print_cs_binary(struct pandecode_context *ctx, uint64_t bin, #else case MALI_CS_OPCODE_RUN_IDVS: #endif + +#if PAN_ARCH >= 14 + case MALI_CS_OPCODE_RUN_FRAGMENT2: +#else case MALI_CS_OPCODE_RUN_FRAGMENT: +#endif case MALI_CS_OPCODE_RUN_FULLSCREEN: case MALI_CS_OPCODE_RUN_COMPUTE: case MALI_CS_OPCODE_RUN_COMPUTE_INDIRECT: @@ -2539,6 +2668,19 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace, } #endif +#if PAN_ARCH >= 14 + case MALI_CS_OPCODE_RUN_FRAGMENT2: { + struct cs_run_fragment2_trace *frag_trace = 
trace_data; + + assert(trace_size >= sizeof(*frag_trace)); + cs_unpack(instr, CS_RUN_FRAGMENT2, I); + memcpy(&regs[0], frag_trace->sr, sizeof(frag_trace->sr)); + pandecode_run_fragment2(ctx, ctx->dump_stream, &qctx, &I); + trace_data = frag_trace + 1; + trace_size -= sizeof(*frag_trace); + break; + } +#else case MALI_CS_OPCODE_RUN_FRAGMENT: { struct cs_run_fragment_trace *frag_trace = trace_data; @@ -2550,6 +2692,7 @@ GENX(pandecode_cs_trace)(struct pandecode_context *ctx, uint64_t trace, trace_size -= sizeof(*frag_trace); break; } +#endif case MALI_CS_OPCODE_RUN_FULLSCREEN: { struct cs_run_fullscreen_trace *fs_trace = trace_data;