diff --git a/src/asahi/genxml/cmdbuf.xml b/src/asahi/genxml/cmdbuf.xml index 7e07d54e132..387420a1034 100644 --- a/src/asahi/genxml/cmdbuf.xml +++ b/src/asahi/genxml/cmdbuf.xml @@ -1010,14 +1010,17 @@ - + - + + + + diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index 1689f2e959c..35055450d67 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -665,7 +665,8 @@ agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link, switch (block_type) { case AGX_CDM_BLOCK_TYPE_LAUNCH: { - size_t length = AGX_CDM_LAUNCH_LENGTH; + size_t length = + AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH; #define CDM_PRINT(STRUCT_NAME, human) \ do { \ @@ -674,17 +675,20 @@ agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link, length += AGX_CDM_##STRUCT_NAME##_LENGTH; \ } while (0); - agx_unpack(agxdecode_dump_stream, map, CDM_LAUNCH, hdr); - agxdecode_stateful(ctx, hdr.pipeline, "Pipeline", agxdecode_usc, verbose, - params, &hdr.sampler_state_register_count); - DUMP_UNPACKED(CDM_LAUNCH, hdr, "Compute\n"); - map += AGX_CDM_LAUNCH_LENGTH; + agx_unpack(agxdecode_dump_stream, map + 0, CDM_LAUNCH_WORD_0, hdr0); + agx_unpack(agxdecode_dump_stream, map + 4, CDM_LAUNCH_WORD_1, hdr1); + + agxdecode_stateful(ctx, hdr1.pipeline, "Pipeline", agxdecode_usc, verbose, + params, &hdr0.sampler_state_register_count); + DUMP_UNPACKED(CDM_LAUNCH_WORD_0, hdr0, "Compute\n"); + DUMP_UNPACKED(CDM_LAUNCH_WORD_1, hdr1, "Compute\n"); + map += 8; /* Added in G14X */ if (params->gpu_generation >= 14 && params->num_clusters_total > 1) CDM_PRINT(UNK_G14X, "Unknown G14X"); - switch (hdr.mode) { + switch (hdr0.mode) { case AGX_CDM_MODE_DIRECT: CDM_PRINT(GLOBAL_SIZE, "Global size"); CDM_PRINT(LOCAL_SIZE, "Local size"); @@ -697,7 +701,7 @@ agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link, CDM_PRINT(INDIRECT, "Indirect buffer"); break; default: - fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr.mode); + fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr0.mode); break; } diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 494cc2a7532..d41301a1410 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -4037,9 +4037,10 @@ agx_launch_gs_prerast(struct agx_batch *batch, agx_ensure_cmdbuf_has_space( batch, &batch->cdm, - 8 * (AGX_CDM_LAUNCH_LENGTH + AGX_CDM_UNK_G14X_LENGTH + - AGX_CDM_INDIRECT_LENGTH + AGX_CDM_GLOBAL_SIZE_LENGTH + - AGX_CDM_LOCAL_SIZE_LENGTH + AGX_CDM_BARRIER_LENGTH)); + 8 * (AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH + + AGX_CDM_UNK_G14X_LENGTH + AGX_CDM_INDIRECT_LENGTH + + AGX_CDM_GLOBAL_SIZE_LENGTH + AGX_CDM_LOCAL_SIZE_LENGTH + + AGX_CDM_BARRIER_LENGTH)); assert(!info->primitive_restart && "should have been lowered"); @@ -5216,7 +5217,7 @@ agx_launch(struct agx_batch *batch, const struct pipe_grid_info *info, /* TODO: Ensure space if we allow multiple kernels in a batch */ uint8_t *out = batch->cdm.current; - agx_push(out, CDM_LAUNCH, cfg) { + agx_push(out, CDM_LAUNCH_WORD_0, cfg) { if (info->indirect) cfg.mode = AGX_CDM_MODE_INDIRECT_GLOBAL; else @@ -5227,6 +5228,9 @@ agx_launch(struct agx_batch *batch, const struct pipe_grid_info *info, cfg.texture_state_register_count = agx_nr_tex_descriptors(batch, cs); cfg.sampler_state_register_count = translate_sampler_state_count(ctx, cs, stage); + } + + agx_push(out, CDM_LAUNCH_WORD_1, cfg) { cfg.pipeline = agx_build_pipeline(batch, cs, linked, PIPE_SHADER_COMPUTE, info->variable_shared_mem, subgroups_per_core); @@ -5367,9 +5371,10 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) * in practice, we can use CDM stream links. */ size_t dispatch_upper_bound = - AGX_CDM_LAUNCH_LENGTH + AGX_CDM_UNK_G14X_LENGTH + - AGX_CDM_INDIRECT_LENGTH + AGX_CDM_GLOBAL_SIZE_LENGTH + - AGX_CDM_LOCAL_SIZE_LENGTH + AGX_CDM_BARRIER_LENGTH; + AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH + + AGX_CDM_UNK_G14X_LENGTH + AGX_CDM_INDIRECT_LENGTH + + AGX_CDM_GLOBAL_SIZE_LENGTH + AGX_CDM_LOCAL_SIZE_LENGTH + + AGX_CDM_BARRIER_LENGTH; if (batch->cdm.current + dispatch_upper_bound >= batch->cdm.end) agx_flush_batch_for_reason(ctx, batch, "CDM overfull");