diff --git a/src/asahi/genxml/cmdbuf.xml b/src/asahi/genxml/cmdbuf.xml
index 7e07d54e132..387420a1034 100644
--- a/src/asahi/genxml/cmdbuf.xml
+++ b/src/asahi/genxml/cmdbuf.xml
@@ -1010,14 +1010,17 @@
-
+
-
+
+
+
+
diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c
index 1689f2e959c..35055450d67 100644
--- a/src/asahi/lib/decode.c
+++ b/src/asahi/lib/decode.c
@@ -665,7 +665,8 @@ agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
switch (block_type) {
case AGX_CDM_BLOCK_TYPE_LAUNCH: {
- size_t length = AGX_CDM_LAUNCH_LENGTH;
+ size_t length =
+ AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH;
#define CDM_PRINT(STRUCT_NAME, human) \
do { \
@@ -674,17 +675,20 @@ agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
length += AGX_CDM_##STRUCT_NAME##_LENGTH; \
} while (0);
- agx_unpack(agxdecode_dump_stream, map, CDM_LAUNCH, hdr);
- agxdecode_stateful(ctx, hdr.pipeline, "Pipeline", agxdecode_usc, verbose,
- params, &hdr.sampler_state_register_count);
- DUMP_UNPACKED(CDM_LAUNCH, hdr, "Compute\n");
- map += AGX_CDM_LAUNCH_LENGTH;
+ agx_unpack(agxdecode_dump_stream, map + 0, CDM_LAUNCH_WORD_0, hdr0);
+ agx_unpack(agxdecode_dump_stream, map + 4, CDM_LAUNCH_WORD_1, hdr1);
+
+ agxdecode_stateful(ctx, hdr1.pipeline, "Pipeline", agxdecode_usc, verbose,
+ params, &hdr0.sampler_state_register_count);
+ DUMP_UNPACKED(CDM_LAUNCH_WORD_0, hdr0, "Compute\n");
+ DUMP_UNPACKED(CDM_LAUNCH_WORD_1, hdr1, "Compute\n");
+ map += 8;
/* Added in G14X */
if (params->gpu_generation >= 14 && params->num_clusters_total > 1)
CDM_PRINT(UNK_G14X, "Unknown G14X");
- switch (hdr.mode) {
+ switch (hdr0.mode) {
case AGX_CDM_MODE_DIRECT:
CDM_PRINT(GLOBAL_SIZE, "Global size");
CDM_PRINT(LOCAL_SIZE, "Local size");
@@ -697,7 +701,7 @@ agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
CDM_PRINT(INDIRECT, "Indirect buffer");
break;
default:
- fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr.mode);
+ fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr0.mode);
break;
}
diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c
index 494cc2a7532..d41301a1410 100644
--- a/src/gallium/drivers/asahi/agx_state.c
+++ b/src/gallium/drivers/asahi/agx_state.c
@@ -4037,9 +4037,10 @@ agx_launch_gs_prerast(struct agx_batch *batch,
agx_ensure_cmdbuf_has_space(
batch, &batch->cdm,
- 8 * (AGX_CDM_LAUNCH_LENGTH + AGX_CDM_UNK_G14X_LENGTH +
- AGX_CDM_INDIRECT_LENGTH + AGX_CDM_GLOBAL_SIZE_LENGTH +
- AGX_CDM_LOCAL_SIZE_LENGTH + AGX_CDM_BARRIER_LENGTH));
+ 8 * (AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH +
+ AGX_CDM_UNK_G14X_LENGTH + AGX_CDM_INDIRECT_LENGTH +
+ AGX_CDM_GLOBAL_SIZE_LENGTH + AGX_CDM_LOCAL_SIZE_LENGTH +
+ AGX_CDM_BARRIER_LENGTH));
assert(!info->primitive_restart && "should have been lowered");
@@ -5216,7 +5217,7 @@ agx_launch(struct agx_batch *batch, const struct pipe_grid_info *info,
/* TODO: Ensure space if we allow multiple kernels in a batch */
uint8_t *out = batch->cdm.current;
- agx_push(out, CDM_LAUNCH, cfg) {
+ agx_push(out, CDM_LAUNCH_WORD_0, cfg) {
if (info->indirect)
cfg.mode = AGX_CDM_MODE_INDIRECT_GLOBAL;
else
@@ -5227,6 +5228,9 @@ agx_launch(struct agx_batch *batch, const struct pipe_grid_info *info,
cfg.texture_state_register_count = agx_nr_tex_descriptors(batch, cs);
cfg.sampler_state_register_count =
translate_sampler_state_count(ctx, cs, stage);
+ }
+
+ agx_push(out, CDM_LAUNCH_WORD_1, cfg) {
cfg.pipeline =
agx_build_pipeline(batch, cs, linked, PIPE_SHADER_COMPUTE,
info->variable_shared_mem, subgroups_per_core);
@@ -5367,9 +5371,10 @@ agx_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
* in practice, we can use CDM stream links.
*/
size_t dispatch_upper_bound =
- AGX_CDM_LAUNCH_LENGTH + AGX_CDM_UNK_G14X_LENGTH +
- AGX_CDM_INDIRECT_LENGTH + AGX_CDM_GLOBAL_SIZE_LENGTH +
- AGX_CDM_LOCAL_SIZE_LENGTH + AGX_CDM_BARRIER_LENGTH;
+ AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH +
+ AGX_CDM_UNK_G14X_LENGTH + AGX_CDM_INDIRECT_LENGTH +
+ AGX_CDM_GLOBAL_SIZE_LENGTH + AGX_CDM_LOCAL_SIZE_LENGTH +
+ AGX_CDM_BARRIER_LENGTH;
if (batch->cdm.current + dispatch_upper_bound >= batch->cdm.end)
agx_flush_batch_for_reason(ctx, batch, "CDM overfull");