diff --git a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c
index 13f923b09ba..1dc2908073f 100644
--- a/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c
+++ b/src/panfrost/vulkan/csf/panvk_vX_cmd_dispatch.c
@@ -20,6 +20,7 @@
 #include "panvk_cmd_push_constant.h"
 #include "panvk_device.h"
 #include "panvk_entrypoints.h"
+#include "panvk_macros.h"
 #include "panvk_meta.h"
 #include "panvk_physical_device.h"
 
@@ -128,28 +129,18 @@ calculate_workgroups_per_task(const struct panvk_shader *shader,
    return wg_per_task;
 }
 
-static void
-cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
+uint64_t
+panvk_per_arch(cmd_dispatch_prepare_tls)(struct panvk_cmd_buffer *cmdbuf,
+                                         const struct panvk_shader *shader,
+                                         const struct pan_compute_dim *dim,
+                                         bool indirect)
 {
-   const struct panvk_shader *shader = cmdbuf->state.compute.shader;
-   VkResult result;
-
-   /* If there's no compute shader, we can skip the dispatch. */
-   if (!panvk_priv_mem_dev_addr(shader->spd))
-      return;
-
    struct panvk_physical_device *phys_dev =
       to_panvk_physical_device(cmdbuf->vk.base.device->physical);
-   struct panvk_descriptor_state *desc_state =
-      &cmdbuf->state.compute.desc_state;
-   struct panvk_shader_desc_state *cs_desc_state =
-      &cmdbuf->state.compute.cs.desc;
-   const struct cs_tracing_ctx *tracing_ctx =
-      &cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].tracing;
 
    struct panfrost_ptr tsd = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
    if (!tsd.gpu)
-      return;
+      return tsd.gpu;
 
    struct pan_tls_info tlsinfo = {
       .tls.size = shader->info.tls_size,
@@ -159,8 +150,6 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    unsigned core_count =
      panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);
 
-   bool indirect = info->indirect.buffer_dev_addr != 0;
-
    /* Only used for indirect dispatch */
    unsigned wg_per_task = 0;
    if (indirect)
@@ -180,13 +169,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
       /* TODO: Similar to what we are doing for indirect this should change
        * to calculate the maximum number of workgroups we can execute
        * concurrently. */
-      struct pan_compute_dim dim = {
-         info->direct.wg_count.x,
-         info->direct.wg_count.y,
-         info->direct.wg_count.z,
-      };
-
-      tlsinfo.wls.instances = pan_wls_instances(&dim);
+      tlsinfo.wls.instances = pan_wls_instances(dim);
    }
 
    /* TODO: Clamp WLS instance to some maximum WLS budget. */
@@ -201,7 +184,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
       tlsinfo.wls.ptr =
          panvk_cmd_alloc_dev_mem(cmdbuf, tls, wls_total_size, 4096).gpu;
       if (!tlsinfo.wls.ptr)
-         return;
+         return 0;
    }
 
    cmdbuf->state.tls.info.tls.size =
@@ -210,11 +193,50 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    if (!cmdbuf->state.tls.desc.gpu) {
       cmdbuf->state.tls.desc = panvk_cmd_alloc_desc(cmdbuf, LOCAL_STORAGE);
       if (!cmdbuf->state.tls.desc.gpu)
-         return;
+         return 0;
    }
 
    GENX(pan_emit_tls)(&tlsinfo, tsd.cpu);
 
+   return tsd.gpu;
+}
+
+static void
+cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
+{
+   const struct panvk_shader *shader = cmdbuf->state.compute.shader;
+   VkResult result;
+
+   /* If there's no compute shader, we can skip the dispatch. */
+   if (!panvk_priv_mem_dev_addr(shader->spd))
+      return;
+
+   struct panvk_physical_device *phys_dev =
+      to_panvk_physical_device(cmdbuf->vk.base.device->physical);
+   struct panvk_descriptor_state *desc_state =
+      &cmdbuf->state.compute.desc_state;
+   struct panvk_shader_desc_state *cs_desc_state =
+      &cmdbuf->state.compute.cs.desc;
+   const struct cs_tracing_ctx *tracing_ctx =
+      &cmdbuf->state.cs[PANVK_SUBQUEUE_COMPUTE].tracing;
+
+   struct pan_compute_dim dim = {
+      info->direct.wg_count.x,
+      info->direct.wg_count.y,
+      info->direct.wg_count.z,
+   };
+   bool indirect = info->indirect.buffer_dev_addr != 0;
+
+   uint64_t tsd =
+      panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, shader, &dim, indirect);
+   if (!tsd)
+      return;
+
+   /* Only used for indirect dispatch */
+   unsigned wg_per_task = 0;
+   if (indirect)
+      wg_per_task = calculate_workgroups_per_task(shader, phys_dev);
+
    if (compute_state_dirty(cmdbuf, DESC_STATE) ||
        compute_state_dirty(cmdbuf, CS)) {
       result = panvk_per_arch(cmd_prepare_push_descs)(
@@ -245,11 +267,11 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
    struct cs_builder *b = panvk_get_cs_builder(cmdbuf, PANVK_SUBQUEUE_COMPUTE);
 
    /* Copy the global TLS pointer to the per-job TSD. */
-   if (tlsinfo.tls.size) {
+   if (shader->info.tls_size) {
      cs_move64_to(b, cs_scratch_reg64(b, 0), cmdbuf->state.tls.desc.gpu);
      cs_load64_to(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
      cs_wait_slot(b, SB_ID(LS), false);
-      cs_move64_to(b, cs_scratch_reg64(b, 0), tsd.gpu);
+      cs_move64_to(b, cs_scratch_reg64(b, 0), tsd);
      cs_store64(b, cs_scratch_reg64(b, 2), cs_scratch_reg64(b, 0), 8);
      cs_wait_slot(b, SB_ID(LS), false);
    }
@@ -269,7 +291,7 @@ cmd_dispatch(struct panvk_cmd_buffer *cmdbuf, struct panvk_dispatch_info *info)
 
    cs_move64_to(b, cs_sr_reg64(b, 16),
                 panvk_priv_mem_dev_addr(shader->spd));
-   cs_move64_to(b, cs_sr_reg64(b, 24), tsd.gpu);
+   cs_move64_to(b, cs_sr_reg64(b, 24), tsd);
 
    /* Global attribute offset */
    cs_move32_to(b, cs_sr_reg32(b, 32), 0);
diff --git a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c
index 180ae54996a..4cc70b911fe 100644
--- a/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c
+++ b/src/panfrost/vulkan/jm/panvk_vX_cmd_dispatch.c
@@ -16,6 +16,7 @@
 #include "panvk_cmd_desc_state.h"
 #include "panvk_device.h"
 #include "panvk_entrypoints.h"
+#include "panvk_macros.h"
 #include "panvk_meta.h"
 #include "panvk_physical_device.h"
 
@@ -26,6 +27,36 @@
 
 #include
 
+uint64_t
+panvk_per_arch(cmd_dispatch_prepare_tls)(struct panvk_cmd_buffer *cmdbuf,
+                                         const struct panvk_shader *shader,
+                                         const struct pan_compute_dim *dim,
+                                         bool indirect)
+{
+   struct panvk_batch *batch = cmdbuf->cur_batch;
+
+   assert(batch);
+   assert(!indirect && "Indirect not supported yet!");
+
+   struct panvk_physical_device *phys_dev =
+      to_panvk_physical_device(cmdbuf->vk.base.device->physical);
+
+   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
+
+   batch->tlsinfo.tls.size = shader->info.tls_size;
+   batch->tlsinfo.wls.size = shader->info.wls_size;
+   if (batch->tlsinfo.wls.size) {
+      unsigned core_id_range;
+
+      panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);
+      batch->tlsinfo.wls.instances = pan_wls_instances(dim);
+      batch->wls_total_size = pan_wls_adjust_size(batch->tlsinfo.wls.size) *
+                              batch->tlsinfo.wls.instances * core_id_range;
+   }
+
+   return batch->tls.gpu;
+}
+
 VKAPI_ATTR void VKAPI_CALL
 panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
                                 uint32_t baseGroupX, uint32_t baseGroupY,
@@ -47,9 +78,6 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
      .wg_base = {baseGroupX, baseGroupY, baseGroupZ},
      .direct.wg_count = {groupCountX, groupCountY, groupCountZ},
    };
-   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
-   struct panvk_physical_device *phys_dev =
-      to_panvk_physical_device(dev->vk.physical);
    struct pan_compute_dim wg_count = {groupCountX, groupCountY, groupCountZ};
 
    panvk_per_arch(cmd_close_batch)(cmdbuf);
@@ -60,8 +88,8 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
    struct panvk_shader_desc_state *cs_desc_state =
      &cmdbuf->state.compute.cs.desc;
 
-   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
-   uint64_t tsd = batch->tls.gpu;
+   uint64_t tsd = panvk_per_arch(cmd_dispatch_prepare_tls)(
+      cmdbuf, shader, &wg_count, false);
 
    result = panvk_per_arch(cmd_prepare_push_descs)(
      cmdbuf, desc_state, shader->desc_info.used_set_mask);
@@ -138,17 +166,6 @@ panvk_per_arch(CmdDispatchBase)(VkCommandBuffer commandBuffer,
    pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
                   copy_desc_dep, &job, false);
 
-   batch->tlsinfo.tls.size = shader->info.tls_size;
-   batch->tlsinfo.wls.size = shader->info.wls_size;
-   if (batch->tlsinfo.wls.size) {
-      unsigned core_id_range;
-
-      panfrost_query_core_count(&phys_dev->kmod.props, &core_id_range);
-      batch->tlsinfo.wls.instances = pan_wls_instances(&wg_count);
-      batch->wls_total_size = pan_wls_adjust_size(batch->tlsinfo.wls.size) *
-                              batch->tlsinfo.wls.instances * core_id_range;
-   }
-
    panvk_per_arch(cmd_close_batch)(cmdbuf);
    clear_dirty_after_dispatch(cmdbuf);
 }
diff --git a/src/panfrost/vulkan/panvk_cmd_dispatch.h b/src/panfrost/vulkan/panvk_cmd_dispatch.h
index b697f11c1bf..9aafa4043f7 100644
--- a/src/panfrost/vulkan/panvk_cmd_dispatch.h
+++ b/src/panfrost/vulkan/panvk_cmd_dispatch.h
@@ -10,6 +10,8 @@
 #error "PAN_ARCH must be defined"
 #endif
 
+#include "pan_desc.h"
+
 enum panvk_cmd_compute_dirty_state {
    PANVK_CMD_COMPUTE_DIRTY_CS,
    PANVK_CMD_COMPUTE_DIRTY_DESC_STATE,
@@ -74,4 +76,8 @@ struct panvk_dispatch_info {
 void panvk_per_arch(cmd_prepare_dispatch_sysvals)(
    struct panvk_cmd_buffer *cmdbuf, const struct panvk_dispatch_info *info);
 
+uint64_t panvk_per_arch(cmd_dispatch_prepare_tls)(
+   struct panvk_cmd_buffer *cmdbuf, const struct panvk_shader *shader,
+   const struct pan_compute_dim *dim, bool indirect);
+
 #endif
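
Note on the contract this refactor introduces (an illustrative sketch, not part of the patch): both the CSF and JM backends now provide panvk_per_arch(cmd_dispatch_prepare_tls)(), which sizes the shader's TLS/WLS for the given workgroup dimensions, prepares the thread-storage descriptor (TSD), and returns the TSD's GPU address, or 0 when a required allocation fails. Callers are expected to treat 0 as "abort the dispatch", as the reworked cmd_dispatch() above does; the JM version additionally asserts !indirect, since indirect dispatch is unsupported there. A minimal caller, with hypothetical workgroup counts:

   /* Direct 4x4x1 dispatch; the dim values are made up for illustration. */
   struct pan_compute_dim dim = {4, 4, 1};
   uint64_t tsd =
      panvk_per_arch(cmd_dispatch_prepare_tls)(cmdbuf, shader, &dim,
                                               false /* direct */);
   if (!tsd)
      return; /* allocation failed; bail out like cmd_dispatch() does */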