diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build
index 071620bd95c..58fd5d10b75 100644
--- a/src/gallium/drivers/panfrost/meson.build
+++ b/src/gallium/drivers/panfrost/meson.build
@@ -58,9 +58,12 @@ panfrost_versions = ['4', '5', '6', '7', '9']
 libpanfrost_versions = []
 
 foreach ver : panfrost_versions
+  files_panfrost_vx = ['pan_cmdstream.c', pan_packers]
+  if ver in ['4', '5', '6', '7', '9']
+    files_panfrost_vx += ['pan_jm.c']
+  endif
   libpanfrost_versions += static_library(
-    'panfrost-v' + ver,
-    ['pan_cmdstream.c', pan_packers],
+    'panfrost-v' + ver, files_panfrost_vx,
     include_directories : panfrost_includes,
     c_args : ['-DPAN_ARCH=' + ver],
     gnu_symbol_visibility : 'hidden',
diff --git a/src/gallium/drivers/panfrost/pan_cmdstream.c b/src/gallium/drivers/panfrost/pan_cmdstream.c
index b6b030b71ab..202f2964bb0 100644
--- a/src/gallium/drivers/panfrost/pan_cmdstream.c
+++ b/src/gallium/drivers/panfrost/pan_cmdstream.c
@@ -24,8 +24,6 @@
  * SOFTWARE.
  */
 
-#include "drm-uapi/panfrost_drm.h"
-
 #include "gallium/auxiliary/util/u_blend.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
@@ -49,6 +47,7 @@
 #include "pan_cmdstream.h"
 #include "pan_context.h"
 #include "pan_indirect_dispatch.h"
+#include "pan_jm.h"
 #include "pan_job.h"
 #include "pan_pool.h"
 #include "pan_shader.h"
@@ -2429,25 +2428,6 @@ panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
    batch->varyings.vs = linkage->producer;
    batch->varyings.fs = linkage->consumer;
 }
-
-/*
- * Push jobs required for the rasterization pipeline. If there are side effects
- * from the vertex shader, these are handled ahead-of-time with a compute
- * shader. This function should not be called if rasterization is skipped.
- */
-static void
-jm_push_vertex_tiler_jobs(struct panfrost_batch *batch,
-                          const struct panfrost_ptr *vertex_job,
-                          const struct panfrost_ptr *tiler_job)
-{
-   unsigned vertex = panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc,
-                                      MALI_JOB_TYPE_VERTEX, false, false, 0, 0,
-                                      vertex_job, false);
-
-   panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc,
-                    MALI_JOB_TYPE_TILER, false, false, vertex, 0, tiler_job,
-                    false);
-}
 #endif
 
 static void
@@ -2509,17 +2489,6 @@ panfrost_initialize_surface(struct panfrost_batch *batch,
    }
 }
 
-static void
-jm_emit_fragment_job(struct panfrost_batch *batch,
-                     const struct pan_fb_info *pfb)
-{
-   struct panfrost_ptr transfer =
-      pan_pool_alloc_desc(&batch->pool.base, FRAGMENT_JOB);
-
-   GENX(pan_emit_fragment_job)(pfb, batch->framebuffer.gpu, transfer.cpu);
-   batch->jm.jobs.frag = transfer.gpu;
-}
-
 /* Generate a fragment job. This should be called once per frame. (Usually,
  * this corresponds to eglSwapBuffers or one of glFlush, glFinish)
@@ -2556,7 +2525,7 @@ emit_fragment_job(struct panfrost_batch *batch, const struct pan_fb_info *pfb)
    assert(batch->maxx > batch->minx);
    assert(batch->maxy > batch->miny);
 
-   jm_emit_fragment_job(batch, pfb);
+   GENX(jm_emit_fragment_job)(batch, pfb);
 }
 
 /* Count generated primitives (when there is no geom/tess shaders) for
@@ -2594,52 +2563,6 @@ panfrost_update_streamout_offsets(struct panfrost_context *ctx)
    }
 }
 
-#if PAN_ARCH <= 7
-static inline void
-jm_emit_draw_descs(struct panfrost_batch *batch, struct MALI_DRAW *d,
-                   enum pipe_shader_type st)
-{
-   d->offset_start = batch->ctx->offset_start;
-   d->instance_size =
-      batch->ctx->instance_count > 1 ? batch->ctx->padded_count : 1;
-
-   d->uniform_buffers = batch->uniform_buffers[st];
-   d->push_uniforms = batch->push_uniforms[st];
-   d->textures = batch->textures[st];
-   d->samplers = batch->samplers[st];
-}
-
-static void
-jm_emit_vertex_draw(struct panfrost_batch *batch, void *section)
-{
-   pan_pack(section, DRAW, cfg) {
-      cfg.state = batch->rsd[PIPE_SHADER_VERTEX];
-      cfg.attributes = batch->attribs[PIPE_SHADER_VERTEX];
-      cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_VERTEX];
-      cfg.varyings = batch->varyings.vs;
-      cfg.varying_buffers = cfg.varyings ? batch->varyings.bufs : 0;
-      cfg.thread_storage = batch->tls.gpu;
-      jm_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX);
-   }
-}
-
-static void
-jm_emit_vertex_job(struct panfrost_batch *batch,
-                   const struct pipe_draw_info *info, void *invocation_template,
-                   void *job)
-{
-   void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
-   memcpy(section, invocation_template, pan_size(INVOCATION));
-
-   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
-      cfg.job_task_split = 5;
-   }
-
-   section = pan_section_ptr(job, COMPUTE_JOB, DRAW);
-   jm_emit_vertex_draw(batch, section);
-}
-#endif
-
 /* On Bifrost and older, the Renderer State Descriptor aggregates many pieces of
  * 3D state. In particular, it groups the fragment shader descriptor with
  * depth/stencil, blend, polygon offset, and multisampling state. These pieces
@@ -2760,464 +2683,6 @@ panfrost_update_state_3d(struct panfrost_batch *batch)
 #endif
 }
 
-#if PAN_ARCH >= 6
-static mali_ptr
-jm_emit_tiler_desc(struct panfrost_batch *batch)
-{
-   struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
-
-   if (batch->tiler_ctx.bifrost)
-      return batch->tiler_ctx.bifrost;
-
-   struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP);
-
-   pan_pack(t.cpu, TILER_HEAP, heap) {
-      heap.size = dev->tiler_heap->size;
-      heap.base = dev->tiler_heap->ptr.gpu;
-      heap.bottom = dev->tiler_heap->ptr.gpu;
-      heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
-   }
-
-   mali_ptr heap = t.gpu;
-   unsigned max_levels = dev->tiler_features.max_levels;
-   assert(max_levels >= 2);
-
-   t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT);
-   pan_pack(t.cpu, TILER_CONTEXT, tiler) {
-      /* TODO: Select hierarchy mask more effectively */
-      tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;
-
-      /* For large framebuffers, disable the smallest bin size to
-       * avoid pathological tiler memory usage. Required to avoid OOM
-       * on dEQP-GLES31.functional.fbo.no_attachments.maximums.all on
-       * Mali-G57.
-       */
-      if (MAX2(batch->key.width, batch->key.height) >= 4096)
-         tiler.hierarchy_mask &= ~1;
-
-      tiler.fb_width = batch->key.width;
-      tiler.fb_height = batch->key.height;
-      tiler.heap = heap;
-      tiler.sample_pattern =
-         pan_sample_pattern(util_framebuffer_get_num_samples(&batch->key));
-#if PAN_ARCH >= 9
-      tiler.first_provoking_vertex =
-         pan_tristate_get(batch->first_provoking_vertex);
-#endif
-   }
-
-   batch->tiler_ctx.bifrost = t.gpu;
-   return batch->tiler_ctx.bifrost;
-}
-#endif
-
-/* Packs a primitive descriptor, mostly common between Midgard/Bifrost tiler
- * jobs and Valhall IDVS jobs
- */
-static void
-jm_emit_primitive(struct panfrost_batch *batch,
-                  const struct pipe_draw_info *info,
-                  const struct pipe_draw_start_count_bias *draw,
-                  bool secondary_shader, void *out)
-{
-   struct panfrost_context *ctx = batch->ctx;
-   UNUSED struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
-
-   pan_pack(out, PRIMITIVE, cfg) {
-      cfg.draw_mode = pan_draw_mode(info->mode);
-      if (panfrost_writes_point_size(ctx))
-         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
-
-#if PAN_ARCH <= 8
-      /* For line primitives, PRIMITIVE.first_provoking_vertex must
-       * be set to true and the provoking vertex is selected with
-       * DRAW.flat_shading_vertex.
-       */
-      if (u_reduced_prim(info->mode) == MESA_PRIM_LINES)
-         cfg.first_provoking_vertex = true;
-      else
-         cfg.first_provoking_vertex = rast->flatshade_first;
-
-      if (panfrost_is_implicit_prim_restart(info)) {
-         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
-      } else if (info->primitive_restart) {
-         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_EXPLICIT;
-         cfg.primitive_restart_index = info->restart_index;
-      }
-
-      cfg.job_task_split = 6;
-#else
-      struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT];
-
-      cfg.allow_rotating_primitives = allow_rotating_primitives(fs, info);
-      cfg.primitive_restart = info->primitive_restart;
-
-      /* Non-fixed restart indices should have been lowered */
-      assert(!cfg.primitive_restart || panfrost_is_implicit_prim_restart(info));
-#endif
-
-      cfg.index_count = draw->count;
-      cfg.index_type = panfrost_translate_index_size(info->index_size);
-
-      if (PAN_ARCH >= 9) {
-         /* Base vertex offset on Valhall is used for both
-          * indexed and non-indexed draws, in a simple way for
-          * either. Handle both cases.
-          */
-         if (cfg.index_type)
-            cfg.base_vertex_offset = draw->index_bias;
-         else
-            cfg.base_vertex_offset = draw->start;
-
-         /* Indices are moved outside the primitive descriptor
-          * on Valhall, so we don't need to set that here
-          */
-      } else if (cfg.index_type) {
-         cfg.base_vertex_offset = draw->index_bias - ctx->offset_start;
-
-#if PAN_ARCH <= 7
-         cfg.indices = batch->indices;
-#endif
-      }
-
-#if PAN_ARCH >= 6
-      cfg.secondary_shader = secondary_shader;
-#endif
-   }
-}
-
-#if PAN_ARCH >= 9
-static void
-jm_emit_shader_env(struct panfrost_batch *batch,
-                   struct MALI_SHADER_ENVIRONMENT *cfg,
-                   enum pipe_shader_type stage, mali_ptr shader_ptr)
-{
-   cfg->resources = panfrost_emit_resources(batch, stage);
-   cfg->thread_storage = batch->tls.gpu;
-   cfg->shader = shader_ptr;
-
-   /* Each entry of FAU is 64-bits */
-   cfg->fau = batch->push_uniforms[stage];
-   cfg->fau_count = DIV_ROUND_UP(batch->nr_push_uniforms[stage], 2);
-}
-#endif
-
-static void
-jm_emit_tiler_draw(void *out, struct panfrost_batch *batch, bool fs_required,
-                   enum mesa_prim prim)
-{
-   struct panfrost_context *ctx = batch->ctx;
-   struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
-   bool polygon = (prim == MESA_PRIM_TRIANGLES);
-
-   pan_pack(out, DRAW, cfg) {
-      /*
-       * From the Gallium documentation,
-       * pipe_rasterizer_state::cull_face "indicates which faces of
-       * polygons to cull". Points and lines are not considered
-       * polygons and should be drawn even if all faces are culled.
-       * The hardware does not take primitive type into account when
-       * culling, so we need to do that check ourselves.
-       */
-      cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT);
-      cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK);
-      cfg.front_face_ccw = rast->front_ccw;
-
-      if (ctx->occlusion_query && ctx->active_queries) {
-         if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
-            cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER;
-         else
-            cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE;
-
-         struct panfrost_resource *rsrc =
-            pan_resource(ctx->occlusion_query->rsrc);
-         cfg.occlusion = rsrc->image.data.bo->ptr.gpu;
-         panfrost_batch_write_rsrc(ctx->batch, rsrc, PIPE_SHADER_FRAGMENT);
-      }
-
-#if PAN_ARCH >= 9
-      struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT];
-
-      cfg.multisample_enable = rast->multisample;
-      cfg.sample_mask = rast->multisample ? ctx->sample_mask : 0xFFFF;
-
-      /* Use per-sample shading if required by API Also use it when a
-       * blend shader is used with multisampling, as this is handled
-       * by a single ST_TILE in the blend shader with the current
-       * sample ID, requiring per-sample shading.
-       */
-      cfg.evaluate_per_sample =
-         (rast->multisample &&
-          ((ctx->min_samples > 1) || ctx->valhall_has_blend_shader));
-
-      cfg.single_sampled_lines = !rast->multisample;
-
-      cfg.vertex_array.packet = true;
-
-      cfg.minimum_z = batch->minimum_z;
-      cfg.maximum_z = batch->maximum_z;
-
-      cfg.depth_stencil = batch->depth_stencil;
-
-      if (fs_required) {
-         bool has_oq = ctx->occlusion_query && ctx->active_queries;
-
-         struct pan_earlyzs_state earlyzs = pan_earlyzs_get(
-            fs->earlyzs, ctx->depth_stencil->writes_zs || has_oq,
-            ctx->blend->base.alpha_to_coverage,
-            ctx->depth_stencil->zs_always_passes);
-
-         cfg.pixel_kill_operation = earlyzs.kill;
-         cfg.zs_update_operation = earlyzs.update;
-
-         cfg.allow_forward_pixel_to_kill =
-            pan_allow_forward_pixel_to_kill(ctx, fs);
-         cfg.allow_forward_pixel_to_be_killed = !fs->info.writes_global;
-
-         /* Mask of render targets that may be written. A render
-          * target may be written if the fragment shader writes
-          * to it AND it actually exists. If the render target
-          * doesn't actually exist, the blend descriptor will be
-          * OFF so it may be omitted from the mask.
-          *
-          * Only set when there is a fragment shader, since
-          * otherwise no colour updates are possible.
-          */
-         cfg.render_target_mask =
-            (fs->info.outputs_written >> FRAG_RESULT_DATA0) & ctx->fb_rt_mask;
-
-         /* Also use per-sample shading if required by the shader
-          */
-         cfg.evaluate_per_sample |= fs->info.fs.sample_shading;
-
-         /* Unlike Bifrost, alpha-to-coverage must be included in
-          * this identically-named flag. Confusing, isn't it?
-          */
-         cfg.shader_modifies_coverage = fs->info.fs.writes_coverage ||
-                                        fs->info.fs.can_discard ||
-                                        ctx->blend->base.alpha_to_coverage;
-
-         /* Blend descriptors are only accessed by a BLEND
-          * instruction on Valhall. It follows that if the
-          * fragment shader is omitted, we may also emit the
-          * blend descriptors.
-          */
-         cfg.blend = batch->blend;
-         cfg.blend_count = MAX2(batch->key.nr_cbufs, 1);
-         cfg.alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
-
-         cfg.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0);
-         cfg.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1);
-
-         jm_emit_shader_env(batch, &cfg.shader, PIPE_SHADER_FRAGMENT,
-                            batch->rsd[PIPE_SHADER_FRAGMENT]);
-      } else {
-         /* These operations need to be FORCE to benefit from the
-          * depth-only pass optimizations.
-          */
-         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
-         cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY;
-
-         /* No shader and no blend => no shader or blend
-          * reasons to disable FPK. The only FPK-related state
-          * not covered is alpha-to-coverage which we don't set
-          * without blend.
-          */
-         cfg.allow_forward_pixel_to_kill = true;
-
-         /* No shader => no shader side effects */
-         cfg.allow_forward_pixel_to_be_killed = true;
-
-         /* Alpha isn't written so these are vacuous */
-         cfg.overdraw_alpha0 = true;
-         cfg.overdraw_alpha1 = true;
-      }
-#else
-      cfg.position = batch->varyings.pos;
-      cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT];
-      cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT];
-      cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_FRAGMENT];
-      cfg.viewport = batch->viewport;
-      cfg.varyings = batch->varyings.fs;
-      cfg.varying_buffers = cfg.varyings ? batch->varyings.bufs : 0;
-      cfg.thread_storage = batch->tls.gpu;
-
-      /* For all primitives but lines DRAW.flat_shading_vertex must
-       * be set to 0 and the provoking vertex is selected with the
-       * PRIMITIVE.first_provoking_vertex field.
-       */
-      if (prim == MESA_PRIM_LINES) {
-         /* The logic is inverted across arches. */
-         cfg.flat_shading_vertex = rast->flatshade_first ^ (PAN_ARCH <= 5);
-      }
-
-      jm_emit_draw_descs(batch, &cfg, PIPE_SHADER_FRAGMENT);
-#endif
-   }
-}
-
-#if PAN_ARCH >= 9
-static void
-jm_emit_malloc_vertex_job(struct panfrost_batch *batch,
-                          const struct pipe_draw_info *info,
-                          const struct pipe_draw_start_count_bias *draw,
-                          bool secondary_shader, void *job)
-{
-   struct panfrost_context *ctx = batch->ctx;
-   struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX];
-   struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT];
-
-   bool fs_required = panfrost_fs_required(
-      fs, ctx->blend, &ctx->pipe_framebuffer, ctx->depth_stencil);
-
-   /* Varying shaders only feed data to the fragment shader, so if we omit
-    * the fragment shader, we should omit the varying shader too.
-    */
-   secondary_shader &= fs_required;
-
-   jm_emit_primitive(batch, info, draw, secondary_shader,
-                     pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE));
-
-   pan_section_pack(job, MALLOC_VERTEX_JOB, INSTANCE_COUNT, cfg) {
-      cfg.count = info->instance_count;
-   }
-
-   pan_section_pack(job, MALLOC_VERTEX_JOB, ALLOCATION, cfg) {
-      if (secondary_shader) {
-         unsigned sz = panfrost_vertex_attribute_stride(vs, fs);
-
-         cfg.vertex_packet_stride = sz + 16;
-         cfg.vertex_attribute_stride = sz;
-      } else {
-         /* Hardware requirement for "no varyings" */
-         cfg.vertex_packet_stride = 16;
-         cfg.vertex_attribute_stride = 0;
-      }
-   }
-
-   pan_section_pack(job, MALLOC_VERTEX_JOB, TILER, cfg) {
-      cfg.address = batch->indices;
-   }
-
-   STATIC_ASSERT(sizeof(batch->scissor) == pan_size(SCISSOR));
-   memcpy(pan_section_ptr(job, MALLOC_VERTEX_JOB, SCISSOR), &batch->scissor,
-          pan_size(SCISSOR));
-
-   panfrost_emit_primitive_size(
-      ctx, info->mode == MESA_PRIM_POINTS, 0,
-      pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE_SIZE));
-
-   pan_section_pack(job, MALLOC_VERTEX_JOB, INDICES, cfg) {
-      cfg.address = batch->indices;
-   }
-
-   jm_emit_tiler_draw(pan_section_ptr(job, MALLOC_VERTEX_JOB, DRAW), batch,
-                      fs_required, u_reduced_prim(info->mode));
-
-   pan_section_pack(job, MALLOC_VERTEX_JOB, POSITION, cfg) {
-      jm_emit_shader_env(batch, &cfg, PIPE_SHADER_VERTEX,
-                         panfrost_get_position_shader(batch, info));
-   }
-
-   pan_section_pack(job, MALLOC_VERTEX_JOB, VARYING, cfg) {
-      /* If a varying shader is used, we configure it with the same
-       * state as the position shader for backwards compatible
-       * behaviour with Bifrost. This could be optimized.
-       */
-      if (!secondary_shader)
-         continue;
-
-      jm_emit_shader_env(batch, &cfg, PIPE_SHADER_VERTEX,
-                         panfrost_get_varying_shader(batch));
-   }
-}
-#endif
-
-#if PAN_ARCH <= 7
-static void
-jm_emit_tiler_job(struct panfrost_batch *batch,
-                  const struct pipe_draw_info *info,
-                  const struct pipe_draw_start_count_bias *draw,
-                  void *invocation_template, bool secondary_shader, void *job)
-{
-   struct panfrost_context *ctx = batch->ctx;
-
-   void *section = pan_section_ptr(job, TILER_JOB, INVOCATION);
-   memcpy(section, invocation_template, pan_size(INVOCATION));
-
-   jm_emit_primitive(batch, info, draw, secondary_shader,
-                     pan_section_ptr(job, TILER_JOB, PRIMITIVE));
-
-   void *prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE);
-   enum mesa_prim prim = u_reduced_prim(info->mode);
-
-#if PAN_ARCH >= 6
-   pan_section_pack(job, TILER_JOB, TILER, cfg) {
-      cfg.address = jm_emit_tiler_desc(batch);
-   }
-
-   pan_section_pack(job, TILER_JOB, PADDING, cfg)
-      ;
-#endif
-
-   jm_emit_tiler_draw(pan_section_ptr(job, TILER_JOB, DRAW), batch, true, prim);
-
-   panfrost_emit_primitive_size(ctx, prim == MESA_PRIM_POINTS,
-                                batch->varyings.psiz, prim_size);
-}
-#endif
-
-static void
-jm_launch_xfb(struct panfrost_batch *batch, const struct pipe_draw_info *info,
-              unsigned count)
-{
-   struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
-
-#if PAN_ARCH >= 9
-   pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) {
-      cfg.workgroup_size_x = 1;
-      cfg.workgroup_size_y = 1;
-      cfg.workgroup_size_z = 1;
-
-      cfg.workgroup_count_x = count;
-      cfg.workgroup_count_y = info->instance_count;
-      cfg.workgroup_count_z = 1;
-
-      jm_emit_shader_env(batch, &cfg.compute, PIPE_SHADER_VERTEX,
-                         batch->rsd[PIPE_SHADER_VERTEX]);
-
-      /* TODO: Indexing. Also, this is a legacy feature... */
-      cfg.compute.attribute_offset = batch->ctx->offset_start;
-
-      /* Transform feedback shaders do not use barriers or shared
-       * memory, so we may merge workgroups.
-       */
-      cfg.allow_merging_workgroups = true;
-      cfg.task_increment = 1;
-      cfg.task_axis = MALI_TASK_AXIS_Z;
-   }
-#else
-   struct mali_invocation_packed invocation;
-
-   panfrost_pack_work_groups_compute(&invocation, 1, count,
-                                     info->instance_count, 1, 1, 1,
-                                     PAN_ARCH <= 5, false);
-
-   /* No varyings on XFB compute jobs. */
-   mali_ptr saved_vs_varyings = batch->varyings.vs;
-
-   batch->varyings.vs = 0;
-   jm_emit_vertex_job(batch, info, &invocation, t.cpu);
-   batch->varyings.vs = saved_vs_varyings;
-#endif
-   enum mali_job_type job_type = MALI_JOB_TYPE_COMPUTE;
-#if PAN_ARCH <= 5
-   job_type = MALI_JOB_TYPE_VERTEX;
-#endif
-   panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc, job_type, true,
-                    false, 0, 0, &t, false);
-}
-
 static void
 panfrost_launch_xfb(struct panfrost_batch *batch,
                     const struct pipe_draw_info *info, unsigned count)
@@ -3259,7 +2724,7 @@ panfrost_launch_xfb(struct panfrost_batch *batch,
       &batch->push_uniforms[PIPE_SHADER_VERTEX],
       &batch->nr_push_uniforms[PIPE_SHADER_VERTEX]);
 
-   jm_launch_xfb(batch, info, count);
+   GENX(jm_launch_xfb)(batch, info, count);
    batch->compute_count++;
 
    ctx->uncompiled[PIPE_SHADER_VERTEX] = vs_uncompiled;
@@ -3352,79 +2817,6 @@ panfrost_draw_get_vertex_count(struct panfrost_batch *batch,
 
    return vertex_count;
 }
 
-static void
-jm_launch_draw(struct panfrost_batch *batch, const struct pipe_draw_info *info,
-               unsigned drawid_offset,
-               const struct pipe_draw_start_count_bias *draw,
-               unsigned vertex_count)
-{
-   struct panfrost_context *ctx = batch->ctx;
-   struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX];
-   bool secondary_shader = vs->info.vs.secondary_enable;
-   bool idvs = vs->info.vs.idvs;
-
-#if PAN_ARCH <= 7
-   struct mali_invocation_packed invocation;
-   if (info->instance_count > 1) {
-      panfrost_pack_work_groups_compute(&invocation, 1, vertex_count,
-                                        info->instance_count, 1, 1, 1, true,
-                                        false);
-   } else {
-      pan_pack(&invocation, INVOCATION, cfg) {
-         cfg.invocations = vertex_count - 1;
-         cfg.size_y_shift = 0;
-         cfg.size_z_shift = 0;
-         cfg.workgroups_x_shift = 0;
-         cfg.workgroups_y_shift = 0;
-         cfg.workgroups_z_shift = 32;
-         cfg.thread_group_split = MALI_SPLIT_MIN_EFFICIENT;
-      }
-   }
-#endif
-
-   UNUSED struct panfrost_ptr tiler, vertex;
-
-   if (idvs) {
-#if PAN_ARCH >= 9
-      tiler = pan_pool_alloc_desc(&batch->pool.base, MALLOC_VERTEX_JOB);
-#elif PAN_ARCH >= 6
-      tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB);
-#else
-      unreachable("IDVS is unsupported on Midgard");
-#endif
-   } else {
-      vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
-      tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB);
-   }
-
-#if PAN_ARCH >= 9
-   assert(idvs && "Memory allocated IDVS required on Valhall");
-
-   jm_emit_malloc_vertex_job(batch, info, draw, secondary_shader, tiler.cpu);
-
-   panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc,
-                    MALI_JOB_TYPE_MALLOC_VERTEX, false, false, 0, 0, &tiler,
-                    false);
-#else
-   /* Fire off the draw itself */
-   jm_emit_tiler_job(batch, info, draw, &invocation, secondary_shader,
-                     tiler.cpu);
-   if (idvs) {
-#if PAN_ARCH >= 6
-      jm_emit_vertex_draw(
-         batch, pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
-
-      panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc,
-                       MALI_JOB_TYPE_INDEXED_VERTEX, false, false, 0, 0, &tiler,
-                       false);
-#endif
-   } else {
-      jm_emit_vertex_job(batch, info, &invocation, vertex.cpu);
-      jm_push_vertex_tiler_jobs(batch, &vertex, &tiler);
-   }
-#endif
-}
-
 static void
 panfrost_direct_draw(struct panfrost_batch *batch,
                      const struct pipe_draw_info *info, unsigned drawid_offset,
@@ -3479,7 +2871,7 @@ panfrost_direct_draw(struct panfrost_batch *batch,
                                  info->mode == MESA_PRIM_POINTS);
 #endif
 
-   jm_launch_draw(batch, info, drawid_offset, draw, vertex_count);
+   GENX(jm_launch_draw)(batch, info, drawid_offset, draw, vertex_count);
 
    batch->draw_count++;
 }
@@ -3576,95 +2968,6 @@ panfrost_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,
    }
 }
 
-static void
-jm_launch_grid(struct panfrost_batch *batch, const struct pipe_grid_info *info)
-{
-   struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
-
-   /* Invoke according to the grid info */
-
-   unsigned num_wg[3] = {info->grid[0], info->grid[1], info->grid[2]};
-
-   if (info->indirect)
-      num_wg[0] = num_wg[1] = num_wg[2] = 1;
-
-#if PAN_ARCH <= 7
-   panfrost_pack_work_groups_compute(
-      pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION), num_wg[0], num_wg[1],
-      num_wg[2], info->block[0], info->block[1], info->block[2], false,
-      info->indirect != NULL);
-
-   pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
-      cfg.job_task_split = util_logbase2_ceil(info->block[0] + 1) +
-                           util_logbase2_ceil(info->block[1] + 1) +
-                           util_logbase2_ceil(info->block[2] + 1);
-   }
-
-   pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) {
-      cfg.state = batch->rsd[PIPE_SHADER_COMPUTE];
-      cfg.attributes = batch->attribs[PIPE_SHADER_COMPUTE];
-      cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_COMPUTE];
-      cfg.thread_storage = batch->tls.gpu;
-      cfg.uniform_buffers = batch->uniform_buffers[PIPE_SHADER_COMPUTE];
-      cfg.push_uniforms = batch->push_uniforms[PIPE_SHADER_COMPUTE];
-      cfg.textures = batch->textures[PIPE_SHADER_COMPUTE];
-      cfg.samplers = batch->samplers[PIPE_SHADER_COMPUTE];
-   }
-#else
-   struct panfrost_context *ctx = batch->ctx;
-   struct panfrost_compiled_shader *cs = ctx->prog[PIPE_SHADER_COMPUTE];
-
-   pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) {
-      cfg.workgroup_size_x = info->block[0];
-      cfg.workgroup_size_y = info->block[1];
-      cfg.workgroup_size_z = info->block[2];
-
-      cfg.workgroup_count_x = num_wg[0];
-      cfg.workgroup_count_y = num_wg[1];
-      cfg.workgroup_count_z = num_wg[2];
-
-      jm_emit_shader_env(batch, &cfg.compute, PIPE_SHADER_COMPUTE,
-                         batch->rsd[PIPE_SHADER_COMPUTE]);
-
-      /* Workgroups may be merged if the shader does not use barriers
-       * or shared memory. This condition is checked against the
-       * static shared_size at compile-time. We need to check the
-       * variable shared size at launch_grid time, because the
-       * compiler doesn't know about that.
-       */
-      cfg.allow_merging_workgroups = cs->info.cs.allow_merging_workgroups &&
-                                     (info->variable_shared_mem == 0);
-
-      cfg.task_increment = 1;
-      cfg.task_axis = MALI_TASK_AXIS_Z;
-   }
-#endif
-
-   unsigned indirect_dep = 0;
-#if PAN_GPU_INDIRECTS
-   if (info->indirect) {
-      struct pan_indirect_dispatch_info indirect = {
-         .job = t.gpu,
-         .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
-                         info->indirect_offset,
-         .num_wg_sysval =
-            {
-               batch->num_wg_sysval[0],
-               batch->num_wg_sysval[1],
-               batch->num_wg_sysval[2],
-            },
-      };
-
-      indirect_dep = GENX(pan_indirect_dispatch_emit)(
-         &batch->pool.base, &batch->jm.jobs.vtc_jc, &indirect);
-   }
-#endif
-
-   panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc,
-                    MALI_JOB_TYPE_COMPUTE, true, false, indirect_dep, 0, &t,
-                    false);
-}
-
 /* Launch grid is the compute equivalent of draw_vbo, so in this routine, we
  * construct the COMPUTE job and some of its payload.
  */
@@ -3709,7 +3012,7 @@ panfrost_launch_grid_on_batch(struct pipe_context *pipe,
 
    mali_ptr saved_tls = batch->tls.gpu;
    batch->tls.gpu = panfrost_emit_shared_memory(batch, info);
-   jm_launch_grid(batch, info);
+   GENX(jm_launch_grid)(batch, info);
    batch->compute_count++;
    batch->tls.gpu = saved_tls;
 }
@@ -4293,46 +3596,6 @@ screen_destroy(struct pipe_screen *pscreen)
 #endif
 }
 
-static void
-jm_preload_fb(struct panfrost_batch *batch, struct pan_fb_info *fb)
-{
-   GENX(pan_preload_fb)
-   (&batch->pool.base, &batch->jm.jobs.vtc_jc, fb, batch->tls.gpu,
-    PAN_ARCH >= 6 ? batch->tiler_ctx.bifrost : 0, NULL);
-}
-
-static void
-jm_init_batch(struct panfrost_batch *batch)
-{
-   /* Reserve the framebuffer and local storage descriptors */
-   batch->framebuffer =
-#if PAN_ARCH == 4
-      pan_pool_alloc_desc(&batch->pool.base, FRAMEBUFFER);
-#else
-      pan_pool_alloc_desc_aggregate(
-         &batch->pool.base, PAN_DESC(FRAMEBUFFER), PAN_DESC(ZS_CRC_EXTENSION),
-         PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
-#endif
-
-#if PAN_ARCH >= 6
-   batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
-#else
-   /* On Midgard, the TLS is embedded in the FB descriptor */
-   batch->tls = batch->framebuffer;
-
-#if PAN_ARCH == 5
-   struct mali_framebuffer_pointer_packed ptr;
-
-   pan_pack(ptr.opaque, FRAMEBUFFER_POINTER, cfg) {
-      cfg.pointer = batch->framebuffer.gpu;
-      cfg.render_target_count = 1; /* a necessary lie */
-   }
-
-   batch->tls.gpu = ptr.opaque[0];
-#endif
-#endif
-}
-
 static void
 panfrost_sampler_view_destroy(struct pipe_context *pctx,
                               struct pipe_sampler_view *pview)
@@ -4420,167 +3683,10 @@ init_polygon_list(struct panfrost_batch *batch)
 #endif
 }
 
-static int
-jm_submit_jc(struct panfrost_batch *batch, mali_ptr first_job_desc,
-             uint32_t reqs, uint32_t out_sync)
-{
-   struct panfrost_context *ctx = batch->ctx;
-   struct pipe_context *gallium = (struct pipe_context *)ctx;
-   struct panfrost_device *dev = pan_device(gallium->screen);
-   struct drm_panfrost_submit submit = {
-      0,
-   };
-   uint32_t in_syncs[1];
-   uint32_t *bo_handles;
-   int ret;
-
-   /* If we trace, we always need a syncobj, so make one of our own if we
-    * weren't given one to use. Remember that we did so, so we can free it
-    * after we're done but preventing double-frees if we were given a
-    * syncobj */
-
-   if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
-      out_sync = ctx->syncobj;
-
-   submit.out_sync = out_sync;
-   submit.jc = first_job_desc;
-   submit.requirements = reqs;
-
-   if (ctx->in_sync_fd >= 0) {
-      ret =
-         drmSyncobjImportSyncFile(dev->fd, ctx->in_sync_obj, ctx->in_sync_fd);
-      assert(!ret);
-
-      in_syncs[submit.in_sync_count++] = ctx->in_sync_obj;
-      close(ctx->in_sync_fd);
-      ctx->in_sync_fd = -1;
-   }
-
-   if (submit.in_sync_count)
-      submit.in_syncs = (uintptr_t)in_syncs;
-
-   bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
-                          panfrost_pool_num_bos(&batch->invisible_pool) +
-                          batch->num_bos + 2,
-                       sizeof(*bo_handles));
-   assert(bo_handles);
-
-   pan_bo_access *flags = util_dynarray_begin(&batch->bos);
-   unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access);
-
-   for (int i = 0; i < end_bo; ++i) {
-      if (!flags[i])
-         continue;
-
-      assert(submit.bo_handle_count < batch->num_bos);
-      bo_handles[submit.bo_handle_count++] = i;
-
-      /* Update the BO access flags so that panfrost_bo_wait() knows
-       * about all pending accesses.
-       * We only keep the READ/WRITE info since this is all the BO
-       * wait logic cares about.
-       * We also preserve existing flags as this batch might not
-       * be the first one to access the BO.
-       */
-      struct panfrost_bo *bo = pan_lookup_bo(dev, i);
-
-      bo->gpu_access |= flags[i] & (PAN_BO_ACCESS_RW);
-   }
-
-   panfrost_pool_get_bo_handles(&batch->pool,
-                                bo_handles + submit.bo_handle_count);
-   submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
-   panfrost_pool_get_bo_handles(&batch->invisible_pool,
-                                bo_handles + submit.bo_handle_count);
-   submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);
-
-   /* Add the tiler heap to the list of accessed BOs if the batch has at
-    * least one tiler job. Tiler heap is written by tiler jobs and read
-    * by fragment jobs (the polygon list is coming from this heap).
-    */
-   if (batch->jm.jobs.vtc_jc.first_tiler)
-      bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;
-
-   /* Always used on Bifrost, occassionally used on Midgard */
-   bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle;
-
-   submit.bo_handles = (u64)(uintptr_t)bo_handles;
-   if (ctx->is_noop)
-      ret = 0;
-   else
-      ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
-   free(bo_handles);
-
-   if (ret)
-      return errno;
-
-   /* Trace the job if we're doing that */
-   if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
-      /* Wait so we can get errors reported back */
-      drmSyncobjWait(dev->fd, &out_sync, 1, INT64_MAX, 0, NULL);
-
-      if (dev->debug & PAN_DBG_TRACE)
-         pandecode_jc(dev->decode_ctx, submit.jc, dev->gpu_id);
-
-      if (dev->debug & PAN_DBG_DUMP)
-         pandecode_dump_mappings(dev->decode_ctx);
-
-      /* Jobs won't be complete if blackhole rendering, that's ok */
-      if (!ctx->is_noop && dev->debug & PAN_DBG_SYNC)
-         pandecode_abort_on_fault(dev->decode_ctx, submit.jc, dev->gpu_id);
-   }
-
-   return 0;
-}
-
-/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
- * outsync corresponding to the later of the two (since there will be an
- * implicit dep between them) */
-
-static int
-jm_submit_batch(struct panfrost_batch *batch)
-{
-   struct pipe_screen *pscreen = batch->ctx->base.screen;
-   struct panfrost_device *dev = pan_device(pscreen);
-   bool has_draws = batch->jm.jobs.vtc_jc.first_job;
-   bool has_tiler = batch->jm.jobs.vtc_jc.first_tiler;
-   bool has_frag = panfrost_has_fragment_job(batch);
-   uint32_t out_sync = batch->ctx->syncobj;
-   int ret = 0;
-
-   /* Take the submit lock to make sure no tiler jobs from other context
-    * are inserted between our tiler and fragment jobs, failing to do that
-    * might result in tiler heap corruption.
-    */
-   if (has_tiler)
-      pthread_mutex_lock(&dev->submit_lock);
-
-   if (has_draws) {
-      ret = jm_submit_jc(batch, batch->jm.jobs.vtc_jc.first_job, 0,
-                         has_frag ? 0 : out_sync);
-
-      if (ret)
-         goto done;
-   }
-
-   if (has_frag) {
-      ret =
-         jm_submit_jc(batch, batch->jm.jobs.frag, PANFROST_JD_REQ_FS, out_sync);
-      if (ret)
-         goto done;
-   }
-
-done:
-   if (has_tiler)
-      pthread_mutex_unlock(&dev->submit_lock);
-
-   return ret;
-}
-
 static int
 submit_batch(struct panfrost_batch *batch, struct pan_fb_info *fb)
 {
-   jm_preload_fb(batch, fb);
+   GENX(jm_preload_fb)(batch, fb);
    init_polygon_list(batch);
 
    /* Now that all draws are in, we can finally prepare the
@@ -4593,7 +3699,7 @@ submit_batch(struct panfrost_batch *batch, struct pan_fb_info *fb)
       emit_fragment_job(batch, fb);
    }
 
-   return jm_submit_batch(batch);
+   return GENX(jm_submit_batch)(batch);
 }
 
 void
@@ -4604,7 +3710,7 @@ GENX(panfrost_cmdstream_screen_init)(struct panfrost_screen *screen)
    screen->vtbl.prepare_shader = prepare_shader;
    screen->vtbl.screen_destroy = screen_destroy;
    screen->vtbl.context_populate_vtbl = context_populate_vtbl;
-   screen->vtbl.init_batch = jm_init_batch;
+   screen->vtbl.init_batch = GENX(jm_init_batch);
    screen->vtbl.submit_batch = submit_batch;
    screen->vtbl.get_blend_shader = GENX(pan_blend_get_shader_locked);
    screen->vtbl.get_compiler_options = GENX(pan_shader_get_compiler_options);
diff --git a/src/gallium/drivers/panfrost/pan_jm.c b/src/gallium/drivers/panfrost/pan_jm.c
new file mode 100644
index 00000000000..1fc0580e168
--- /dev/null
+++ b/src/gallium/drivers/panfrost/pan_jm.c
@@ -0,0 +1,938 @@
+/*
+ * Copyright (C) 2018 Alyssa Rosenzweig
+ * Copyright (C) 2020 Collabora Ltd.
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "decode.h"
+
+#include "drm-uapi/panfrost_drm.h"
+
+#include "pan_blitter.h"
+#include "pan_cmdstream.h"
+#include "pan_context.h"
+#include "pan_indirect_dispatch.h"
+#include "pan_jm.h"
+#include "pan_job.h"
+
+#if PAN_ARCH >= 10
+#error "JM helpers are only used for gen < 10"
+#endif
+
+void
+GENX(jm_init_batch)(struct panfrost_batch *batch)
+{
+   /* Reserve the framebuffer and local storage descriptors */
+   batch->framebuffer =
+#if PAN_ARCH == 4
+      pan_pool_alloc_desc(&batch->pool.base, FRAMEBUFFER);
+#else
+      pan_pool_alloc_desc_aggregate(
+         &batch->pool.base, PAN_DESC(FRAMEBUFFER), PAN_DESC(ZS_CRC_EXTENSION),
+         PAN_DESC_ARRAY(MAX2(batch->key.nr_cbufs, 1), RENDER_TARGET));
+#endif
+
+#if PAN_ARCH >= 6
+   batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
+#else
+   /* On Midgard, the TLS is embedded in the FB descriptor */
+   batch->tls = batch->framebuffer;
+
+#if PAN_ARCH == 5
+   struct mali_framebuffer_pointer_packed ptr;
+
+   pan_pack(ptr.opaque, FRAMEBUFFER_POINTER, cfg) {
+      cfg.pointer = batch->framebuffer.gpu;
+      cfg.render_target_count = 1; /* a necessary lie */
+   }
+
+   batch->tls.gpu = ptr.opaque[0];
+#endif
+#endif
+}
+
+static int
+jm_submit_jc(struct panfrost_batch *batch, mali_ptr first_job_desc,
+             uint32_t reqs, uint32_t out_sync)
+{
+   struct panfrost_context *ctx = batch->ctx;
+   struct pipe_context *gallium = (struct pipe_context *)ctx;
+   struct panfrost_device *dev = pan_device(gallium->screen);
+   struct drm_panfrost_submit submit = {
+      0,
+   };
+   uint32_t in_syncs[1];
+   uint32_t *bo_handles;
+   int ret;
+
+   /* If we trace, we always need a syncobj, so make one of our own if we
+    * weren't given one to use. Remember that we did so, so we can free it
+    * after we're done while preventing double-frees if we were given a
+    * syncobj */
+
+   if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
+      out_sync = ctx->syncobj;
+
+   submit.out_sync = out_sync;
+   submit.jc = first_job_desc;
+   submit.requirements = reqs;
+
+   if (ctx->in_sync_fd >= 0) {
+      ret =
+         drmSyncobjImportSyncFile(dev->fd, ctx->in_sync_obj, ctx->in_sync_fd);
+      assert(!ret);
+
+      in_syncs[submit.in_sync_count++] = ctx->in_sync_obj;
+      close(ctx->in_sync_fd);
+      ctx->in_sync_fd = -1;
+   }
+
+   if (submit.in_sync_count)
+      submit.in_syncs = (uintptr_t)in_syncs;
+
+   bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
+                          panfrost_pool_num_bos(&batch->invisible_pool) +
+                          batch->num_bos + 2,
+                       sizeof(*bo_handles));
+   assert(bo_handles);
+
+   pan_bo_access *flags = util_dynarray_begin(&batch->bos);
+   unsigned end_bo = util_dynarray_num_elements(&batch->bos, pan_bo_access);
+
+   for (int i = 0; i < end_bo; ++i) {
+      if (!flags[i])
+         continue;
+
+      assert(submit.bo_handle_count < batch->num_bos);
+      bo_handles[submit.bo_handle_count++] = i;
+
+      /* Update the BO access flags so that panfrost_bo_wait() knows
+       * about all pending accesses.
+       * We only keep the READ/WRITE info since this is all the BO
+       * wait logic cares about.
+       * We also preserve existing flags as this batch might not
+       * be the first one to access the BO.
+       */
+      struct panfrost_bo *bo = pan_lookup_bo(dev, i);
+
+      bo->gpu_access |= flags[i] & (PAN_BO_ACCESS_RW);
+   }
+
+   panfrost_pool_get_bo_handles(&batch->pool,
+                                bo_handles + submit.bo_handle_count);
+   submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
+   panfrost_pool_get_bo_handles(&batch->invisible_pool,
+                                bo_handles + submit.bo_handle_count);
+   submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);
+
+   /* Add the tiler heap to the list of accessed BOs if the batch has at
+    * least one tiler job. Tiler heap is written by tiler jobs and read
+    * by fragment jobs (the polygon list is coming from this heap).
+    */
+   if (batch->jm.jobs.vtc_jc.first_tiler)
+      bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;
+
+   /* Always used on Bifrost, occasionally used on Midgard */
+   bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle;
+
+   submit.bo_handles = (u64)(uintptr_t)bo_handles;
+   if (ctx->is_noop)
+      ret = 0;
+   else
+      ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
+   free(bo_handles);
+
+   if (ret)
+      return errno;
+
+   /* Trace the job if we're doing that */
+   if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
+      /* Wait so we can get errors reported back */
+      drmSyncobjWait(dev->fd, &out_sync, 1, INT64_MAX, 0, NULL);
+
+      if (dev->debug & PAN_DBG_TRACE)
+         pandecode_jc(dev->decode_ctx, submit.jc, dev->gpu_id);
+
+      if (dev->debug & PAN_DBG_DUMP)
+         pandecode_dump_mappings(dev->decode_ctx);
+
+      /* Jobs won't be complete if blackhole rendering, that's ok */
+      if (!ctx->is_noop && dev->debug & PAN_DBG_SYNC)
+         pandecode_abort_on_fault(dev->decode_ctx, submit.jc, dev->gpu_id);
+   }
+
+   return 0;
+}
+
+/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
+ * outsync corresponding to the later of the two (since there will be an
+ * implicit dep between them) */
+
+int
+GENX(jm_submit_batch)(struct panfrost_batch *batch)
+{
+   struct pipe_screen *pscreen = batch->ctx->base.screen;
+   struct panfrost_device *dev = pan_device(pscreen);
+   bool has_draws = batch->jm.jobs.vtc_jc.first_job;
+   bool has_tiler = batch->jm.jobs.vtc_jc.first_tiler;
+   bool has_frag = panfrost_has_fragment_job(batch);
+   uint32_t out_sync = batch->ctx->syncobj;
+   int ret = 0;
+
+   /* Take the submit lock to make sure no tiler jobs from other contexts
+    * are inserted between our tiler and fragment jobs, failing to do that
+    * might result in tiler heap corruption.
+    */
+   if (has_tiler)
+      pthread_mutex_lock(&dev->submit_lock);
+
+   if (has_draws) {
+      ret = jm_submit_jc(batch, batch->jm.jobs.vtc_jc.first_job, 0,
+                         has_frag ? 0 : out_sync);
+
+      if (ret)
+         goto done;
+   }
+
+   if (has_frag) {
+      ret =
+         jm_submit_jc(batch, batch->jm.jobs.frag, PANFROST_JD_REQ_FS, out_sync);
+      if (ret)
+         goto done;
+   }
+
+done:
+   if (has_tiler)
+      pthread_mutex_unlock(&dev->submit_lock);
+
+   return ret;
+}
+
+void
+GENX(jm_preload_fb)(struct panfrost_batch *batch, struct pan_fb_info *fb)
+{
+   GENX(pan_preload_fb)
+   (&batch->pool.base, &batch->jm.jobs.vtc_jc, fb, batch->tls.gpu,
+    PAN_ARCH >= 6 ? batch->tiler_ctx.bifrost : 0, NULL);
+}
+
+void
+GENX(jm_emit_fragment_job)(struct panfrost_batch *batch,
+                           const struct pan_fb_info *pfb)
+{
+   struct panfrost_ptr transfer =
+      pan_pool_alloc_desc(&batch->pool.base, FRAGMENT_JOB);
+
+   GENX(pan_emit_fragment_job)(pfb, batch->framebuffer.gpu, transfer.cpu);
+
+   batch->jm.jobs.frag = transfer.gpu;
+}
+
+#if PAN_ARCH == 9
+static void
+jm_emit_shader_env(struct panfrost_batch *batch,
+                   struct MALI_SHADER_ENVIRONMENT *cfg,
+                   enum pipe_shader_type stage, mali_ptr shader_ptr)
+{
+   cfg->resources = panfrost_emit_resources(batch, stage);
+   cfg->thread_storage = batch->tls.gpu;
+   cfg->shader = shader_ptr;
+
+   /* Each entry of FAU is 64-bits */
+   cfg->fau = batch->push_uniforms[stage];
+   cfg->fau_count = DIV_ROUND_UP(batch->nr_push_uniforms[stage], 2);
+}
+#endif
+
+void
+GENX(jm_launch_grid)(struct panfrost_batch *batch,
+                     const struct pipe_grid_info *info)
+{
+   struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB);
+
+   /* Invoke according to the grid info */
+
+   unsigned num_wg[3] = {info->grid[0], info->grid[1], info->grid[2]};
+
+   if (info->indirect)
+      num_wg[0] = num_wg[1] = num_wg[2] = 1;
+
+#if PAN_ARCH <= 7
+   panfrost_pack_work_groups_compute(
+      pan_section_ptr(t.cpu, COMPUTE_JOB, INVOCATION), num_wg[0], num_wg[1],
+      num_wg[2], info->block[0], info->block[1], info->block[2], false,
+      info->indirect != NULL);
+
+   pan_section_pack(t.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
+      cfg.job_task_split = util_logbase2_ceil(info->block[0] + 1) +
+                           util_logbase2_ceil(info->block[1] + 1) +
+                           util_logbase2_ceil(info->block[2] + 1);
+   }
+
+   pan_section_pack(t.cpu, COMPUTE_JOB, DRAW, cfg) {
+      cfg.state = batch->rsd[PIPE_SHADER_COMPUTE];
+      cfg.attributes = batch->attribs[PIPE_SHADER_COMPUTE];
+      cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_COMPUTE];
+      cfg.thread_storage = batch->tls.gpu;
+      cfg.uniform_buffers = batch->uniform_buffers[PIPE_SHADER_COMPUTE];
+      cfg.push_uniforms = batch->push_uniforms[PIPE_SHADER_COMPUTE];
+      cfg.textures = batch->textures[PIPE_SHADER_COMPUTE];
+      cfg.samplers = batch->samplers[PIPE_SHADER_COMPUTE];
+   }
+#else
+   struct panfrost_context *ctx = batch->ctx;
+   struct panfrost_compiled_shader *cs = ctx->prog[PIPE_SHADER_COMPUTE];
+
+   pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) {
+      cfg.workgroup_size_x = info->block[0];
+      cfg.workgroup_size_y = info->block[1];
+      cfg.workgroup_size_z = info->block[2];
+
+      cfg.workgroup_count_x = num_wg[0];
+      cfg.workgroup_count_y = num_wg[1];
+      cfg.workgroup_count_z = num_wg[2];
+
+      jm_emit_shader_env(batch, &cfg.compute, PIPE_SHADER_COMPUTE,
+                         batch->rsd[PIPE_SHADER_COMPUTE]);
+
+      /* Workgroups may be merged if the shader does not use barriers
+       * or shared memory. This condition is checked against the
+       * static shared_size at compile-time. We need to check the
+       * variable shared size at launch_grid time, because the
+       * compiler doesn't know about that.
+       */
+      cfg.allow_merging_workgroups = cs->info.cs.allow_merging_workgroups &&
+                                     (info->variable_shared_mem == 0);
+
+      cfg.task_increment = 1;
+      cfg.task_axis = MALI_TASK_AXIS_Z;
+   }
+#endif
+
+   unsigned indirect_dep = 0;
+#if PAN_GPU_INDIRECTS
+   if (info->indirect) {
+      struct pan_indirect_dispatch_info indirect = {
+         .job = t.gpu,
+         .indirect_dim = pan_resource(info->indirect)->image.data.bo->ptr.gpu +
+                         info->indirect_offset,
+         .num_wg_sysval =
+            {
+               batch->num_wg_sysval[0],
+               batch->num_wg_sysval[1],
+               batch->num_wg_sysval[2],
+            },
+      };
+
+      indirect_dep = GENX(pan_indirect_dispatch_emit)(
+         &batch->pool.base, &batch->jm.jobs.vtc_jc, &indirect);
+   }
+#endif
+
+   panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc,
+                    MALI_JOB_TYPE_COMPUTE, true, false, indirect_dep, 0, &t,
+                    false);
+}
+
+#if PAN_ARCH >= 6
+static mali_ptr
+jm_emit_tiler_desc(struct panfrost_batch *batch)
+{
+   struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
+
+   if (batch->tiler_ctx.bifrost)
+      return batch->tiler_ctx.bifrost;
+
+   struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, TILER_HEAP);
+
+   pan_pack(t.cpu, TILER_HEAP, heap) {
+      heap.size = dev->tiler_heap->size;
+      heap.base = dev->tiler_heap->ptr.gpu;
+      heap.bottom = dev->tiler_heap->ptr.gpu;
+      heap.top = dev->tiler_heap->ptr.gpu + dev->tiler_heap->size;
+   }
+
+   mali_ptr heap = t.gpu;
+   unsigned max_levels = dev->tiler_features.max_levels;
+   assert(max_levels >= 2);
+
+   t = pan_pool_alloc_desc(&batch->pool.base, TILER_CONTEXT);
+   pan_pack(t.cpu, TILER_CONTEXT, tiler) {
+      /* TODO: Select hierarchy mask more effectively */
+      tiler.hierarchy_mask = (max_levels >= 8) ? 0xFF : 0x28;
+
+      /* For large framebuffers, disable the smallest bin size to
+       * avoid pathological tiler memory usage. Required to avoid OOM
+       * on dEQP-GLES31.functional.fbo.no_attachments.maximums.all on
+       * Mali-G57.
+       */
+      if (MAX2(batch->key.width, batch->key.height) >= 4096)
+         tiler.hierarchy_mask &= ~1;
+
+      tiler.fb_width = batch->key.width;
+      tiler.fb_height = batch->key.height;
+      tiler.heap = heap;
+      tiler.sample_pattern =
+         pan_sample_pattern(util_framebuffer_get_num_samples(&batch->key));
+#if PAN_ARCH >= 9
+      tiler.first_provoking_vertex =
+         pan_tristate_get(batch->first_provoking_vertex);
+#endif
+   }
+
+   batch->tiler_ctx.bifrost = t.gpu;
+   return batch->tiler_ctx.bifrost;
+}
+#endif
+
+#if PAN_ARCH <= 7
+static inline void
+jm_emit_draw_descs(struct panfrost_batch *batch, struct MALI_DRAW *d,
+                   enum pipe_shader_type st)
+{
+   d->offset_start = batch->ctx->offset_start;
+   d->instance_size =
+      batch->ctx->instance_count > 1 ? batch->ctx->padded_count : 1;
+
+   d->uniform_buffers = batch->uniform_buffers[st];
+   d->push_uniforms = batch->push_uniforms[st];
+   d->textures = batch->textures[st];
+   d->samplers = batch->samplers[st];
+}
+
+static void
+jm_emit_vertex_draw(struct panfrost_batch *batch, void *section)
+{
+   pan_pack(section, DRAW, cfg) {
+      cfg.state = batch->rsd[PIPE_SHADER_VERTEX];
+      cfg.attributes = batch->attribs[PIPE_SHADER_VERTEX];
+      cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_VERTEX];
+      cfg.varyings = batch->varyings.vs;
+      cfg.varying_buffers = cfg.varyings ? batch->varyings.bufs : 0;
+      cfg.thread_storage = batch->tls.gpu;
+      jm_emit_draw_descs(batch, &cfg, PIPE_SHADER_VERTEX);
+   }
+}
+
+static void
+jm_emit_vertex_job(struct panfrost_batch *batch,
+                   const struct pipe_draw_info *info, void *invocation_template,
+                   void *job)
+{
+   void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION);
+   memcpy(section, invocation_template, pan_size(INVOCATION));
+
+   pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) {
+      cfg.job_task_split = 5;
+   }
+
+   section = pan_section_ptr(job, COMPUTE_JOB, DRAW);
+   jm_emit_vertex_draw(batch, section);
+}
+#endif /* PAN_ARCH <= 7 */
+
+static void
+jm_emit_tiler_draw(void *out, struct panfrost_batch *batch, bool fs_required,
+                   enum mesa_prim prim)
+{
+   struct panfrost_context *ctx = batch->ctx;
+   struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
+   bool polygon = (prim == MESA_PRIM_TRIANGLES);
+
+   pan_pack(out, DRAW, cfg) {
+      /*
+       * From the Gallium documentation,
+       * pipe_rasterizer_state::cull_face "indicates which faces of
+       * polygons to cull". Points and lines are not considered
+       * polygons and should be drawn even if all faces are culled.
+       * The hardware does not take primitive type into account when
+       * culling, so we need to do that check ourselves.
+       */
+      cfg.cull_front_face = polygon && (rast->cull_face & PIPE_FACE_FRONT);
+      cfg.cull_back_face = polygon && (rast->cull_face & PIPE_FACE_BACK);
+      cfg.front_face_ccw = rast->front_ccw;
+
+      if (ctx->occlusion_query && ctx->active_queries) {
+         if (ctx->occlusion_query->type == PIPE_QUERY_OCCLUSION_COUNTER)
+            cfg.occlusion_query = MALI_OCCLUSION_MODE_COUNTER;
+         else
+            cfg.occlusion_query = MALI_OCCLUSION_MODE_PREDICATE;
+
+         struct panfrost_resource *rsrc =
+            pan_resource(ctx->occlusion_query->rsrc);
+         cfg.occlusion = rsrc->image.data.bo->ptr.gpu;
+         panfrost_batch_write_rsrc(ctx->batch, rsrc, PIPE_SHADER_FRAGMENT);
+      }
+
+#if PAN_ARCH >= 9
+      struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT];
+
+      cfg.multisample_enable = rast->multisample;
+      cfg.sample_mask = rast->multisample ? ctx->sample_mask : 0xFFFF;
+
+      /* Use per-sample shading if required by the API. Also use it when a
+       * blend shader is used with multisampling, as this is handled
+       * by a single ST_TILE in the blend shader with the current
+       * sample ID, requiring per-sample shading.
+       */
+      cfg.evaluate_per_sample =
+         (rast->multisample &&
+          ((ctx->min_samples > 1) || ctx->valhall_has_blend_shader));
+
+      cfg.single_sampled_lines = !rast->multisample;
+
+      cfg.vertex_array.packet = true;
+
+      cfg.minimum_z = batch->minimum_z;
+      cfg.maximum_z = batch->maximum_z;
+
+      cfg.depth_stencil = batch->depth_stencil;
+
+      if (fs_required) {
+         bool has_oq = ctx->occlusion_query && ctx->active_queries;
+
+         struct pan_earlyzs_state earlyzs = pan_earlyzs_get(
+            fs->earlyzs, ctx->depth_stencil->writes_zs || has_oq,
+            ctx->blend->base.alpha_to_coverage,
+            ctx->depth_stencil->zs_always_passes);
+
+         cfg.pixel_kill_operation = earlyzs.kill;
+         cfg.zs_update_operation = earlyzs.update;
+
+         cfg.allow_forward_pixel_to_kill =
+            pan_allow_forward_pixel_to_kill(ctx, fs);
+         cfg.allow_forward_pixel_to_be_killed = !fs->info.writes_global;
+
+         /* Mask of render targets that may be written. A render
+          * target may be written if the fragment shader writes
+          * to it AND it actually exists. If the render target
+          * doesn't actually exist, the blend descriptor will be
+          * OFF so it may be omitted from the mask.
+          *
+          * Only set when there is a fragment shader, since
+          * otherwise no colour updates are possible.
+          */
+         cfg.render_target_mask =
+            (fs->info.outputs_written >> FRAG_RESULT_DATA0) & ctx->fb_rt_mask;
+
+         /* Also use per-sample shading if required by the shader
+          */
+         cfg.evaluate_per_sample |= fs->info.fs.sample_shading;
+
+         /* Unlike Bifrost, alpha-to-coverage must be included in
+          * this identically-named flag. Confusing, isn't it?
+          */
+         cfg.shader_modifies_coverage = fs->info.fs.writes_coverage ||
+                                        fs->info.fs.can_discard ||
+                                        ctx->blend->base.alpha_to_coverage;
+
+         /* Blend descriptors are only accessed by a BLEND
+          * instruction on Valhall. It follows that if the
+          * fragment shader is omitted, we may also omit the
+          * blend descriptors.
+          */
+         cfg.blend = batch->blend;
+         cfg.blend_count = MAX2(batch->key.nr_cbufs, 1);
+         cfg.alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
+
+         cfg.overdraw_alpha0 = panfrost_overdraw_alpha(ctx, 0);
+         cfg.overdraw_alpha1 = panfrost_overdraw_alpha(ctx, 1);
+
+         jm_emit_shader_env(batch, &cfg.shader, PIPE_SHADER_FRAGMENT,
+                            batch->rsd[PIPE_SHADER_FRAGMENT]);
+      } else {
+         /* These operations need to be FORCE to benefit from the
+          * depth-only pass optimizations.
+          */
+         cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
+         cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_EARLY;
+
+         /* No shader and no blend => no shader or blend
+          * reasons to disable FPK. The only FPK-related state
+          * not covered is alpha-to-coverage which we don't set
+          * without blend.
+          */
+         cfg.allow_forward_pixel_to_kill = true;
+
+         /* No shader => no shader side effects */
+         cfg.allow_forward_pixel_to_be_killed = true;
+
+         /* Alpha isn't written so these are vacuous */
+         cfg.overdraw_alpha0 = true;
+         cfg.overdraw_alpha1 = true;
+      }
+#else
+      cfg.position = batch->varyings.pos;
+      cfg.state = batch->rsd[PIPE_SHADER_FRAGMENT];
+      cfg.attributes = batch->attribs[PIPE_SHADER_FRAGMENT];
+      cfg.attribute_buffers = batch->attrib_bufs[PIPE_SHADER_FRAGMENT];
+      cfg.viewport = batch->viewport;
+      cfg.varyings = batch->varyings.fs;
+      cfg.varying_buffers = cfg.varyings ? batch->varyings.bufs : 0;
+      cfg.thread_storage = batch->tls.gpu;
+
+      /* For all primitives but lines DRAW.flat_shading_vertex must
+       * be set to 0 and the provoking vertex is selected with the
+       * PRIMITIVE.first_provoking_vertex field.
+       */
+      if (prim == MESA_PRIM_LINES) {
+         /* The logic is inverted across arches. */
+         cfg.flat_shading_vertex = rast->flatshade_first ^ (PAN_ARCH <= 5);
+      }
+
+      jm_emit_draw_descs(batch, &cfg, PIPE_SHADER_FRAGMENT);
+#endif
+   }
+}
+
+/* Packs a primitive descriptor, mostly common between Midgard/Bifrost tiler
+ * jobs and Valhall IDVS jobs
+ */
+static void
+jm_emit_primitive(struct panfrost_batch *batch,
+                  const struct pipe_draw_info *info,
+                  const struct pipe_draw_start_count_bias *draw,
+                  bool secondary_shader, void *out)
+{
+   struct panfrost_context *ctx = batch->ctx;
+   UNUSED struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
+
+   pan_pack(out, PRIMITIVE, cfg) {
+      cfg.draw_mode = pan_draw_mode(info->mode);
+      if (panfrost_writes_point_size(ctx))
+         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;
+
+#if PAN_ARCH <= 8
+      /* For line primitives, PRIMITIVE.first_provoking_vertex must
+       * be set to true and the provoking vertex is selected with
+       * DRAW.flat_shading_vertex.
+       */
+      if (u_reduced_prim(info->mode) == MESA_PRIM_LINES)
+         cfg.first_provoking_vertex = true;
+      else
+         cfg.first_provoking_vertex = rast->flatshade_first;
+
+      if (panfrost_is_implicit_prim_restart(info)) {
+         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
+      } else if (info->primitive_restart) {
+         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_EXPLICIT;
+         cfg.primitive_restart_index = info->restart_index;
+      }
+
+      cfg.job_task_split = 6;
+#else
+      struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT];
+
+      cfg.allow_rotating_primitives = allow_rotating_primitives(fs, info);
+      cfg.primitive_restart = info->primitive_restart;
+
+      /* Non-fixed restart indices should have been lowered */
+      assert(!cfg.primitive_restart || panfrost_is_implicit_prim_restart(info));
+#endif
+
+      cfg.index_count = draw->count;
+      cfg.index_type = panfrost_translate_index_size(info->index_size);
+
+      if (PAN_ARCH >= 9) {
+         /* Base vertex offset on Valhall is used for both
+          * indexed and non-indexed draws, in a simple way for
+          * either. Handle both cases.
+          */
+         if (cfg.index_type)
+            cfg.base_vertex_offset = draw->index_bias;
+         else
+            cfg.base_vertex_offset = draw->start;
+
+         /* Indices are moved outside the primitive descriptor
+          * on Valhall, so we don't need to set that here
+          */
+      } else if (cfg.index_type) {
+         cfg.base_vertex_offset = draw->index_bias - ctx->offset_start;
+
+#if PAN_ARCH <= 7
+         cfg.indices = batch->indices;
+#endif
+      }
+
+#if PAN_ARCH >= 6
+      cfg.secondary_shader = secondary_shader;
+#endif
+   }
+}
+
+#if PAN_ARCH == 9
+static void
+jm_emit_malloc_vertex_job(struct panfrost_batch *batch,
+                          const struct pipe_draw_info *info,
+                          const struct pipe_draw_start_count_bias *draw,
+                          bool secondary_shader, void *job)
+{
+   struct panfrost_context *ctx = batch->ctx;
+   struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX];
+   struct panfrost_compiled_shader *fs = ctx->prog[PIPE_SHADER_FRAGMENT];
+
+   bool fs_required = panfrost_fs_required(
+      fs, ctx->blend, &ctx->pipe_framebuffer, ctx->depth_stencil);
+
+   /* Varying shaders only feed data to the fragment shader, so if we omit
+    * the fragment shader, we should omit the varying shader too.
+ */ + secondary_shader &= fs_required; + + jm_emit_primitive(batch, info, draw, secondary_shader, + pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE)); + + pan_section_pack(job, MALLOC_VERTEX_JOB, INSTANCE_COUNT, cfg) { + cfg.count = info->instance_count; + } + + pan_section_pack(job, MALLOC_VERTEX_JOB, ALLOCATION, cfg) { + if (secondary_shader) { + unsigned sz = panfrost_vertex_attribute_stride(vs, fs); + cfg.vertex_packet_stride = sz + 16; + cfg.vertex_attribute_stride = sz; + } else { + /* Hardware requirement for "no varyings" */ + cfg.vertex_packet_stride = 16; + cfg.vertex_attribute_stride = 0; + } + } + + pan_section_pack(job, MALLOC_VERTEX_JOB, TILER, cfg) { + cfg.address = jm_emit_tiler_desc(batch); + } + + STATIC_ASSERT(sizeof(batch->scissor) == pan_size(SCISSOR)); + memcpy(pan_section_ptr(job, MALLOC_VERTEX_JOB, SCISSOR), &batch->scissor, + pan_size(SCISSOR)); + + panfrost_emit_primitive_size( + ctx, info->mode == MESA_PRIM_POINTS, 0, + pan_section_ptr(job, MALLOC_VERTEX_JOB, PRIMITIVE_SIZE)); + + pan_section_pack(job, MALLOC_VERTEX_JOB, INDICES, cfg) { + cfg.address = batch->indices; + } + + jm_emit_tiler_draw(pan_section_ptr(job, MALLOC_VERTEX_JOB, DRAW), batch, + fs_required, u_reduced_prim(info->mode)); + + pan_section_pack(job, MALLOC_VERTEX_JOB, POSITION, cfg) { + jm_emit_shader_env(batch, &cfg, PIPE_SHADER_VERTEX, + panfrost_get_position_shader(batch, info)); + } + + pan_section_pack(job, MALLOC_VERTEX_JOB, VARYING, cfg) { + /* If a varying shader is used, we configure it with the same + * state as the position shader for backwards compatible + * behaviour with Bifrost. This could be optimized. + */ + if (!secondary_shader) + continue; + + jm_emit_shader_env(batch, &cfg, PIPE_SHADER_VERTEX, + panfrost_get_varying_shader(batch)); + } +} +#endif + +#if PAN_ARCH <= 7 +static void +jm_emit_tiler_job(struct panfrost_batch *batch, + const struct pipe_draw_info *info, + const struct pipe_draw_start_count_bias *draw, + void *invocation_template, bool secondary_shader, void *job) +{ + struct panfrost_context *ctx = batch->ctx; + + void *section = pan_section_ptr(job, TILER_JOB, INVOCATION); + memcpy(section, invocation_template, pan_size(INVOCATION)); + + jm_emit_primitive(batch, info, draw, secondary_shader, + pan_section_ptr(job, TILER_JOB, PRIMITIVE)); + + void *prim_size = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE); + enum mesa_prim prim = u_reduced_prim(info->mode); + +#if PAN_ARCH >= 6 + pan_section_pack(job, TILER_JOB, TILER, cfg) { + cfg.address = jm_emit_tiler_desc(batch); + } + + pan_section_pack(job, TILER_JOB, PADDING, cfg) + ; +#endif + + jm_emit_tiler_draw(pan_section_ptr(job, TILER_JOB, DRAW), batch, true, prim); + + panfrost_emit_primitive_size(ctx, prim == MESA_PRIM_POINTS, + batch->varyings.psiz, prim_size); +} +#endif + +void +GENX(jm_launch_xfb)(struct panfrost_batch *batch, + const struct pipe_draw_info *info, unsigned count) +{ + struct panfrost_ptr t = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); + +#if PAN_ARCH == 9 + pan_section_pack(t.cpu, COMPUTE_JOB, PAYLOAD, cfg) { + cfg.workgroup_size_x = 1; + cfg.workgroup_size_y = 1; + cfg.workgroup_size_z = 1; + + cfg.workgroup_count_x = count; + cfg.workgroup_count_y = info->instance_count; + cfg.workgroup_count_z = 1; + + jm_emit_shader_env(batch, &cfg.compute, PIPE_SHADER_VERTEX, + batch->rsd[PIPE_SHADER_VERTEX]); + + /* TODO: Indexing. Also, this is a legacy feature... 
*/ + cfg.compute.attribute_offset = batch->ctx->offset_start; + + /* Transform feedback shaders do not use barriers or shared + * memory, so we may merge workgroups. + */ + cfg.allow_merging_workgroups = true; + cfg.task_increment = 1; + cfg.task_axis = MALI_TASK_AXIS_Z; + } +#else + struct mali_invocation_packed invocation; + + panfrost_pack_work_groups_compute(&invocation, 1, count, + info->instance_count, 1, 1, 1, + PAN_ARCH <= 5, false); + + /* No varyings on XFB compute jobs. */ + mali_ptr saved_vs_varyings = batch->varyings.vs; + + batch->varyings.vs = 0; + jm_emit_vertex_job(batch, info, &invocation, t.cpu); + batch->varyings.vs = saved_vs_varyings; + +#endif + enum mali_job_type job_type = MALI_JOB_TYPE_COMPUTE; +#if PAN_ARCH <= 5 + job_type = MALI_JOB_TYPE_VERTEX; +#endif + panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc, job_type, true, + false, 0, 0, &t, false); +} + +#if PAN_ARCH < 9 +/* + * Push jobs required for the rasterization pipeline. If there are side effects + * from the vertex shader, these are handled ahead-of-time with a compute + * shader. This function should not be called if rasterization is skipped. + */ +static void +jm_push_vertex_tiler_jobs(struct panfrost_batch *batch, + const struct panfrost_ptr *vertex_job, + const struct panfrost_ptr *tiler_job) +{ + unsigned vertex = panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc, + MALI_JOB_TYPE_VERTEX, false, false, 0, 0, + vertex_job, false); + + panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc, + MALI_JOB_TYPE_TILER, false, false, vertex, 0, tiler_job, + false); +} +#endif + +void +GENX(jm_launch_draw)(struct panfrost_batch *batch, + const struct pipe_draw_info *info, unsigned drawid_offset, + const struct pipe_draw_start_count_bias *draw, + unsigned vertex_count) +{ + struct panfrost_context *ctx = batch->ctx; + struct panfrost_compiled_shader *vs = ctx->prog[PIPE_SHADER_VERTEX]; + bool secondary_shader = vs->info.vs.secondary_enable; + bool idvs = vs->info.vs.idvs; + +#if PAN_ARCH <= 7 + struct mali_invocation_packed invocation; + if (info->instance_count > 1) { + panfrost_pack_work_groups_compute(&invocation, 1, vertex_count, + info->instance_count, 1, 1, 1, true, + false); + } else { + pan_pack(&invocation, INVOCATION, cfg) { + cfg.invocations = vertex_count - 1; + cfg.size_y_shift = 0; + cfg.size_z_shift = 0; + cfg.workgroups_x_shift = 0; + cfg.workgroups_y_shift = 0; + cfg.workgroups_z_shift = 32; + cfg.thread_group_split = MALI_SPLIT_MIN_EFFICIENT; + } + } + + /* Emit all sort of descriptors. 
*/ +#endif + + UNUSED struct panfrost_ptr tiler, vertex; + + if (idvs) { +#if PAN_ARCH == 9 + tiler = pan_pool_alloc_desc(&batch->pool.base, MALLOC_VERTEX_JOB); +#elif PAN_ARCH >= 6 + tiler = pan_pool_alloc_desc(&batch->pool.base, INDEXED_VERTEX_JOB); +#else + unreachable("IDVS is unsupported on Midgard"); +#endif + } else { + vertex = pan_pool_alloc_desc(&batch->pool.base, COMPUTE_JOB); + tiler = pan_pool_alloc_desc(&batch->pool.base, TILER_JOB); + } + +#if PAN_ARCH == 9 + assert(idvs && "Memory allocated IDVS required on Valhall"); + + jm_emit_malloc_vertex_job(batch, info, draw, secondary_shader, tiler.cpu); + + panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc, + MALI_JOB_TYPE_MALLOC_VERTEX, false, false, 0, 0, &tiler, + false); +#else + /* Fire off the draw itself */ + jm_emit_tiler_job(batch, info, draw, &invocation, secondary_shader, + tiler.cpu); + if (idvs) { +#if PAN_ARCH >= 6 + jm_emit_vertex_draw( + batch, pan_section_ptr(tiler.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW)); + + panfrost_add_job(&batch->pool.base, &batch->jm.jobs.vtc_jc, + MALI_JOB_TYPE_INDEXED_VERTEX, false, false, 0, 0, &tiler, + false); +#endif + } else { + jm_emit_vertex_job(batch, info, &invocation, vertex.cpu); + jm_push_vertex_tiler_jobs(batch, &vertex, &tiler); + } +#endif +} diff --git a/src/gallium/drivers/panfrost/pan_jm.h b/src/gallium/drivers/panfrost/pan_jm.h index c3ce0644f2c..fda0d78dc36 100644 --- a/src/gallium/drivers/panfrost/pan_jm.h +++ b/src/gallium/drivers/panfrost/pan_jm.h @@ -38,4 +38,36 @@ struct panfrost_jm_batch { } jobs; }; +#if defined(PAN_ARCH) && PAN_ARCH < 10 + +#include "genxml/gen_macros.h" + +struct panfrost_batch; +struct panfrost_context; +struct pan_fb_info; +struct pipe_draw_info; +struct pipe_grid_info; +struct pipe_draw_start_count_bias; + +void GENX(jm_init_batch)(struct panfrost_batch *batch); +int GENX(jm_submit_batch)(struct panfrost_batch *batch); + +void GENX(jm_preload_fb)(struct panfrost_batch *batch, struct pan_fb_info *fb); +void GENX(jm_emit_fragment_job)(struct panfrost_batch *batch, + const struct pan_fb_info *pfb); + +void GENX(jm_launch_xfb)(struct panfrost_batch *batch, + const struct pipe_draw_info *info, unsigned count); + +void GENX(jm_launch_grid)(struct panfrost_batch *batch, + const struct pipe_grid_info *info); + +void GENX(jm_launch_draw)(struct panfrost_batch *batch, + const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_start_count_bias *draw, + unsigned vertex_count); + +#endif /* PAN_ARCH < 10 */ + #endif