diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c index 52434dff42e..9f406cd3578 100644 --- a/src/asahi/compiler/agx_compile.c +++ b/src/asahi/compiler/agx_compile.c @@ -2959,7 +2959,8 @@ agx_preprocess_nir(nir_shader *nir, const nir_shader *libagx, out->inputs_flat_shaded = masks.flat; out->inputs_linear_shaded = masks.linear; } - } else if (nir->info.stage == MESA_SHADER_VERTEX) { + } else if (nir->info.stage == MESA_SHADER_VERTEX || + nir->info.stage == MESA_SHADER_TESS_EVAL) { out->has_edgeflags = nir->info.outputs_written & VARYING_BIT_EDGE; out->cull_distance_size = nir->info.cull_distance_array_size; diff --git a/src/asahi/compiler/agx_nir_lower_cull_distance.c b/src/asahi/compiler/agx_nir_lower_cull_distance.c index 8c7734e3342..58c15730bb2 100644 --- a/src/asahi/compiler/agx_nir_lower_cull_distance.c +++ b/src/asahi/compiler/agx_nir_lower_cull_distance.c @@ -9,6 +9,7 @@ #include "agx_compile.h" #include "agx_nir.h" #include "glsl_types.h" +#include "shader_enums.h" /* * Lower cull distance to discard. 
From the spec: @@ -61,7 +62,9 @@ lower_write(nir_builder *b, nir_intrinsic_instr *intr, UNUSED void *data) bool agx_nir_lower_cull_distance_vs(nir_shader *s) { - assert(s->info.stage == MESA_SHADER_VERTEX); + assert(s->info.stage == MESA_SHADER_VERTEX || + s->info.stage == MESA_SHADER_TESS_EVAL); + assert(s->info.outputs_written & VARYING_BIT_CULL_DIST0); nir_shader_intrinsics_pass( diff --git a/src/asahi/lib/agx_nir_lower_gs.c b/src/asahi/lib/agx_nir_lower_gs.c index 238126e2492..fe8b3f64083 100644 --- a/src/asahi/lib/agx_nir_lower_gs.c +++ b/src/asahi/lib/agx_nir_lower_gs.c @@ -131,15 +131,10 @@ add_counter(nir_builder *b, nir_def *counter, nir_def *increment) } /* Helpers for lowering I/O to variables */ -struct lower_output_to_var_state { - nir_variable *outputs[NUM_TOTAL_VARYING_SLOTS]; - bool arrayed; -}; - -static bool -lower_output_to_var(nir_builder *b, nir_instr *instr, void *data) +bool +agx_lower_output_to_var(nir_builder *b, nir_instr *instr, void *data) { - struct lower_output_to_var_state *state = data; + struct agx_lower_output_to_var_state *state = data; if (instr->type != nir_instr_type_intrinsic) return false; @@ -201,7 +196,7 @@ load_instance_id(nir_builder *b) static bool lower_gs_inputs(nir_builder *b, nir_intrinsic_instr *intr, void *data) { - struct lower_output_to_var_state *vs_state = data; + struct agx_lower_output_to_var_state *vs_state = data; if (intr->intrinsic != nir_intrinsic_load_per_vertex_input) return false; @@ -249,7 +244,7 @@ lower_id_in_prim(nir_builder *b, nir_instr *instr, void *data) static void agx_nir_link_vs_gs(nir_shader *vs, nir_shader *gs) { - struct lower_output_to_var_state state = {.arrayed = true}; + struct agx_lower_output_to_var_state state = {.arrayed = true}; /* Vertex shader outputs will be placed in arrays. Create those arrays. 
*/ u_foreach_bit64(slot, vs->info.outputs_written) { @@ -278,7 +273,7 @@ agx_nir_link_vs_gs(nir_shader *vs, nir_shader *gs) /* The vertex shader needs to be expressed in terms of that index */ nir_function_instructions_pass( - vs_function->impl, lower_output_to_var, + vs_function->impl, agx_lower_output_to_var, nir_metadata_block_index | nir_metadata_dominance, &state); nir_function_instructions_pass( @@ -1144,7 +1139,7 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx, *gs_count = NULL; /* Geometry shader outputs are staged to temporaries */ - struct lower_output_to_var_state state = {.arrayed = false}; + struct agx_lower_output_to_var_state state = {.arrayed = false}; u_foreach_bit64(slot, gs->info.outputs_written) { const char *slot_name = @@ -1165,7 +1160,7 @@ agx_nir_lower_gs(nir_shader *gs, nir_shader *vs, const nir_shader *libagx, gs_state.stride_B += size_B; } - NIR_PASS(_, gs, nir_shader_instructions_pass, lower_output_to_var, + NIR_PASS(_, gs, nir_shader_instructions_pass, agx_lower_output_to_var, nir_metadata_block_index | nir_metadata_dominance, &state); /* Set flatshade_first. 
For now this is always a constant, but in the future diff --git a/src/asahi/lib/agx_nir_lower_gs.h b/src/asahi/lib/agx_nir_lower_gs.h index 3925f07e5da..e2799c748e2 100644 --- a/src/asahi/lib/agx_nir_lower_gs.h +++ b/src/asahi/lib/agx_nir_lower_gs.h @@ -7,11 +7,25 @@ #define __AGX_NIR_LOWER_GS_H #include +#include +#include "shader_enums.h" struct nir_shader; struct agx_ia_key; enum mesa_prim; +struct nir_instr; +struct nir_builder; +struct nir_variable; + +struct agx_lower_output_to_var_state { + struct nir_variable *outputs[NUM_TOTAL_VARYING_SLOTS]; + bool arrayed; +}; + +bool agx_lower_output_to_var(struct nir_builder *b, struct nir_instr *instr, + void *data); + bool agx_nir_lower_ia(struct nir_shader *s, struct agx_ia_key *ia); bool agx_nir_lower_multidraw(struct nir_shader *s, struct agx_ia_key *key); @@ -33,4 +47,13 @@ struct nir_shader *agx_nir_unroll_restart(const struct nir_shader *libagx, enum mesa_prim prim, unsigned index_size_B); +bool agx_nir_lower_tcs(struct nir_shader *tcs, const struct nir_shader *vs, + const struct nir_shader *libagx, uint8_t index_size_B); + +bool agx_nir_lower_tes(struct nir_shader *tes, const struct nir_shader *libagx); + +uint64_t agx_tcs_per_vertex_outputs(const struct nir_shader *nir); + +unsigned agx_tcs_output_stride(const struct nir_shader *nir); + #endif diff --git a/src/asahi/lib/agx_nir_lower_ia.c b/src/asahi/lib/agx_nir_lower_ia.c index c14e59191e3..2c97bab1fb0 100644 --- a/src/asahi/lib/agx_nir_lower_ia.c +++ b/src/asahi/lib/agx_nir_lower_ia.c @@ -12,6 +12,7 @@ #include "nir.h" #include "nir_builder_opcodes.h" #include "nir_intrinsics.h" +#include "shader_enums.h" /* * This file implements input assembly in software for geometry/tessellation @@ -27,14 +28,60 @@ * This multidraw implementation kicks off the prefix sum and lowered draw. */ +/* + * Sync with geometry.cl, this is preferred to avoid NIR needing to chew through + * the massive switch statement (bad for compile time). 
+ */ +static nir_def * +vertex_id_for_topology(nir_builder *b, struct agx_ia_key *key) +{ + nir_def *prim = nir_load_primitive_id(b); + nir_def *vert = nir_load_vertex_id_in_primitive_agx(b); + nir_def *flatshade_first = nir_imm_bool(b, key->flatshade_first); + + switch (key->mode) { + case MESA_PRIM_POINTS: + case MESA_PRIM_LINES: + case MESA_PRIM_TRIANGLES: + case MESA_PRIM_LINES_ADJACENCY: + case MESA_PRIM_TRIANGLES_ADJACENCY: + return nir_iadd( + b, nir_imul_imm(b, prim, mesa_vertices_per_prim(key->mode)), vert); + + case MESA_PRIM_LINE_LOOP: + return libagx_vertex_id_for_line_loop(b, prim, vert, + nir_load_num_vertices(b)); + + case MESA_PRIM_LINE_STRIP: + case MESA_PRIM_LINE_STRIP_ADJACENCY: + return nir_iadd(b, prim, vert); + + case MESA_PRIM_TRIANGLE_STRIP: { + return nir_iadd( + b, prim, + libagx_map_vertex_in_tri_strip(b, prim, vert, flatshade_first)); + } + + case MESA_PRIM_TRIANGLE_FAN: + return libagx_vertex_id_for_tri_fan(b, prim, vert, flatshade_first); + + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: + return libagx_vertex_id_for_tri_strip_adj( + b, prim, vert, nir_load_num_vertices(b), flatshade_first); + + case MESA_PRIM_PATCHES: + return nir_iadd(b, nir_imul(b, prim, nir_load_patch_vertices_in(b)), + nir_load_invocation_id(b)); + + default: + unreachable("invalid mode"); + } +} + static nir_def * load_vertex_id(nir_builder *b, struct agx_ia_key *key) { - /* Tessellate by primitive mode */ - nir_def *id = libagx_vertex_id_for_topology( - b, nir_imm_int(b, key->mode), nir_imm_bool(b, key->flatshade_first), - nir_load_primitive_id(b), nir_load_vertex_id_in_primitive_agx(b), - nir_load_num_vertices(b)); + nir_def *id = vertex_id_for_topology(b, key); /* If drawing with an index buffer, pull the vertex ID. Otherwise, the * vertex ID is just the index as-is. 
diff --git a/src/asahi/lib/agx_nir_lower_tess.c b/src/asahi/lib/agx_nir_lower_tess.c new file mode 100644 index 00000000000..6381139d218 --- /dev/null +++ b/src/asahi/lib/agx_nir_lower_tess.c @@ -0,0 +1,395 @@ +/* + * Copyright 2023 Alyssa Rosenzweig + * SPDX-License-Identifier: MIT + */ + +#include "shaders/geometry.h" +#include "util/bitscan.h" +#include "util/macros.h" +#include "agx_nir_lower_gs.h" +#include "glsl_types.h" +#include "libagx_shaders.h" +#include "nir.h" +#include "nir_builder.h" +#include "nir_builder_opcodes.h" +#include "nir_intrinsics.h" +#include "nir_intrinsics_indices.h" +#include "shader_enums.h" + +struct tcs_state { + struct agx_lower_output_to_var_state vs_vars; + uint64_t vs_outputs_written; +}; + +static nir_def * +tcs_patch_id(nir_builder *b) +{ + return nir_channel(b, nir_load_workgroup_id(b), 0); +} + +static nir_def * +tcs_instance_id(nir_builder *b) +{ + return nir_channel(b, nir_load_workgroup_id(b), 1); +} + +static nir_def * +tcs_unrolled_id(nir_builder *b) +{ + nir_def *stride = nir_channel(b, nir_load_num_workgroups(b), 0); + + return nir_iadd(b, nir_imul(b, tcs_instance_id(b), stride), tcs_patch_id(b)); +} + +uint64_t +agx_tcs_per_vertex_outputs(const nir_shader *nir) +{ + return nir->info.outputs_written & + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER | + VARYING_BIT_BOUNDING_BOX0 | VARYING_BIT_BOUNDING_BOX1); +} + +unsigned +agx_tcs_output_stride(const nir_shader *nir) +{ + return libagx_tcs_out_stride(util_last_bit(nir->info.patch_outputs_written), + nir->info.tess.tcs_vertices_out, + agx_tcs_per_vertex_outputs(nir)); +} + +static nir_def * +tcs_out_addr(nir_builder *b, nir_intrinsic_instr *intr, nir_def *vertex_id) +{ + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + + nir_def *offset = nir_get_io_offset_src(intr)->ssa; + nir_def *addr = libagx_tcs_out_address( + b, nir_load_tess_param_buffer_agx(b), tcs_unrolled_id(b), vertex_id, + nir_iadd_imm(b, offset, sem.location), + nir_imm_int(b, 
util_last_bit(b->shader->info.patch_outputs_written)), + nir_imm_int(b, b->shader->info.tess.tcs_vertices_out), + nir_imm_int64(b, agx_tcs_per_vertex_outputs(b->shader))); + + addr = nir_iadd_imm(b, addr, nir_intrinsic_component(intr) * 4); + + return addr; +} + +static nir_def * +lower_tes_load(nir_builder *b, nir_intrinsic_instr *intr) +{ + gl_varying_slot location = nir_intrinsic_io_semantics(intr).location; + nir_src *offset_src = nir_get_io_offset_src(intr); + + nir_def *vertex = nir_imm_int(b, 0); + nir_def *offset = offset_src ? offset_src->ssa : nir_imm_int(b, 0); + + if (intr->intrinsic == nir_intrinsic_load_per_vertex_input) + vertex = intr->src[0].ssa; + + nir_def *addr = libagx_tes_in_address(b, nir_load_tess_param_buffer_agx(b), + nir_load_vertex_id(b), vertex, + nir_iadd_imm(b, offset, location)); + + if (nir_intrinsic_has_component(intr)) + addr = nir_iadd_imm(b, addr, nir_intrinsic_component(intr) * 4); + + return nir_load_global_constant(b, addr, 4, intr->def.num_components, + intr->def.bit_size); +} + +static nir_def * +tcs_load_input(nir_builder *b, nir_intrinsic_instr *intr, + struct tcs_state *state) +{ + nir_io_semantics sem = nir_intrinsic_io_semantics(intr); + + nir_def *off = libagx_tcs_in_offset( + b, intr->src[0].ssa, nir_iadd_imm(b, intr->src[1].ssa, sem.location), + nir_imm_int64(b, state->vs_outputs_written)); + + off = nir_iadd_imm(b, off, 4 * nir_intrinsic_component(intr)); + + return nir_load_shared(b, intr->def.num_components, 32, off); +} + +static nir_def * +lower_tcs_impl(nir_builder *b, nir_intrinsic_instr *intr, + struct tcs_state *state) +{ + switch (intr->intrinsic) { + case nir_intrinsic_barrier: + /* A patch fits in a subgroup, so the barrier is unnecessary. 
*/ + return NIR_LOWER_INSTR_PROGRESS_REPLACE; + + case nir_intrinsic_load_primitive_id: + return tcs_patch_id(b); + + case nir_intrinsic_load_instance_id: + return tcs_instance_id(b); + + case nir_intrinsic_load_invocation_id: + return nir_channel(b, nir_load_local_invocation_id(b), 0); + + case nir_intrinsic_load_per_vertex_input: + return tcs_load_input(b, intr, state); + + case nir_intrinsic_load_patch_vertices_in: + return libagx_tcs_patch_vertices_in(b, nir_load_tess_param_buffer_agx(b)); + + case nir_intrinsic_load_tess_level_outer_default: + return libagx_tess_level_outer_default(b, + nir_load_tess_param_buffer_agx(b)); + + case nir_intrinsic_load_tess_level_inner_default: + return libagx_tess_level_inner_default(b, + nir_load_tess_param_buffer_agx(b)); + + case nir_intrinsic_load_output: { + nir_def *addr = tcs_out_addr(b, intr, nir_undef(b, 1, 32)); + return nir_load_global(b, addr, 4, intr->def.num_components, + intr->def.bit_size); + } + + case nir_intrinsic_load_per_vertex_output: { + nir_def *addr = tcs_out_addr(b, intr, intr->src[0].ssa); + return nir_load_global(b, addr, 4, intr->def.num_components, + intr->def.bit_size); + } + + case nir_intrinsic_store_output: { + nir_store_global(b, tcs_out_addr(b, intr, nir_undef(b, 1, 32)), 4, + intr->src[0].ssa, nir_intrinsic_write_mask(intr)); + return NIR_LOWER_INSTR_PROGRESS_REPLACE; + } + + case nir_intrinsic_store_per_vertex_output: { + nir_store_global(b, tcs_out_addr(b, intr, intr->src[1].ssa), 4, + intr->src[0].ssa, nir_intrinsic_write_mask(intr)); + return NIR_LOWER_INSTR_PROGRESS_REPLACE; + } + + default: + return NULL; + } +} + +static bool +lower_tcs(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + b->cursor = nir_before_instr(&intr->instr); + + nir_def *repl = lower_tcs_impl(b, intr, data); + if (!repl) + return false; + + if (repl != NIR_LOWER_INSTR_PROGRESS_REPLACE) + nir_def_rewrite_uses(&intr->def, repl); + + nir_instr_remove(&intr->instr); + return true; +} + +static void 
+link_libagx(nir_shader *nir, const nir_shader *libagx) +{ + nir_link_shader_functions(nir, libagx); + NIR_PASS(_, nir, nir_inline_functions); + nir_remove_non_entrypoints(nir); + NIR_PASS(_, nir, nir_lower_indirect_derefs, nir_var_function_temp, 64); + NIR_PASS(_, nir, nir_opt_dce); + NIR_PASS(_, nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, + glsl_get_cl_type_size_align); + NIR_PASS(_, nir, nir_opt_deref); + NIR_PASS(_, nir, nir_lower_vars_to_ssa); + NIR_PASS(_, nir, nir_lower_explicit_io, + nir_var_shader_temp | nir_var_function_temp | nir_var_mem_shared | + nir_var_mem_global, + nir_address_format_62bit_generic); +} + +/* + * Predicate the TCS so the merged shader works when input patch size > output + * patch size. + */ +static bool +agx_nir_predicate_tcs(nir_shader *tcs) +{ + nir_function_impl *entry = nir_shader_get_entrypoint(tcs); + nir_cf_list list; + nir_cf_extract(&list, nir_before_impl(entry), nir_after_impl(entry)); + + nir_builder b = nir_builder_at(nir_after_block(nir_start_block(entry))); + nir_def *input_vtx_id = nir_load_invocation_id(&b); + unsigned verts = tcs->info.tess.tcs_vertices_out; + + nir_push_if(&b, nir_ult_imm(&b, input_vtx_id, verts)); + { + nir_cf_reinsert(&list, b.cursor); + } + nir_pop_if(&b, NULL); + + nir_metadata_preserve(entry, nir_metadata_none); + return false; +} + +bool +agx_nir_lower_tcs(nir_shader *tcs, const nir_shader *vs, + const struct nir_shader *libagx, uint8_t index_size_B) +{ + agx_nir_predicate_tcs(tcs); + + nir_function_impl *tcs_entry = nir_shader_get_entrypoint(tcs); + + /* Link the vertex shader with the TCS. This assumes that all functions have + * been inlined in the vertex shader. 
+ */ + nir_function_impl *vs_entry = nir_shader_get_entrypoint(vs); + nir_function *vs_function = nir_function_create(tcs, "vertex"); + vs_function->impl = nir_function_impl_clone(tcs, vs_entry); + vs_function->impl->function = vs_function; + + /* Vertex shader outputs are staged to temporaries */ + struct tcs_state state = { + .vs_vars.arrayed = false, + .vs_outputs_written = vs->info.outputs_written & tcs->info.inputs_read, + }; + + u_foreach_bit64(slot, vs->info.outputs_written) { + const char *slot_name = + gl_varying_slot_name_for_stage(slot, MESA_SHADER_VERTEX); + + state.vs_vars.outputs[slot] = nir_variable_create( + tcs, nir_var_shader_temp, glsl_uvec4_type(), slot_name); + } + + nir_function_instructions_pass( + vs_function->impl, agx_lower_output_to_var, + nir_metadata_block_index | nir_metadata_dominance, &state.vs_vars); + + /* Invoke the VS first for each vertex in the input patch */ + nir_builder b_ = nir_builder_at(nir_before_impl(tcs_entry)); + nir_builder *b = &b_; + + nir_def *input_vtx_id = nir_load_invocation_id(b); + nir_push_if(b, nir_ult(b, input_vtx_id, nir_load_patch_vertices_in(b))); + { + nir_inline_function_impl(b, vs_function->impl, NULL, NULL); + + /* To handle cross-invocation VS output reads, dump everything in + * shared local memory. + * + * TODO: Optimize to registers. + */ + u_foreach_bit64(slot, state.vs_outputs_written) { + nir_def *off = + libagx_tcs_in_offset(b, input_vtx_id, nir_imm_int(b, slot), + nir_imm_int64(b, state.vs_outputs_written)); + + nir_store_shared(b, nir_load_var(b, state.vs_vars.outputs[slot]), off, + .write_mask = nir_component_mask(4)); + } + } + nir_pop_if(b, NULL); + + /* Clean up after inlining VS into TCS */ + exec_node_remove(&vs_function->node); + nir_lower_global_vars_to_local(tcs); + + /* Lower I/A. 
TODO: Indirect multidraws */ + agx_nir_lower_ia(tcs, &(struct agx_ia_key){ + .index_size = index_size_B, + .mode = MESA_PRIM_PATCHES, + }); + + /* Lower TCS outputs */ + nir_shader_intrinsics_pass(tcs, lower_tcs, + nir_metadata_block_index | nir_metadata_dominance, + &state); + link_libagx(tcs, libagx); + nir_metadata_preserve(b->impl, nir_metadata_none); + return true; +} + +static nir_def * +lower_tes_impl(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + switch (intr->intrinsic) { + case nir_intrinsic_load_tess_coord_xy: + return libagx_load_tess_coord(b, nir_load_tess_param_buffer_agx(b), + nir_load_vertex_id(b)); + + case nir_intrinsic_load_primitive_id: + return libagx_tes_patch_id(b, nir_load_tess_param_buffer_agx(b), + nir_load_vertex_id(b)); + + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: + case nir_intrinsic_load_tess_level_inner: + case nir_intrinsic_load_tess_level_outer: + return lower_tes_load(b, intr); + + case nir_intrinsic_load_patch_vertices_in: + return libagx_tes_patch_vertices_in(b, nir_load_tess_param_buffer_agx(b)); + + default: + return NULL; + } +} + +static bool +lower_tes(nir_builder *b, nir_intrinsic_instr *intr, void *data) +{ + b->cursor = nir_before_instr(&intr->instr); + nir_def *repl = lower_tes_impl(b, intr, data); + + if (repl) { + nir_def_rewrite_uses(&intr->def, repl); + nir_instr_remove(&intr->instr); + return true; + } else { + return false; + } +} + +static int +glsl_type_size(const struct glsl_type *type, bool bindless) +{ + return glsl_count_attribute_slots(type, false); +} + +bool +agx_nir_lower_tes(nir_shader *tes, const nir_shader *libagx) +{ + nir_lower_tess_coord_z( + tes, tes->info.tess._primitive_mode == TESS_PRIMITIVE_TRIANGLES); + + nir_shader_intrinsics_pass( + tes, lower_tes, nir_metadata_block_index | nir_metadata_dominance, NULL); + + /* Points mode renders as points, make sure we write point size for the HW */ + if (tes->info.tess.point_mode && + 
!(tes->info.outputs_written & VARYING_BIT_PSIZ)) { + + nir_function_impl *impl = nir_shader_get_entrypoint(tes); + nir_builder b = nir_builder_at(nir_after_impl(impl)); + + nir_store_output(&b, nir_imm_float(&b, 1.0), nir_imm_int(&b, 0), + .io_semantics.location = VARYING_SLOT_PSIZ, + .write_mask = nir_component_mask(1), .range = 1); + + tes->info.outputs_written |= VARYING_BIT_PSIZ; + } + + /* We lower to a HW VS, so update the shader info so the compiler does the + * right thing. + */ + tes->info.stage = MESA_SHADER_VERTEX; + memset(&tes->info.vs, 0, sizeof(tes->info.vs)); + tes->info.vs.tes_agx = true; + + link_libagx(tes, libagx); + nir_lower_idiv(tes, &(nir_lower_idiv_options){.allow_fp16 = true}); + nir_metadata_preserve(nir_shader_get_entrypoint(tes), nir_metadata_none); + return true; +} diff --git a/src/asahi/lib/meson.build b/src/asahi/lib/meson.build index ebfb5a83125..856bfe7a344 100644 --- a/src/asahi/lib/meson.build +++ b/src/asahi/lib/meson.build @@ -17,6 +17,7 @@ libasahi_lib_files = files( 'agx_nir_lower_ia.c', 'agx_nir_lower_msaa.c', 'agx_nir_lower_sample_intrinsics.c', + 'agx_nir_lower_tess.c', 'agx_nir_lower_tilebuffer.c', 'agx_nir_lower_vbo.c', 'agx_nir_predicate_layer_id.c', @@ -32,6 +33,8 @@ libagx_shader_files = files( 'shaders/libagx.h', 'shaders/geometry.cl', 'shaders/geometry.h', + 'shaders/tessellation.cl', + 'shaders/tessellator.cl', 'shaders/texture.cl', ) diff --git a/src/asahi/lib/shaders/geometry.cl b/src/asahi/lib/shaders/geometry.cl index 366d66846b6..d1b68b8e632 100644 --- a/src/asahi/lib/shaders/geometry.cl +++ b/src/asahi/lib/shaders/geometry.cl @@ -40,7 +40,80 @@ libagx_xfb_vertex_address(global struct agx_geometry_params *p, uint base_index, return (uintptr_t)(p->xfb_base[buffer]) + xfb_offset; } -/* TODO: Primitive restart */ +uint +libagx_vertex_id_for_line_loop(uint prim, uint vert, uint num_prims) +{ + /* (0, 1), (1, 2), (2, 0) */ + if (prim == (num_prims - 1) && vert == 1) + return 0; + else + return prim + vert; +} + 
+uint +libagx_vertex_id_for_tri_fan(uint prim, uint vert, bool flatshade_first) +{ + /* Vulkan spec section 20.1.7 gives (i + 1, i + 2, 0) for a provoking + * first. OpenGL instead wants (0, i + 1, i + 2) with a provoking last. + * Piglit clipflat expects us to switch between these orders depending on + * provoking vertex, to avoid trivializing the fan. + * + * Rotate accordingly. + */ + if (flatshade_first) + vert = (vert + 1) % 3; + + /* The simpler form assuming last is provoking. */ + return (vert == 0) ? 0 : prim + vert; +} + +uint +libagx_vertex_id_for_tri_strip_adj(uint prim, uint vert, uint num_prims, + bool flatshade_first) +{ + /* See Vulkan spec section 20.1.11 "Triangle Strips With Adjancency". + * + * There are different cases for first/middle/last/only primitives and for + * odd/even primitives. Determine which case we're in. + */ + bool last = prim == (num_prims - 1); + bool first = prim == 0; + bool even = (prim & 1) == 0; + bool even_or_first = even || first; + + /* When the last vertex is provoking, we rotate the primitives + * accordingly. This seems required for OpenGL. + */ + if (!flatshade_first && !even_or_first) { + vert = (vert + 4u) % 6u; + } + + /* Offsets per the spec. The spec lists 6 cases with 6 offsets. Luckily, + * there are lots of patterns we can exploit, avoiding a full 6x6 LUT. + * + * Here we assume the first vertex is provoking, the Vulkan default. + */ + uint offsets[6] = { + 0, + first ? 1 : (even ? -2 : 3), + even_or_first ? 2 : 4, + last ? 5 : 6, + even_or_first ? 4 : 2, + even_or_first ? 
3 : -2, + }; + + /* Ensure NIR can see thru the local array */ + uint offset = 0; + for (uint i = 1; i < 6; ++i) { + if (i == vert) + offset = offsets[i]; + } + + /* Finally add to the base of the primitive */ + return (prim * 2) + offset; +} + +/* Sync with agx_nir_lower_ia.c, this is for the restart unrolling */ uint libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, uint prim, uint vert, uint num_prims) @@ -50,24 +123,17 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, case MESA_PRIM_LINES: case MESA_PRIM_TRIANGLES: case MESA_PRIM_LINES_ADJACENCY: - case MESA_PRIM_TRIANGLES_ADJACENCY: { + case MESA_PRIM_TRIANGLES_ADJACENCY: /* Regular primitive: every N vertices defines a primitive */ return (prim * mesa_vertices_per_prim(mode)) + vert; - } - case MESA_PRIM_LINE_LOOP: { - /* (0, 1), (1, 2), (2, 0) */ - if (prim == (num_prims - 1) && vert == 1) - return 0; - else - return prim + vert; - } + case MESA_PRIM_LINE_LOOP: + return libagx_vertex_id_for_line_loop(prim, vert, num_prims); case MESA_PRIM_LINE_STRIP: - case MESA_PRIM_LINE_STRIP_ADJACENCY: { + case MESA_PRIM_LINE_STRIP_ADJACENCY: /* (i, i + 1) or (i, ..., i + 3) */ return prim + vert; - } case MESA_PRIM_TRIANGLE_STRIP: { /* Order depends on the provoking vert. @@ -80,66 +146,14 @@ libagx_vertex_id_for_topology(enum mesa_prim mode, bool flatshade_first, return prim + libagx_map_vertex_in_tri_strip(prim, vert, flatshade_first); } - case MESA_PRIM_TRIANGLE_FAN: { - /* Vulkan spec section 20.1.7 gives (i + 1, i + 2, 0) for a provoking - * first. OpenGL instead wants (0, i + 1, i + 2) with a provoking last. - * Piglit clipflat expects us to switch between these orders depending on - * provoking vertex, to avoid trivializing the fan. - * - * Rotate accordingly. - */ - if (flatshade_first) - vert = (vert + 1) % 3; + case MESA_PRIM_TRIANGLE_FAN: + return libagx_vertex_id_for_tri_fan(prim, vert, flatshade_first); - /* The simpler form assuming last is provoking. 
*/ - return (vert == 0) ? 0 : prim + vert; - } - - case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: { - /* See Vulkan spec section 20.1.11 "Triangle Strips With Adjancency". - * - * There are different cases for first/middle/last/only primitives and for - * odd/even primitives. Determine which case we're in. - */ - bool last = prim == (num_prims - 1); - bool first = prim == 0; - bool even = (prim & 1) == 0; - bool even_or_first = even || first; - - /* When the last vertex is provoking, we rotate the primitives - * accordingly. This seems required for OpenGL. - */ - if (!flatshade_first && !even_or_first) { - vert = (vert + 4u) % 6u; - } - - /* Offsets per the spec. The spec lists 6 cases with 6 offsets. Luckily, - * there are lots of patterns we can exploit, avoiding a full 6x6 LUT. - * - * Here we assume the first vertex is provoking, the Vulkan default. - */ - uint offsets[6] = { - 0, - first ? 1 : (even ? -2 : 3), - even_or_first ? 2 : 4, - last ? 5 : 6, - even_or_first ? 4 : 2, - even_or_first ? 
3 : -2, - }; - - /* Ensure NIR can see thru the local array */ - uint offset = 0; - for (uint i = 1; i < 6; ++i) { - if (i == vert) - offset = offsets[i]; - } - - /* Finally add to the base of the primitive */ - return (prim * 2) + offset; - } + case MESA_PRIM_TRIANGLE_STRIP_ADJACENCY: + return libagx_vertex_id_for_tri_strip_adj(prim, vert, num_prims, + flatshade_first); default: - /* Invalid */ return 0; } } diff --git a/src/asahi/lib/shaders/geometry.h b/src/asahi/lib/shaders/geometry.h index fe7e220d567..b2d5dc54dbe 100644 --- a/src/asahi/lib/shaders/geometry.h +++ b/src/asahi/lib/shaders/geometry.h @@ -8,13 +8,16 @@ #include "libagx.h" #ifndef __OPENCL_VERSION__ +#include "util/bitscan.h" #include "util/macros.h" -#define GLOBAL(type_) uint64_t -#define CONST(type_) uint64_t +#define GLOBAL(type_) uint64_t +#define CONST(type_) uint64_t +#define libagx_popcount(x) util_bitcount64(x) #else #define PACKED -#define GLOBAL(type_) global type_ * -#define CONST(type_) constant type_ * +#define GLOBAL(type_) global type_ * +#define CONST(type_) constant type_ * +#define libagx_popcount(x) popcount(x) #endif #ifndef LIBAGX_GEOMETRY_H @@ -156,4 +159,116 @@ struct agx_geometry_params { uint32_t count_buffer_stride; } PACKED; +struct agx_tess_params { + /* Persistent (cross-draw) geometry state */ + GLOBAL(struct agx_geometry_state) state; + + /* Patch coordinate offsets in patch_coord_buffer, indexed by patch ID. */ + GLOBAL(uint) patch_coord_offs; + + /* Patch coordinate buffer, indexed as: + * + * patch_coord_offs[patch_ID] + vertex_in_patch + * + * Currently float2s, but we might be able to compact later? + */ + GLOBAL(float2) patch_coord_buffer; + + /* Tessellation control shader output buffer, indexed by patch ID. */ + GLOBAL(uchar) tcs_buffer; + + /* Bitfield of TCS per-vertex outputs */ + uint64_t tcs_per_vertex_outputs; + + /* Default tess levels used in OpenGL when there is no TCS in the pipeline. + * Unused in Vulkan and OpenGL ES. 
+ */ + float tess_level_outer_default[4]; + float tess_level_inner_default[4]; + + /* Number of vertices in the input patch */ + uint input_patch_size; + + /* Number of vertices in the TCS output patch */ + uint output_patch_size; + + /* Number of patch constants written by TCS */ + uint tcs_patch_constants; + + /* Number of input patches per instance of the VS/TCS */ + uint patches_per_instance; +} PACKED; + +/* TCS shared memory layout: + * + * vec4 vs_outputs[VERTICES_IN_INPUT_PATCH][TOTAL_VERTEX_OUTPUTS]; + * + * TODO: compact. + */ +static inline ushort +libagx_tcs_in_offs(uint vtx, gl_varying_slot location, + uint64_t crosslane_vs_out_mask) +{ + uint base = vtx * libagx_popcount(crosslane_vs_out_mask); + uint offs = libagx_popcount(crosslane_vs_out_mask & + (((uint64_t)(1) << location) - 1)); + + return (base + offs) * 16; +} + +static inline uint +libagx_tcs_in_size(uint32_t vertices_in_patch, uint64_t crosslane_vs_out_mask) +{ + return libagx_tcs_in_offs(vertices_in_patch - 1, VARYING_SLOT_VAR31, + crosslane_vs_out_mask); +} + +/* + * TCS out buffer layout, per-patch: + * + * float tess_level_outer[4]; + * float tess_level_inner[2]; + * vec4 patch_out[MAX_PATCH_OUTPUTS]; + * vec4 vtx_out[OUT_PATCH_SIZE][TOTAL_VERTEX_OUTPUTS]; + * + * Vertex out are compacted based on the mask of written out. Patch + * out are used as-is. + * + * Bounding boxes are ignored. 
+ */ +static inline uint +libagx_tcs_out_offs(uint vtx_id, gl_varying_slot location, uint nr_patch_out, + uint out_patch_size, uint64_t vtx_out_mask) +{ + uint off = 0; + if (location == VARYING_SLOT_TESS_LEVEL_OUTER) + return off; + + off += 4 * sizeof(float); + if (location == VARYING_SLOT_TESS_LEVEL_INNER) + return off; + + off += 2 * sizeof(float); + if (location >= VARYING_SLOT_PATCH0) + return off + (16 * (location - VARYING_SLOT_PATCH0)); + + /* Anything else is a per-vtx output */ + off += 16 * nr_patch_out; + off += 16 * vtx_id * libagx_popcount(vtx_out_mask); + + uint idx = libagx_popcount(vtx_out_mask & (((uint64_t)(1) << location) - 1)); + return off + (16 * idx); +} + +static inline uint +libagx_tcs_out_stride(uint nr_patch_out, uint out_patch_size, + uint64_t vtx_out_mask) +{ + return libagx_tcs_out_offs(out_patch_size, VARYING_SLOT_VAR0, nr_patch_out, + out_patch_size, vtx_out_mask); +} + +/* In a tess eval shader, stride for hw vertex ID */ +#define LIBAGX_TES_PATCH_ID_STRIDE 8192 + #endif diff --git a/src/asahi/lib/shaders/tessellation.cl b/src/asahi/lib/shaders/tessellation.cl new file mode 100644 index 00000000000..c4d549b9ff7 --- /dev/null +++ b/src/asahi/lib/shaders/tessellation.cl @@ -0,0 +1,92 @@ +/* + * Copyright 2023 Alyssa Rosenzweig + * SPDX-License-Identifier: MIT + */ + +#include "geometry.h" + +uint +libagx_tcs_patch_vertices_in(constant struct agx_tess_params *p) +{ + return p->input_patch_size; +} + +uint +libagx_tes_patch_vertices_in(constant struct agx_tess_params *p) +{ + return p->output_patch_size; +} + +ushort +libagx_tcs_in_offset(uint vtx, gl_varying_slot location, + uint64_t crosslane_vs_out_mask) +{ + return libagx_tcs_in_offs(vtx, location, crosslane_vs_out_mask); +} + +uintptr_t +libagx_tcs_out_address(constant struct agx_tess_params *p, uint patch_id, + uint vtx_id, gl_varying_slot location, uint nr_patch_out, + uint out_patch_size, uint64_t vtx_out_mask) +{ + uint stride = + libagx_tcs_out_stride(nr_patch_out, 
out_patch_size, vtx_out_mask); + + uint offs = libagx_tcs_out_offs(vtx_id, location, nr_patch_out, + out_patch_size, vtx_out_mask); + + return (uintptr_t)(p->tcs_buffer) + (patch_id * stride) + offs; +} + +static uint +libagx_tes_unrolled_patch_id(uint raw_id) +{ + return raw_id / LIBAGX_TES_PATCH_ID_STRIDE; +} + +uint +libagx_tes_patch_id(constant struct agx_tess_params *p, uint raw_id) +{ + return libagx_tes_unrolled_patch_id(raw_id) % p->patches_per_instance; +} + +static uint +tes_vertex_id_in_patch(uint raw_id) +{ + return raw_id % LIBAGX_TES_PATCH_ID_STRIDE; +} + +float2 +libagx_load_tess_coord(constant struct agx_tess_params *p, uint raw_id) +{ + uint patch = libagx_tes_unrolled_patch_id(raw_id); + uint vtx = tes_vertex_id_in_patch(raw_id); + + return p->patch_coord_buffer[p->patch_coord_offs[patch] + vtx]; +} + +uintptr_t +libagx_tes_in_address(constant struct agx_tess_params *p, uint raw_id, + uint vtx_id, gl_varying_slot location) +{ + uint patch = libagx_tes_unrolled_patch_id(raw_id); + + return libagx_tcs_out_address(p, patch, vtx_id, location, + p->tcs_patch_constants, p->output_patch_size, + p->tcs_per_vertex_outputs); +} + +float4 +libagx_tess_level_outer_default(constant struct agx_tess_params *p) +{ + return ( + float4)(p->tess_level_outer_default[0], p->tess_level_outer_default[1], + p->tess_level_outer_default[2], p->tess_level_outer_default[3]); +} + +float2 +libagx_tess_level_inner_default(constant struct agx_tess_params *p) +{ + return (float2)(p->tess_level_inner_default[0], + p->tess_level_inner_default[1]); +} diff --git a/src/asahi/lib/shaders/tessellator.cl b/src/asahi/lib/shaders/tessellator.cl new file mode 100644 index 00000000000..0a1fe63e66a --- /dev/null +++ b/src/asahi/lib/shaders/tessellator.cl @@ -0,0 +1,8 @@ +/* + * Copyright 2023 Alyssa Rosenzweig + * Copyright (c) Microsoft Corporation + * SPDX-License-Identifier: MIT + */ + +#include "geometry.h" + diff --git a/src/compiler/nir/nir_intrinsics.py 
b/src/compiler/nir/nir_intrinsics.py index 2955e88882b..4a676f06145 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1899,6 +1899,9 @@ system_value("input_assembly_buffer_agx", 1, bit_sizes=[64]) # Address of the parameter buffer for AGX geometry shaders system_value("geometry_param_buffer_agx", 1, bit_sizes=[64]) +# Address of the parameter buffer for AGX tessellation shaders +system_value("tess_param_buffer_agx", 1, bit_sizes=[64]) + # Loads the vertex index within the current decomposed primitive. For a # triangle, this will be in [0, 2], where 2 is the last vertex. This is defined # only when the vertex shader is reinvoked for the same vertex in each diff --git a/src/gallium/drivers/asahi/agx_blit.c b/src/gallium/drivers/asahi/agx_blit.c index 7f6cccf171f..ca1e9d24af8 100644 --- a/src/gallium/drivers/asahi/agx_blit.c +++ b/src/gallium/drivers/asahi/agx_blit.c @@ -323,6 +323,10 @@ agx_blitter_save(struct agx_context *ctx, struct blitter_context *blitter, util_blitter_save_vertex_elements(blitter, ctx->attributes); util_blitter_save_vertex_shader(blitter, ctx->stage[PIPE_SHADER_VERTEX].shader); + util_blitter_save_tessctrl_shader(blitter, + ctx->stage[PIPE_SHADER_TESS_CTRL].shader); + util_blitter_save_tesseval_shader(blitter, + ctx->stage[PIPE_SHADER_TESS_EVAL].shader); util_blitter_save_geometry_shader(blitter, ctx->stage[PIPE_SHADER_GEOMETRY].shader); util_blitter_save_rasterizer(blitter, ctx->rast); diff --git a/src/gallium/drivers/asahi/agx_disk_cache.c b/src/gallium/drivers/asahi/agx_disk_cache.c index e33a819098c..dc578a21754 100644 --- a/src/gallium/drivers/asahi/agx_disk_cache.c +++ b/src/gallium/drivers/asahi/agx_disk_cache.c @@ -37,6 +37,8 @@ agx_disk_cache_compute_key(struct disk_cache *cache, key_size = sizeof(shader_key->vs); else if (uncompiled->type == PIPE_SHADER_GEOMETRY) key_size = sizeof(shader_key->gs); + else if (uncompiled->type == PIPE_SHADER_TESS_CTRL) + key_size = sizeof(shader_key->tcs); else 
if (uncompiled->type == PIPE_SHADER_FRAGMENT) key_size = sizeof(shader_key->fs); else if (uncompiled->type == PIPE_SHADER_COMPUTE) @@ -68,8 +70,9 @@ agx_disk_cache_store(struct disk_cache *cache, if (!cache) return; - /* TODO: Support caching GS */ - if (uncompiled->type == PIPE_SHADER_GEOMETRY) + /* TODO: Support caching GS/TCS */ + if (uncompiled->type == PIPE_SHADER_GEOMETRY || + uncompiled->type == PIPE_SHADER_TESS_CTRL) return; assert(binary->bo->ptr.cpu != NULL && "shaders must be CPU mapped"); diff --git a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c index 0fcaf870ddb..53800dc6986 100644 --- a/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c +++ b/src/gallium/drivers/asahi/agx_nir_lower_sysvals.c @@ -11,6 +11,7 @@ #include "nir_builder_opcodes.h" #include "nir_intrinsics.h" #include "nir_intrinsics_indices.h" +#include "shader_enums.h" #define AGX_TEXTURE_DESC_STRIDE 24 @@ -89,8 +90,12 @@ load_sysval_indirect(nir_builder *b, unsigned dim, unsigned bitsize, static unsigned stage_table(nir_builder *b) { - assert(b->shader->info.stage < PIPE_SHADER_TYPES); - return AGX_SYSVAL_STAGE(b->shader->info.stage); + gl_shader_stage stage = b->shader->info.stage; + if (stage == MESA_SHADER_VERTEX && b->shader->info.vs.tes_agx) + stage = MESA_SHADER_TESS_EVAL; + + assert(stage < PIPE_SHADER_TYPES); + return AGX_SYSVAL_STAGE(stage); } static nir_def * @@ -161,6 +166,8 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *intr, return load_sysval_root(b, 1, 64, &u->input_assembly); case nir_intrinsic_load_geometry_param_buffer_agx: return load_sysval_root(b, 1, 64, &u->geometry_params); + case nir_intrinsic_load_tess_param_buffer_agx: + return load_sysval_root(b, 1, 64, &u->tess_params); case nir_intrinsic_load_fixed_point_size_agx: return load_sysval_root(b, 1, 32, &u->fixed_point_size); case nir_intrinsic_load_tex_sprite_mask_agx: diff --git a/src/gallium/drivers/asahi/agx_pipe.c 
b/src/gallium/drivers/asahi/agx_pipe.c
index 989927e6ccb..9a1902a6e90 100644
--- a/src/gallium/drivers/asahi/agx_pipe.c
+++ b/src/gallium/drivers/asahi/agx_pipe.c
@@ -46,6 +46,7 @@
 #include "agx_public.h"
 #include "agx_state.h"
 #include "agx_tilebuffer.h"
+#include "shader_enums.h"
 
 /* Fake values, pending UAPI upstreaming */
 #ifndef DRM_FORMAT_MOD_APPLE_TWIDDLED
@@ -1533,6 +1534,7 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_FS_FINE_DERIVATIVE:
    case PIPE_CAP_CULL_DISTANCE_NOCOMBINE:
    case PIPE_CAP_NIR_COMPACT_ARRAYS:
+   case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:
       return 1;
 
    case PIPE_CAP_CLIP_HALFZ:
@@ -1699,7 +1701,10 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
    case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:
       return 4;
 
+   case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:
+      return 32;
    case PIPE_CAP_MAX_VARYINGS:
+      /* TODO: Probably should bump to 32? */
       return 16;
 
    case PIPE_CAP_FLATSHADE:
@@ -1726,7 +1731,8 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
             BITFIELD_BIT(MESA_PRIM_LINES_ADJACENCY) |
             BITFIELD_BIT(MESA_PRIM_LINE_STRIP_ADJACENCY) |
             BITFIELD_BIT(MESA_PRIM_TRIANGLES_ADJACENCY) |
-            BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP_ADJACENCY);
+            BITFIELD_BIT(MESA_PRIM_TRIANGLE_STRIP_ADJACENCY) |
+            BITFIELD_BIT(MESA_PRIM_PATCHES);
 
    case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE:
       return 1;
@@ -1790,6 +1796,8 @@ agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
    case PIPE_SHADER_COMPUTE:
    case PIPE_SHADER_GEOMETRY:
+   case PIPE_SHADER_TESS_CTRL:
+   case PIPE_SHADER_TESS_EVAL:
       break;
    default:
       return false;
    }
@@ -1836,7 +1844,11 @@ agx_get_shader_param(struct pipe_screen *pscreen, enum pipe_shader_type shader,
       return 1;
 
    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+      return shader == PIPE_SHADER_TESS_CTRL || shader == PIPE_SHADER_TESS_EVAL;
+
    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+      return shader == PIPE_SHADER_TESS_CTRL;
+
    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
    case 
PIPE_SHADER_CAP_SUBROUTINES: case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED: diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 95003d44b33..0234010ff22 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -31,6 +31,8 @@ #include "pipe/p_defines.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" +#include "tessellator/p_tessellator.h" +#include "util/bitscan.h" #include "util/bitset.h" #include "util/blend.h" #include "util/blob.h" @@ -47,6 +49,7 @@ #include "util/u_resource.h" #include "util/u_transfer.h" #include "util/u_upload_mgr.h" +#include "agx_bo.h" #include "agx_device.h" #include "agx_disk_cache.h" #include "agx_nir_lower_gs.h" @@ -185,6 +188,24 @@ agx_set_blend_color(struct pipe_context *pctx, ctx->dirty |= AGX_DIRTY_BLEND_COLOR; } +static void +agx_set_patch_vertices(struct pipe_context *pctx, unsigned char n) +{ + struct agx_context *ctx = agx_context(pctx); + ctx->patch_vertices = n; +} + +static void +agx_set_tess_state(struct pipe_context *pctx, + const float default_outer_level[4], + const float default_inner_level[2]) +{ + struct agx_context *ctx = agx_context(pctx); + + memcpy(ctx->default_outer_level, default_outer_level, 4 * sizeof(float)); + memcpy(ctx->default_inner_level, default_inner_level, 2 * sizeof(float)); +} + static void * agx_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) @@ -586,6 +607,7 @@ static enum pipe_shader_type merged_stage(struct agx_context *ctx, enum pipe_shader_type stage) { switch (stage) { + case MESA_SHADER_VERTEX: case MESA_SHADER_GEOMETRY: return ctx->stage[PIPE_SHADER_TESS_EVAL].shader ? 
MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; @@ -1504,6 +1526,18 @@ asahi_fs_shader_key_equal(const void *a, const void *b) return memcmp(a, b, sizeof(struct asahi_fs_shader_key)) == 0; } +static uint32_t +asahi_tcs_shader_key_hash(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct asahi_tcs_shader_key)); +} + +static bool +asahi_tcs_shader_key_equal(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct asahi_tcs_shader_key)) == 0; +} + /* No compute variants */ static uint32_t asahi_cs_shader_key_hash(const void *key) @@ -1837,6 +1871,22 @@ agx_compile_variant(struct agx_device *dev, struct pipe_context *pctx, NIR_PASS(_, nir, nir_shader_intrinsics_pass, agx_nir_lower_clip_m1_1, nir_metadata_block_index | nir_metadata_dominance, NULL); } + } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { + struct asahi_tcs_shader_key *key = &key_->tcs; + + /* TODO: Deduplicate this logic from the GS case! */ + struct blob_reader vs_reader; + blob_reader_init(&vs_reader, linked_so->serialized_nir.data, + linked_so->serialized_nir.size); + nir_shader *vs = nir_deserialize(NULL, &agx_nir_options, &vs_reader); + + /* Apply the VS key to the VS before linking it in */ + NIR_PASS_V(vs, agx_nir_lower_vbo, key->attribs); + NIR_PASS_V(vs, nir_lower_io_to_scalar, nir_var_shader_out, NULL, NULL); + NIR_PASS_V(vs, agx_nir_lower_sysvals, false); + + NIR_PASS_V(nir, agx_nir_lower_tcs, vs, dev->libagx, key->index_size_B); + ralloc_free(vs); } else if (nir->info.stage == MESA_SHADER_GEOMETRY) { struct asahi_gs_shader_key *key = &key_->gs; @@ -2040,6 +2090,8 @@ agx_get_shader_variant(struct agx_screen *screen, struct pipe_context *pctx, memcpy(cloned_key, key, sizeof(struct asahi_vs_shader_key)); } else if (so->type == PIPE_SHADER_GEOMETRY) { memcpy(cloned_key, key, sizeof(struct asahi_gs_shader_key)); + } else if (so->type == PIPE_SHADER_TESS_CTRL) { + memcpy(cloned_key, key, sizeof(struct asahi_tcs_shader_key)); } else { 
assert(gl_shader_stage_is_compute(so->type)); /* No key */ @@ -2057,8 +2109,6 @@ agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so, if (nir->info.stage == MESA_SHADER_KERNEL) nir->info.stage = MESA_SHADER_COMPUTE; - so->type = pipe_shader_type_from_mesa(nir->info.stage); - blob_init(&so->early_serialized_nir); nir_serialize(&so->early_serialized_nir, nir, true); @@ -2103,11 +2153,16 @@ agx_shader_initialize(struct agx_device *dev, struct agx_uncompiled_shader *so, nir_metadata_block_index | nir_metadata_dominance, NULL); } + if (nir->info.stage == MESA_SHADER_TESS_EVAL) { + NIR_PASS(_, nir, agx_nir_lower_tes, dev->libagx); + } + blob_init(&so->serialized_nir); nir_serialize(&so->serialized_nir, nir, true); _mesa_sha1_compute(so->serialized_nir.data, so->serialized_nir.size, so->nir_sha1); + so->type = pipe_shader_type_from_mesa(nir->info.stage); so->has_xfb_info = (nir->xfb_info != NULL); static_assert( @@ -2147,11 +2202,34 @@ agx_create_shader_state(struct pipe_context *pctx, } else if (nir->info.stage == MESA_SHADER_GEOMETRY) { so->variants = _mesa_hash_table_create(NULL, asahi_gs_shader_key_hash, asahi_gs_shader_key_equal); + + } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) { + /* No variants */ + so->variants = _mesa_hash_table_create(NULL, asahi_cs_shader_key_hash, + asahi_cs_shader_key_equal); + } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) { + so->variants = _mesa_hash_table_create(NULL, asahi_tcs_shader_key_hash, + asahi_tcs_shader_key_equal); } else { so->variants = _mesa_hash_table_create(so, asahi_fs_shader_key_hash, asahi_fs_shader_key_equal); } + if (nir->info.stage == MESA_SHADER_TESS_EVAL || + nir->info.stage == MESA_SHADER_TESS_CTRL) { + + so->tess.ccw = nir->info.tess.ccw; + so->tess.point_mode = nir->info.tess.point_mode; + so->tess.spacing = nir->info.tess.spacing; + so->tess.output_patch_size = nir->info.tess.tcs_vertices_out; + so->tess.primitive = nir->info.tess._primitive_mode; + 
so->tess.per_vertex_outputs = agx_tcs_per_vertex_outputs(nir); + so->tess.nr_patch_outputs = + util_last_bit(nir->info.patch_outputs_written); + if (nir->info.stage == MESA_SHADER_TESS_CTRL) + so->tess.output_stride = agx_tcs_output_stride(nir); + } + agx_shader_initialize(dev, so, nir, ctx->support_lod_bias); /* We're done with the NIR, throw it away */ @@ -2178,7 +2256,9 @@ agx_create_shader_state(struct pipe_context *pctx, } case PIPE_SHADER_GEOMETRY: - /* TODO: Geometry shaders with shader-db */ + case PIPE_SHADER_TESS_CTRL: + case PIPE_SHADER_TESS_EVAL: + /* TODO: Geometry/tessellation shaders with shader-db */ return so; case PIPE_SHADER_FRAGMENT: @@ -2276,7 +2356,7 @@ agx_update_shader(struct agx_context *ctx, struct agx_compiled_shader **out, } struct agx_uncompiled_shader *linked_so = NULL; - if (stage == PIPE_SHADER_GEOMETRY) + if (stage == PIPE_SHADER_TESS_CTRL || stage == PIPE_SHADER_GEOMETRY) linked_so = ctx->stage[PIPE_SHADER_VERTEX].shader; struct agx_screen *screen = agx_screen(ctx->base.screen); @@ -2351,6 +2431,30 @@ translate_ia_mode(enum mesa_prim prim) } } +static bool +agx_update_tcs(struct agx_context *ctx, const struct pipe_draw_info *info) +{ + assert(info->mode == MESA_PRIM_PATCHES); + + /* We don't bother to dirty track yet, update! */ + struct asahi_tcs_shader_key key = { + .index_size_B = info->index_size, + }; + + memcpy(key.attribs, ctx->attributes, + sizeof(key.attribs[0]) * AGX_MAX_ATTRIBS); + + static_assert(sizeof(key.input_nir_sha1) == + sizeof(ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1), + "common size for shader sha-1"); + + memcpy(key.input_nir_sha1, ctx->stage[PIPE_SHADER_VERTEX].shader->nir_sha1, + sizeof(key.input_nir_sha1)); + + return agx_update_shader(ctx, &ctx->tcs, PIPE_SHADER_TESS_CTRL, + (union asahi_shader_key *)&key); +} + /* * Triangle strips and fans are rotated based on the provoking vertex, but other * primitive types are not and do not need to know the provoking vertex. 
@@ -2487,6 +2591,18 @@ agx_bind_gs_state(struct pipe_context *pctx, void *cso) agx_bind_shader_state(pctx, cso, PIPE_SHADER_GEOMETRY); } +static void +agx_bind_tcs_state(struct pipe_context *pctx, void *cso) +{ + agx_bind_shader_state(pctx, cso, PIPE_SHADER_TESS_CTRL); +} + +static void +agx_bind_tes_state(struct pipe_context *pctx, void *cso) +{ + agx_bind_shader_state(pctx, cso, PIPE_SHADER_TESS_EVAL); +} + static void agx_bind_cs_state(struct pipe_context *pctx, void *cso) { @@ -2850,7 +2966,7 @@ agx_build_pipeline(struct agx_batch *batch, struct agx_compiled_shader *cs, if (stage == PIPE_SHADER_FRAGMENT) { agx_usc_tilebuffer(&b, &batch->tilebuffer_layout); - } else if (stage == PIPE_SHADER_COMPUTE) { + } else if (stage == PIPE_SHADER_COMPUTE || stage == PIPE_SHADER_TESS_CTRL) { unsigned size = cs->info.local_size + variable_shared_mem; agx_usc_pack(&b, SHARED, cfg) { @@ -4185,6 +4301,291 @@ util_draw_multi_upload_indirect(struct pipe_context *pctx, pctx->draw_vbo(pctx, info, 0, &indirect_, draws, 1); } +static void +agx_upload_draw_params(struct agx_batch *batch, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + const struct pipe_draw_info *info) +{ + if (indirect) { + struct agx_resource *indirect_rsrc = agx_resource(indirect->buffer); + uint64_t address = indirect_rsrc->bo->ptr.gpu + indirect->offset; + agx_batch_reads(batch, indirect_rsrc); + + /* To implement draw parameters, we use the last 2 words of the + * indirect draw descriptor. Offset by 3 words for indexed draw (5 + * total) and 2 words for non-indexed (4 total). See the layouts of + * indexed vs non-indexed draw descriptors. + * + * This gives us a consistent layout + * + * uint32_t first_vertex; + * uint32_t base_instance; + * + * and we can implement load_first_vertex & load_base_instance without + * checking for indexing. + */ + uint32_t offset = info->index_size ? 
3 : 2; + batch->uniforms.tables[AGX_SYSVAL_TABLE_PARAMS] = address + offset * 4; + } else { + /* Upload just those two words. */ + uint32_t params[2] = { + info->index_size ? draws->index_bias : draws->start, + info->start_instance, + }; + + batch->uniforms.tables[AGX_SYSVAL_TABLE_PARAMS] = + agx_pool_upload_aligned(&batch->pool, params, sizeof(params), 4); + } +} + +static void +agx_draw_patches(struct agx_context *ctx, const struct pipe_draw_info *info, + unsigned drawid_offset, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + unsigned num_draws) +{ + struct agx_device *dev = agx_device(ctx->base.screen); + perf_debug(dev, "Tessellation"); + + struct agx_uncompiled_shader *tcs = ctx->stage[MESA_SHADER_TESS_CTRL].shader; + struct agx_uncompiled_shader *tes = ctx->stage[MESA_SHADER_TESS_EVAL].shader; + + assert(tes != NULL && "required with patches"); + + unsigned patch_vertices = ctx->patch_vertices; + + /* OpenGL allows omitting the tcs, fill in a passthrough program if needed. + * In principle, we could optimize this case, but I don't think it matters. + */ + bool unbind_tcs_when_done = false; + if (!tcs) { + struct agx_uncompiled_shader *vs = ctx->stage[MESA_SHADER_VERTEX].shader; + + assert(patch_vertices >= 1 && + patch_vertices <= ARRAY_SIZE(vs->passthrough_tcs)); + + if (!vs->passthrough_tcs[patch_vertices - 1]) { + struct blob_reader reader; + blob_reader_init(&reader, vs->early_serialized_nir.data, + vs->early_serialized_nir.size); + nir_shader *vs_nir = nir_deserialize(NULL, &agx_nir_options, &reader); + nir_shader *nir = nir_create_passthrough_tcs(&agx_nir_options, vs_nir, + patch_vertices); + ralloc_free(vs_nir); + + /* Lower the tess level sysvals and gather info, since mesa/st won't do + * either for us. 
+ */ + NIR_PASS(_, nir, nir_lower_system_values); + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + vs->passthrough_tcs[patch_vertices - 1] = + pipe_shader_from_nir(&ctx->base, nir); + } + + tcs = vs->passthrough_tcs[patch_vertices - 1]; + ctx->base.bind_tcs_state(&ctx->base, tcs); + unbind_tcs_when_done = true; + } + + unsigned in_vertices = draws->count; + unsigned in_patches = in_vertices / patch_vertices; + + if (in_patches == 0) + return; + + struct agx_batch *batch = agx_get_compute_batch(ctx); + agx_batch_init_state(batch); + + struct pipe_resource *heap = + pipe_buffer_create(ctx->base.screen, PIPE_BIND_GLOBAL, PIPE_USAGE_DEFAULT, + 1024 * 1024 * 128); + + uint64_t heap_gpu = agx_resource(heap)->bo->ptr.gpu; + uint8_t *heap_cpu = agx_resource(heap)->bo->ptr.cpu; + + unsigned unrolled_patch_count = in_patches * info->instance_count; + + uint32_t heap_water = 0; + uint32_t tcs_out_offs = heap_water; + heap_water += ALIGN(unrolled_patch_count * tcs->tess.output_stride, 4); + + agx_batch_writes(batch, agx_resource(heap), 0); + + uint64_t ib = 0; + size_t ib_extent = 0; + + if (info->index_size) + ib = agx_index_buffer_ptr(batch, info, draws, &ib_extent); + + agx_upload_ia_params(batch, info, indirect, ib, ib_extent, 0); + agx_upload_draw_params(batch, indirect, draws, info); + + /* Setup parameters */ + struct agx_tess_params tess_params = { + .tcs_buffer = heap_gpu + tcs_out_offs, + .input_patch_size = patch_vertices, + .output_patch_size = tcs->tess.output_patch_size, + .tcs_patch_constants = tcs->tess.nr_patch_outputs, + .tcs_per_vertex_outputs = tcs->tess.per_vertex_outputs, + .patch_coord_buffer = heap_gpu, + .patches_per_instance = in_patches, + }; + + memcpy(&tess_params.tess_level_outer_default, ctx->default_outer_level, + sizeof(ctx->default_outer_level)); + memcpy(&tess_params.tess_level_inner_default, ctx->default_inner_level, + sizeof(ctx->default_inner_level)); + + batch->uniforms.tess_params = + agx_pool_upload(&batch->pool, 
&tess_params, sizeof(tess_params)); + + /* Run VS+TCS as compute */ + agx_upload_vbos(batch); + agx_update_vs(ctx); + agx_update_tcs(ctx, info); + /* XXX */ + ctx->stage[PIPE_SHADER_TESS_CTRL].dirty = ~0; + ctx->stage[PIPE_SHADER_TESS_EVAL].dirty = ~0; + agx_update_descriptors(batch, ctx->vs, PIPE_SHADER_VERTEX); + agx_update_descriptors(batch, ctx->tcs, PIPE_SHADER_TESS_CTRL); + + struct pipe_grid_info tcs_grid = { + .block = {MAX2(patch_vertices, tcs->tess.output_patch_size), 1, 1}, + .grid = {in_patches, info->instance_count, 1}, + /* XXX */ + .variable_shared_mem = 32768, + }; + + agx_launch(batch, &tcs_grid, ctx->tcs, PIPE_SHADER_TESS_CTRL); + + agx_flush_all(ctx, "HACK"); + agx_sync_all(ctx, "HACK"); + + /* Setup batch */ + batch = agx_get_batch(ctx); + + enum tess_primitive_mode mode = + MAX2(tcs->tess.primitive, tes->tess.primitive); + enum gl_tess_spacing spacing = MAX2(tcs->tess.spacing, tes->tess.spacing); + + enum pipe_tess_spacing pspacing = spacing == TESS_SPACING_EQUAL + ? PIPE_TESS_SPACING_EQUAL + : spacing == TESS_SPACING_FRACTIONAL_ODD + ? PIPE_TESS_SPACING_FRACTIONAL_ODD + : PIPE_TESS_SPACING_FRACTIONAL_EVEN; + + bool point_mode = MAX2(tcs->tess.point_mode, tes->tess.point_mode); + enum mesa_prim in_prim = mode == TESS_PRIMITIVE_ISOLINES ? MESA_PRIM_LINES + : mode == TESS_PRIMITIVE_QUADS + ? MESA_PRIM_QUADS + : MESA_PRIM_TRIANGLES; + enum mesa_prim out_prim = point_mode ? MESA_PRIM_POINTS + : mode == TESS_PRIMITIVE_ISOLINES + ? 
MESA_PRIM_LINES + : MESA_PRIM_TRIANGLES; + + struct pipe_tessellator *tess = + p_tess_init(in_prim, pspacing, tes->tess.ccw, point_mode); + + struct pipe_tessellator_data data = {0}; + + /* Mem allocate */ + uint32_t patch_coord_offs_offs = heap_water; + tess_params.patch_coord_offs = heap_gpu + heap_water; + heap_water += align(4 * unrolled_patch_count, 4); + + uint32_t draws_off = heap_water; + uint32_t *patch_draws = (uint32_t *)(heap_cpu + heap_water); + heap_water += align(sizeof(uint32_t) * 5 * unrolled_patch_count, 4); + + uint32_t *patch_offs = (uint32_t *)(heap_cpu + patch_coord_offs_offs); + + for (unsigned patch = 0; patch < unrolled_patch_count; ++patch) { + float *addr = + (float *)(heap_cpu + tcs_out_offs + tcs->tess.output_stride * patch); + + struct pipe_tessellation_factors factors = { + .outer_tf = {addr[0], addr[1], addr[2], addr[3]}, + .inner_tf = {addr[4], addr[5]}, + }; + p_tessellate(tess, &factors, &data); + + /* Mem allocate indices */ + uint32_t index_off = heap_water; + uint16_t *indices = (uint16_t *)(heap_cpu + heap_water); + heap_water += align(sizeof(*indices) * data.num_indices, 4); + + for (unsigned idx = 0; idx < data.num_indices; ++idx) { + indices[idx] = data.indices[idx]; + } + + /* Mem allocate patch coords */ + heap_water = align(heap_water, 8); + patch_offs[patch] = heap_water / 8; + float *patch_coords = (float *)(heap_cpu + heap_water); + heap_water += align(8 * data.num_domain_points, 4); + + for (unsigned p = 0; p < data.num_domain_points; ++p) { + patch_coords[2 * p + 0] = data.domain_points_u[p]; + patch_coords[2 * p + 1] = data.domain_points_v[p]; + } + assert(data.num_indices < 32768); + assert(data.num_domain_points < 8192); + + /* Generate a draw for the patch */ + uint32_t *desc = patch_draws + (patch * 5); + + desc[0] = data.num_indices; /* count */ + desc[1] = 1; /* instance_count */ + desc[2] = index_off / sizeof(*indices); /* start */ + desc[3] = patch * LIBAGX_TES_PATCH_ID_STRIDE; /* index_bias */ + desc[4] = 
0; /* start_instance */
+   }
+   p_tess_destroy(tess);
+
+   /* Run TES as VS */
+   agx_batch_init_state(batch);
+   void *vs_cso = ctx->stage[PIPE_SHADER_VERTEX].shader;
+   ctx->base.bind_vs_state(&ctx->base,
+                           ctx->stage[PIPE_SHADER_TESS_EVAL].shader);
+   agx_update_vs(ctx);
+   agx_update_descriptors(batch, ctx->vs, PIPE_SHADER_TESS_EVAL);
+
+   struct pipe_draw_info draw_info = {
+      .mode = out_prim,
+      .index_size = 2,
+      .index.resource = heap,
+      .instance_count = 1,
+      .view_mask = info->view_mask,
+   };
+
+   /* Wrap the pool allocation in a fake resource for meta-Gallium use */
+   struct pipe_draw_indirect_info copy_indirect = {
+      .buffer = heap,
+      .offset = draws_off,
+      .stride = 5 * sizeof(uint32_t),
+      .draw_count = in_patches * info->instance_count,
+   };
+
+   batch->uniforms.tess_params =
+      agx_pool_upload(&batch->pool, &tess_params, sizeof(tess_params));
+
+   ctx->base.draw_vbo(&ctx->base, &draw_info, 0, &copy_indirect, NULL, 1);
+
+   /* Restore vertex state */
+   ctx->base.bind_vs_state(&ctx->base, vs_cso);
+
+   pipe_resource_reference(&heap, NULL);
+
+   if (unbind_tcs_when_done) {
+      ctx->base.bind_tcs_state(&ctx->base, NULL);
+   }
+}
+
 static void
 agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
              unsigned drawid_offset,
@@ -4205,7 +4606,7 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
       assert(drawid_offset == 0);
       assert(num_draws == 1);
 
-      util_draw_multi_upload_indirect(pctx, info, indirect, draws);
+      util_draw_multi_upload_indirect(pctx, info, indirect, draws);
       return;
    }
@@ -4214,6 +4615,18 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
      return;
   }
 
+   /* TODO: stop cheating */
+   if (info->mode == MESA_PRIM_PATCHES && indirect) {
+      perf_debug_ctx(ctx, "indirect tessellation");
+      util_draw_indirect(pctx, info, indirect);
+      return;
+   }
+
+   if (info->mode == MESA_PRIM_PATCHES) {
+      agx_draw_patches(ctx, info, drawid_offset, indirect, draws, num_draws);
+      return;
+   }
+
    if (agx_needs_passthrough_gs(ctx, info, 
indirect)) { agx_apply_passthrough_gs(ctx, info, drawid_offset, indirect, draws, num_draws); @@ -4330,39 +4742,9 @@ agx_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info, enum mesa_prim mode = info->mode; if (ctx->vs->info.uses_base_param || ctx->gs) { + agx_upload_draw_params(batch, indirect, draws, info); + batch->uniforms.is_indexed_draw = (idx_size > 0); - - if (indirect) { - struct agx_resource *indirect_rsrc = agx_resource(indirect->buffer); - uint64_t address = indirect_rsrc->bo->ptr.gpu + indirect->offset; - agx_batch_reads(batch, indirect_rsrc); - - /* To implement draw parameters, we use the last 2 words of the - * indirect draw descriptor. Offset by 3 words for indexed draw (5 - * total) and 2 words for non-indexed (4 total). See the layouts of - * indexed vs non-indexed draw descriptors. - * - * This gives us a consistent layout - * - * uint32_t first_vertex; - * uint32_t base_instance; - * - * and we can implement load_first_vertex & load_base_instance without - * checking for indexing. - */ - uint32_t offset = idx_size ? 3 : 2; - batch->uniforms.tables[AGX_SYSVAL_TABLE_PARAMS] = address + offset * 4; - } else { - /* Upload just those two words. */ - uint32_t params[2] = { - idx_size ? 
draws->index_bias : draws->start, - info->start_instance, - }; - - batch->uniforms.tables[AGX_SYSVAL_TABLE_PARAMS] = - agx_pool_upload_aligned(&batch->pool, params, sizeof(params), 4); - } - ctx->dirty |= AGX_DIRTY_VS; } @@ -4774,6 +5156,8 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->create_vertex_elements_state = agx_create_vertex_elements; ctx->create_vs_state = agx_create_shader_state; ctx->create_gs_state = agx_create_shader_state; + ctx->create_tcs_state = agx_create_shader_state; + ctx->create_tes_state = agx_create_shader_state; ctx->create_compute_state = agx_create_compute_state; ctx->bind_blend_state = agx_bind_blend_state; ctx->bind_depth_stencil_alpha_state = agx_bind_zsa_state; @@ -4783,6 +5167,8 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->bind_vertex_elements_state = agx_bind_vertex_elements_state; ctx->bind_vs_state = agx_bind_vs_state; ctx->bind_gs_state = agx_bind_gs_state; + ctx->bind_tcs_state = agx_bind_tcs_state; + ctx->bind_tes_state = agx_bind_tes_state; ctx->bind_compute_state = agx_bind_cs_state; ctx->delete_blend_state = agx_delete_state; ctx->delete_depth_stencil_alpha_state = agx_delete_state; @@ -4793,6 +5179,8 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->delete_vertex_elements_state = agx_delete_state; ctx->delete_vs_state = agx_delete_shader_state; ctx->delete_gs_state = agx_delete_shader_state; + ctx->delete_tcs_state = agx_delete_shader_state; + ctx->delete_tes_state = agx_delete_shader_state; ctx->set_blend_color = agx_set_blend_color; ctx->set_clip_state = agx_set_clip_state; ctx->set_constant_buffer = agx_set_constant_buffer; @@ -4801,6 +5189,7 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->set_sampler_views = agx_set_sampler_views; ctx->set_framebuffer_state = agx_set_framebuffer_state; ctx->set_polygon_stipple = agx_set_polygon_stipple; + ctx->set_patch_vertices = agx_set_patch_vertices; ctx->set_sample_mask = agx_set_sample_mask; ctx->set_scissor_states = 
agx_set_scissor_states; ctx->set_stencil_ref = agx_set_stencil_ref; @@ -4813,4 +5202,5 @@ agx_init_state_functions(struct pipe_context *ctx) ctx->set_global_binding = agx_set_global_binding; ctx->texture_barrier = agx_texture_barrier; ctx->get_compute_state_info = agx_get_compute_state_info; + ctx->set_tess_state = agx_set_tess_state; } diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 6f2ded18bb2..e6a229bf836 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -106,6 +106,9 @@ struct PACKED agx_draw_uniforms { /* Address of input assembly buffer if geom/tess is used, else 0 */ uint64_t input_assembly; + /* Address of tessellation param buffer if tessellation is used, else 0 */ + uint64_t tess_params; + /* Address of geometry param buffer if geometry shaders are used, else 0 */ uint64_t geometry_params; @@ -213,6 +216,7 @@ struct agx_uncompiled_shader { struct agx_uncompiled_shader_info info; struct hash_table *variants; struct agx_uncompiled_shader *passthrough_progs[MESA_PRIM_COUNT][3][2]; + struct agx_uncompiled_shader *passthrough_tcs[32]; uint32_t xfb_strides[4]; bool has_xfb_info; @@ -222,6 +226,18 @@ struct agx_uncompiled_shader { /* Set on VS, passed to FS for linkage */ unsigned base_varying; + + /* Tessellation info */ + struct { + uint64_t per_vertex_outputs; + uint32_t output_stride; + enum gl_tess_spacing spacing; + enum tess_primitive_mode primitive; + uint8_t output_patch_size; + uint8_t nr_patch_outputs; + bool ccw; + bool point_mode; + } tess; }; enum agx_stage_dirty { @@ -407,6 +423,18 @@ struct asahi_fs_shader_key { enum pipe_format rt_formats[PIPE_MAX_COLOR_BUFS]; }; +struct asahi_tcs_shader_key { + /* Input assembly key. Simplified because we know we're operating on patches. + */ + uint8_t index_size_B; + + /* Vertex shader key */ + struct agx_attribute attribs[AGX_MAX_VBUFS]; + + /* Tessellation control shaders must be linked with a vertex shader. 
*/ + uint8_t input_nir_sha1[20]; +}; + struct asahi_gs_shader_key { /* Input assembly key */ struct agx_ia_key ia; @@ -426,6 +454,7 @@ struct asahi_gs_shader_key { union asahi_shader_key { struct asahi_vs_shader_key vs; + struct asahi_tcs_shader_key tcs; struct asahi_gs_shader_key gs; struct asahi_fs_shader_key fs; }; @@ -498,7 +527,7 @@ struct asahi_blitter { struct agx_context { struct pipe_context base; - struct agx_compiled_shader *vs, *fs, *gs; + struct agx_compiled_shader *vs, *fs, *gs, *tcs, *tes; uint32_t dirty; /* Heap for dynamic memory allocation for geometry/tessellation shaders */ @@ -527,6 +556,10 @@ struct agx_context { struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; uint32_t vb_mask; + unsigned patch_vertices; + float default_outer_level[4]; + float default_inner_level[2]; + struct agx_stage stage[PIPE_SHADER_TYPES]; struct agx_attribute *attributes; struct agx_rasterizer *rast;