diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 60af0fa3157..895b3979c81 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -118,6 +118,10 @@ VkResult anv_reloc_list_add_bo_impl(struct anv_reloc_list *list, struct anv_bo *target_bo) { + /* This can happen with sparse resources. */ + if (!target_bo) + return VK_SUCCESS; + uint32_t idx = target_bo->gem_handle; VkResult result = anv_reloc_list_grow_deps(list, (idx / BITSET_WORDBITS) + 1); @@ -1693,6 +1697,39 @@ anv_queue_submit_simple_batch(struct anv_queue *queue, return result; } +VkResult +anv_queue_submit_trtt_batch(struct anv_queue *queue, + struct anv_batch *batch) +{ + struct anv_device *device = queue->device; + VkResult result = VK_SUCCESS; + + uint32_t batch_size = align(batch->next - batch->start, 8); + + struct anv_bo *batch_bo; + result = anv_bo_pool_alloc(&device->batch_bo_pool, batch_size, &batch_bo); + if (result != VK_SUCCESS) + return result; + + memcpy(batch_bo->map, batch->start, batch_size); +#ifdef SUPPORT_INTEL_INTEGRATED_GPUS + if (device->physical->memory.need_flush) + intel_flush_range(batch_bo->map, batch_size); +#endif + + if (INTEL_DEBUG(DEBUG_BATCH)) { + intel_print_batch(queue->decoder, batch_bo->map, batch_bo->size, + batch_bo->offset, false); + } + + result = device->kmd_backend->execute_trtt_batch(queue, batch_bo, + batch_size); + + anv_bo_pool_free(&device->batch_bo_pool, batch_bo); + + return result; +} + void anv_cmd_buffer_clflush(struct anv_cmd_buffer **cmd_buffers, uint32_t num_cmd_buffers) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index ea0dd5cd749..85028608366 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -1444,8 +1444,17 @@ anv_physical_device_try_create(struct vk_instance *vk_instance, device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE; - device->has_sparse = device->info.kmd_type == INTEL_KMD_TYPE_XE && - debug_get_bool_option("ANV_SPARSE", true); + /* While xe.ko can use both vm_bind and TR-TT, i915.ko only has TR-TT. */ + if (device->info.kmd_type == INTEL_KMD_TYPE_XE) { + device->has_sparse = true; + device->sparse_uses_trtt = + debug_get_bool_option("ANV_SPARSE_USE_TRTT", false); + } else { + device->has_sparse = + device->info.ver >= 12 && + debug_get_bool_option("ANV_SPARSE", false); + device->sparse_uses_trtt = true; + } device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) || driQueryOptionb(&instance->dri_options, "always_flush_cache"); @@ -1732,6 +1741,11 @@ void anv_GetPhysicalDeviceProperties( const bool has_sparse_or_fake = pdevice->instance->has_fake_sparse || pdevice->has_sparse; + uint64_t sparse_addr_space_size = + !has_sparse_or_fake ? 0 : + pdevice->sparse_uses_trtt ? pdevice->va.trtt.size : + 1ULL << 48; + VkSampleCountFlags sample_counts = isl_device_get_sample_counts(&pdevice->isl_dev); @@ -1749,7 +1763,7 @@ void anv_GetPhysicalDeviceProperties( .maxMemoryAllocationCount = UINT32_MAX, .maxSamplerAllocationCount = 64 * 1024, .bufferImageGranularity = 1, - .sparseAddressSpaceSize = has_sparse_or_fake ? 
(1uLL << 48) : 0, + .sparseAddressSpaceSize = sparse_addr_space_size, .maxBoundDescriptorSets = MAX_SETS, .maxPerStageDescriptorSamplers = max_samplers, .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, @@ -3083,6 +3097,33 @@ anv_device_destroy_context_or_vm(struct anv_device *device) } } +static VkResult +anv_device_init_trtt(struct anv_device *device) +{ + struct anv_trtt *trtt = &device->trtt; + + if (pthread_mutex_init(&trtt->mutex, NULL) != 0) + return vk_error(device, VK_ERROR_INITIALIZATION_FAILED); + + return VK_SUCCESS; +} + +static void +anv_device_finish_trtt(struct anv_device *device) +{ + struct anv_trtt *trtt = &device->trtt; + + pthread_mutex_destroy(&trtt->mutex); + + vk_free(&device->vk.alloc, trtt->l3_mirror); + vk_free(&device->vk.alloc, trtt->l2_mirror); + + for (int i = 0; i < trtt->num_page_table_bos; i++) + anv_device_release_bo(device, trtt->page_table_bos[i]); + + vk_free(&device->vk.alloc, trtt->page_table_bos); +} + VkResult anv_CreateDevice( VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo* pCreateInfo, @@ -3542,16 +3583,20 @@ VkResult anv_CreateDevice( goto fail_trivial_batch_bo_and_scratch_pool; } - result = anv_genX(device->info, init_device_state)(device); + result = anv_device_init_trtt(device); if (result != VK_SUCCESS) goto fail_btd_fifo_bo; + result = anv_genX(device->info, init_device_state)(device); + if (result != VK_SUCCESS) + goto fail_trtt; + struct vk_pipeline_cache_create_info pcc_info = { }; device->default_pipeline_cache = vk_pipeline_cache_create(&device->vk, &pcc_info, NULL); if (!device->default_pipeline_cache) { result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_btd_fifo_bo; + goto fail_trtt; } /* Internal shaders need their own pipeline cache because, unlike the rest @@ -3654,6 +3699,8 @@ VkResult anv_CreateDevice( vk_pipeline_cache_destroy(device->internal_cache, NULL); fail_default_pipeline_cache: vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL); + fail_trtt: + anv_device_finish_trtt(device); fail_btd_fifo_bo: if (ANV_SUPPORT_RT && device->info->has_ray_tracing) anv_device_release_bo(device, device->btd_fifo_bo); @@ -3754,6 +3801,8 @@ void anv_DestroyDevice( vk_pipeline_cache_destroy(device->internal_cache, NULL); vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL); + anv_device_finish_trtt(device); + if (ANV_SUPPORT_RT && device->info->has_ray_tracing) anv_device_release_bo(device, device->btd_fifo_bo); diff --git a/src/intel/vulkan/anv_gem_stubs.c b/src/intel/vulkan/anv_gem_stubs.c index f2b8103ea81..55f7a403ee7 100644 --- a/src/intel/vulkan/anv_gem_stubs.c +++ b/src/intel/vulkan/anv_gem_stubs.c @@ -65,6 +65,13 @@ stub_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo, return VK_ERROR_UNKNOWN; } +static VkResult +stub_execute_trtt_batch(struct anv_queue *queue, struct anv_bo *batch_bo, + uint32_t batch_size) +{ + return VK_ERROR_UNKNOWN; +} + static VkResult stub_queue_exec_locked(struct anv_queue *queue, uint32_t wait_count, @@ -190,6 +197,7 @@ const struct anv_kmd_backend *anv_stub_kmd_backend_get(void) .vm_bind_bo = stub_vm_bind_bo, .vm_unbind_bo = stub_vm_bind_bo, .execute_simple_batch = stub_execute_simple_batch, + .execute_trtt_batch = stub_execute_trtt_batch, .queue_exec_locked = stub_queue_exec_locked, .queue_exec_trace = stub_queue_exec_trace, .bo_alloc_flags_to_bo_flags = stub_bo_alloc_flags_to_bo_flags, diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index a9bd1134982..745a5d38907 100644 --- 
a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -283,3 +283,7 @@ genX(simple_shader_push_state_address)(struct anv_simple_shader *state, void genX(emit_simple_shader_end)(struct anv_simple_shader *state); + +VkResult genX(init_trtt_context_state)(struct anv_queue *queue); + +VkResult genX(write_trtt_entries)(struct anv_trtt_submission *submit); diff --git a/src/intel/vulkan/anv_kmd_backend.h b/src/intel/vulkan/anv_kmd_backend.h index ed860eba81f..5e3f508e49b 100644 --- a/src/intel/vulkan/anv_kmd_backend.h +++ b/src/intel/vulkan/anv_kmd_backend.h @@ -77,6 +77,9 @@ struct anv_kmd_backend { struct anv_bo *batch_bo, uint32_t batch_bo_size, bool is_companion_rcs_batch); + VkResult (*execute_trtt_batch)(struct anv_queue *queue, + struct anv_bo *batch_bo, + uint32_t batch_size); VkResult (*queue_exec_locked)(struct anv_queue *queue, uint32_t wait_count, const struct vk_sync_wait *waits, diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 08f3ee3652d..52f1cd722c6 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -238,6 +238,22 @@ struct intel_perf_query_result; #define SO_BUFFER_INDEX_0_CMD 0x60 #define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b))) +/* The TR-TT L1 page table entries may contain these values instead of actual + * pointers to indicate the regions are either NULL or invalid. We program + * these values to TR-TT registers, so we could change them, but it's super + * convenient to have the NULL value be 0 because everything is + * zero-initialized when allocated. + * + * Since we reserve these values for NULL/INVALID, we can't use them as + * destinations for TR-TT address translation. Both values are shifted by 16 + * bits, which results in graphics addresses 0 and 64k. On Anv the first vma + * starts at 2MB, so we already don't use 0 and 64k for anything, so there's + * nothing really to reserve. We could instead just reserve random 64kb + * ranges from any of the non-TR-TT vmas and use their addresses. + */ +#define ANV_TRTT_L1_NULL_TILE_VAL 0 +#define ANV_TRTT_L1_INVALID_TILE_VAL 1 + static inline uint32_t align_down_npot_u32(uint32_t v, uint32_t a) { @@ -695,6 +711,21 @@ struct anv_state_stream { struct util_dynarray all_blocks; }; +struct anv_trtt_bind { + uint64_t pte_addr; + uint64_t entry_addr; +}; + +struct anv_trtt_submission { + struct anv_queue *queue; + + struct anv_trtt_bind *l3l2_binds; + struct anv_trtt_bind *l1_binds; + + int l3l2_binds_len; + int l1_binds_len; +}; + /* The block_pool functions exported for testing only. The block pool should * only be used via a state pool (see below). */ @@ -912,6 +943,7 @@ struct anv_physical_device { * a vm_bind ioctl). */ bool has_sparse; + bool sparse_uses_trtt; /** True if HW supports ASTC LDR */ bool has_astc_ldr; @@ -1724,6 +1756,40 @@ struct anv_device { */ VkCommandPool companion_rcs_cmd_pool; + struct anv_trtt { + pthread_mutex_t mutex; + + /* Sometimes we need to run batches from places where we don't have a + * queue coming from the API, so we use this. + */ + struct anv_queue *queue; + + /* There's only one L3 table, so if l3_addr is zero that means we + * didn't initialize the TR-TT context yet (i.e., we're not using TR-TT + * yet in this context). + */ + uint64_t l3_addr; + + /* We don't want to access the page tables from the CPU, so just + * maintain a mirror that we can use.
+ */ + uint64_t *l3_mirror; + uint64_t *l2_mirror; + + /* We keep a dynamic list of page table bos, and each bo can store + * multiple page tables. + */ + struct anv_bo **page_table_bos; + int num_page_table_bos; + int page_table_bos_capacity; + + /* These are used to keep track of space available for more page tables + * within a bo. + */ + struct anv_bo *cur_page_table_bo; + uint64_t next_page_table_bo_offset; + } trtt; + /* This is true if the user ever bound a sparse resource to memory. This * is used for a workaround that makes every memoryBarrier flush more * things than it should. Many applications request for the sparse @@ -1861,6 +1927,8 @@ VkResult anv_queue_submit(struct vk_queue *queue, VkResult anv_queue_submit_simple_batch(struct anv_queue *queue, struct anv_batch *batch, bool is_companion_rcs_batch); +VkResult anv_queue_submit_trtt_batch(struct anv_queue *queue, + struct anv_batch *batch); void anv_queue_trace(struct anv_queue *queue, const char *label, bool frame, bool begin); diff --git a/src/intel/vulkan/anv_sparse.c b/src/intel/vulkan/anv_sparse.c index 8e8024db1ab..3da57f35de8 100644 --- a/src/intel/vulkan/anv_sparse.c +++ b/src/intel/vulkan/anv_sparse.c @@ -276,6 +276,275 @@ anv_sparse_get_standard_image_block_shape(enum isl_format format, return vk_extent3d_el_to_px(block_shape, layout); } +/* We really want to try to have all the page tables on as few BOs as possible + * to benefit from cache locality and to keep the i915.ko relocation lists + * small. On the other hand, we don't want to waste memory on unused space. + */ +#define ANV_TRTT_PAGE_TABLE_BO_SIZE (2 * 1024 * 1024) + +static VkResult +trtt_make_page_table_bo(struct anv_device *device, struct anv_bo **bo) +{ + VkResult result; + struct anv_trtt *trtt = &device->trtt; + + result = anv_device_alloc_bo(device, "trtt-page-table", + ANV_TRTT_PAGE_TABLE_BO_SIZE, 0, 0, bo); + if (result != VK_SUCCESS) + return result; + + if (trtt->num_page_table_bos < trtt->page_table_bos_capacity) { + trtt->page_table_bos[trtt->num_page_table_bos++] = *bo; + } else { + + int new_capacity = MAX2(8, trtt->page_table_bos_capacity * 2); + struct anv_bo **new_page_table_bos = + vk_realloc(&device->vk.alloc, trtt->page_table_bos, + new_capacity * sizeof(*trtt->page_table_bos), 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!new_page_table_bos) { + anv_device_release_bo(device, *bo); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + + new_page_table_bos[trtt->num_page_table_bos] = *bo; + + trtt->page_table_bos = new_page_table_bos; + trtt->page_table_bos_capacity = new_capacity; + trtt->num_page_table_bos++; + } + + trtt->cur_page_table_bo = *bo; + trtt->next_page_table_bo_offset = 0; + + sparse_debug("new number of page table BOs: %d\n", + trtt->num_page_table_bos); + + return VK_SUCCESS; +} + +static VkResult +trtt_get_page_table_bo(struct anv_device *device, struct anv_bo **bo, + uint64_t *bo_addr) +{ + struct anv_trtt *trtt = &device->trtt; + VkResult result; + + if (!trtt->cur_page_table_bo) { + result = trtt_make_page_table_bo(device, bo); + if (result != VK_SUCCESS) + return result; + } + + *bo = trtt->cur_page_table_bo; + *bo_addr = trtt->cur_page_table_bo->offset + + trtt->next_page_table_bo_offset; + + trtt->next_page_table_bo_offset += 4096; + if (trtt->next_page_table_bo_offset >= ANV_TRTT_PAGE_TABLE_BO_SIZE) + trtt->cur_page_table_bo = NULL; + + return VK_SUCCESS; +} + +static VkResult +anv_trtt_init_context_state(struct anv_queue *queue) +{ + struct anv_device *device = queue->device; + struct anv_trtt *trtt = 
&device->trtt; + + struct anv_bo *l3_bo; + VkResult result = trtt_get_page_table_bo(device, &l3_bo, &trtt->l3_addr); + if (result != VK_SUCCESS) + return result; + + trtt->l3_mirror = vk_zalloc(&device->vk.alloc, 4096, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!trtt->l3_mirror) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + return result; + } + + /* L3 has 512 entries, so we can have up to 512 L2 tables. */ + trtt->l2_mirror = vk_zalloc(&device->vk.alloc, 512 * 4096, 8, + VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!trtt->l2_mirror) { + result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_free_l3; + } + + result = anv_genX(device->info, init_trtt_context_state)(queue); + + return result; + +fail_free_l3: + vk_free(&device->vk.alloc, trtt->l3_mirror); + return result; +} + +static void +anv_trtt_bind_list_add_entry(struct anv_trtt_bind *binds, int *binds_len, + uint64_t pte_addr, uint64_t entry_addr) +{ + binds[*binds_len] = (struct anv_trtt_bind) { + .pte_addr = pte_addr, + .entry_addr = entry_addr, + }; + (*binds_len)++; +} + +/* For L3 and L2 pages, null and invalid entries are indicated by bits 1 and 0 + * respectively. For L1 entries, the hardware compares the addresses against + * what we program to the GFX_TRTT_NULL and GFX_TRTT_INVAL registers. + */ +#define ANV_TRTT_L3L2_NULL_ENTRY (1 << 1) +#define ANV_TRTT_L3L2_INVALID_ENTRY (1 << 0) + +/* Adds elements to the anv_trtt_bind structs passed. This doesn't write the + * entries to the HW yet. + */ +static VkResult +anv_trtt_bind_add(struct anv_device *device, + uint64_t trtt_addr, uint64_t dest_addr, + struct anv_trtt_submission *s) +{ + VkResult result = VK_SUCCESS; + struct anv_trtt *trtt = &device->trtt; + bool is_null_bind = dest_addr == ANV_TRTT_L1_NULL_TILE_VAL; + + int l3_index = (trtt_addr >> 35) & 0x1FF; + int l2_index = (trtt_addr >> 26) & 0x1FF; + int l1_index = (trtt_addr >> 16) & 0x3FF; + + uint64_t l2_addr = trtt->l3_mirror[l3_index]; + if (l2_addr == ANV_TRTT_L3L2_NULL_ENTRY && is_null_bind) { + return VK_SUCCESS; + } else if (l2_addr == 0 || l2_addr == ANV_TRTT_L3L2_NULL_ENTRY) { + if (is_null_bind) { + trtt->l3_mirror[l3_index] = ANV_TRTT_L3L2_NULL_ENTRY; + + anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len, + trtt->l3_addr + l3_index * sizeof(uint64_t), + ANV_TRTT_L3L2_NULL_ENTRY); + + return VK_SUCCESS; + } + + struct anv_bo *l2_bo; + result = trtt_get_page_table_bo(device, &l2_bo, &l2_addr); + if (result != VK_SUCCESS) + return result; + + trtt->l3_mirror[l3_index] = l2_addr; + + anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len, + trtt->l3_addr + l3_index * sizeof(uint64_t), l2_addr); + } + assert(l2_addr != 0 && l2_addr != ANV_TRTT_L3L2_NULL_ENTRY); + + /* The first page in the l2_mirror corresponds to l3_index=0 and so on. 
*/ + uint64_t l1_addr = trtt->l2_mirror[l3_index * 512 + l2_index]; + if (l1_addr == ANV_TRTT_L3L2_NULL_ENTRY && is_null_bind) { + return VK_SUCCESS; + } else if (l1_addr == 0 || l1_addr == ANV_TRTT_L3L2_NULL_ENTRY) { + if (is_null_bind) { + trtt->l2_mirror[l3_index * 512 + l2_index] = + ANV_TRTT_L3L2_NULL_ENTRY; + + anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len, + l2_addr + l2_index * sizeof(uint64_t), + ANV_TRTT_L3L2_NULL_ENTRY); + + return VK_SUCCESS; + } + + struct anv_bo *l1_bo; + result = trtt_get_page_table_bo(device, &l1_bo, &l1_addr); + if (result != VK_SUCCESS) + return result; + + trtt->l2_mirror[l3_index * 512 + l2_index] = l1_addr; + + anv_trtt_bind_list_add_entry(s->l3l2_binds, &s->l3l2_binds_len, + l2_addr + l2_index * sizeof(uint64_t), l1_addr); + } + assert(l1_addr != 0 && l1_addr != ANV_TRTT_L3L2_NULL_ENTRY); + + anv_trtt_bind_list_add_entry(s->l1_binds, &s->l1_binds_len, + l1_addr + l1_index * sizeof(uint32_t), dest_addr); + + return VK_SUCCESS; +} + +static VkResult +anv_sparse_bind_trtt(struct anv_device *device, int num_vm_binds, + struct anv_vm_bind *vm_binds) +{ + struct anv_trtt *trtt = &device->trtt; + VkResult result; + + /* These capacities are conservative estimations. For L1 binds the + * number will match exactly unless we skip NULL binds due to L2 already + * being NULL. For L3/L2 things are harder to estimate, but the resulting + * numbers are so small that a little overestimation won't hurt. + * + * We have assertions below to catch estimation errors. + */ + int l3l2_binds_capacity = 1; + int l1_binds_capacity = 0; + for (int b = 0; b < num_vm_binds; b++) { + int pages = vm_binds[b].size / (64 * 1024); + l1_binds_capacity += pages; + l3l2_binds_capacity += (pages / 1024 + 1) * 2; + } + + STACK_ARRAY(struct anv_trtt_bind, l3l2_binds, l3l2_binds_capacity); + STACK_ARRAY(struct anv_trtt_bind, l1_binds, l1_binds_capacity); + struct anv_trtt_submission s = { + .queue = trtt->queue, + .l3l2_binds = l3l2_binds, + .l1_binds = l1_binds, + .l3l2_binds_len = 0, + .l1_binds_len = 0, + }; + + pthread_mutex_lock(&trtt->mutex); + + if (!trtt->l3_addr) + anv_trtt_init_context_state(s.queue); + + assert(trtt->l3_addr); + + for (int b = 0; b < num_vm_binds; b++) { + for (size_t i = 0; i < vm_binds[b].size; i += 64 * 1024) { + uint64_t trtt_addr = vm_binds[b].address + i; + uint64_t dest_addr = + (vm_binds[b].op == ANV_VM_BIND && vm_binds[b].bo) ? + vm_binds[b].bo->offset + vm_binds[b].bo_offset + i : + ANV_TRTT_L1_NULL_TILE_VAL; + + result = anv_trtt_bind_add(device, trtt_addr, dest_addr, &s); + if (result != VK_SUCCESS) + goto out; + } + } + + assert(s.l3l2_binds_len <= l3l2_binds_capacity); + assert(s.l1_binds_len <= l1_binds_capacity); + + sparse_debug("trtt_binds: num_vm_binds:%02d l3l2:%04d l1:%04d\n", + num_vm_binds, s.l3l2_binds_len, s.l1_binds_len); + + if (s.l3l2_binds_len || s.l1_binds_len) + result = anv_genX(device->info, write_trtt_entries)(&s); + +out: + pthread_mutex_unlock(&trtt->mutex); + STACK_ARRAY_FINISH(l1_binds); + STACK_ARRAY_FINISH(l3l2_binds); + return result; +} + static VkResult anv_sparse_bind_vm_bind(struct anv_device *device, int num_binds, struct anv_vm_bind *binds) @@ -303,7 +572,9 @@ anv_sparse_bind(struct anv_device *device, dump_anv_vm_bind(device, sparse, &binds[b]); } - return anv_sparse_bind_vm_bind(device, num_binds, binds); + return device->physical->sparse_uses_trtt ? 
+ anv_sparse_bind_trtt(device, num_binds, binds) : + anv_sparse_bind_vm_bind(device, num_binds, binds); } VkResult @@ -316,6 +587,9 @@ anv_init_sparse_bindings(struct anv_device *device, { uint64_t size = align64(size_, ANV_SPARSE_BLOCK_SIZE); + if (device->physical->sparse_uses_trtt) + alloc_flags |= ANV_BO_ALLOC_TRTT; + sparse->address = anv_vma_alloc(device, size, ANV_SPARSE_BLOCK_SIZE, alloc_flags, intel_48b_address(client_address), diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2e5387d8227..496b7ed8557 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -8426,3 +8426,57 @@ genX(cmd_buffer_end_companion_rcs_syncpoint)(struct anv_cmd_buffer *cmd_buffer, unreachable("Not implemented"); #endif } + +VkResult +genX(write_trtt_entries)(struct anv_trtt_submission *submit) +{ +#if GFX_VER >= 12 + struct anv_queue *queue = submit->queue; + size_t batch_size = submit->l3l2_binds_len * 20 + + submit->l1_binds_len * 16 + 8; + STACK_ARRAY(uint32_t, cmds, batch_size); + struct anv_batch batch = { + .start = cmds, + .next = cmds, + .end = (void *)cmds + batch_size, + }; + + /* TODO: writes to contiguous addresses can be combined into a single big + * MI_STORE_DATA_IMM instruction. + */ + + for (int i = 0; i < submit->l3l2_binds_len; i++) { + bool is_last_write = submit->l1_binds_len == 0 && + i + 1 == submit->l3l2_binds_len; + + anv_batch_emitn(&batch, 5, GENX(MI_STORE_DATA_IMM), + .ForceWriteCompletionCheck = is_last_write, + .StoreQword = true, + .Address = anv_address_from_u64(submit->l3l2_binds[i].pte_addr), + .ImmediateData = submit->l3l2_binds[i].entry_addr, + ); + } + + for (int i = 0; i < submit->l1_binds_len; i++) { + bool is_last_write = i + 1 == submit->l1_binds_len; + + anv_batch_emit(&batch, GENX(MI_STORE_DATA_IMM), sdi) { + sdi.ForceWriteCompletionCheck = is_last_write; + sdi.Address = anv_address_from_u64(submit->l1_binds[i].pte_addr); + sdi.ImmediateData = + (submit->l1_binds[i].entry_addr >> 16) & 0xFFFFFFFF; + } + } + + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe); + + assert(batch.next <= batch.end); + + VkResult result = anv_queue_submit_trtt_batch(queue, &batch); + STACK_ARRAY_FINISH(cmds); + + return result; + +#endif + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/genX_init_state.c b/src/intel/vulkan/genX_init_state.c index 98d0c40a490..1586686fa99 100644 --- a/src/intel/vulkan/genX_init_state.c +++ b/src/intel/vulkan/genX_init_state.c @@ -606,6 +606,9 @@ init_render_queue_state(struct anv_queue *queue, bool is_companion_rcs_batch) assert(batch.next <= batch.end); + if (!device->trtt.queue) + device->trtt.queue = queue; + return anv_queue_submit_simple_batch(queue, &batch, is_companion_rcs_batch); } @@ -1205,3 +1208,56 @@ genX(apply_task_urb_workaround)(struct anv_cmd_buffer *cmd_buffer) WriteImmediateData, cmd_buffer->device->workaround_address, 0, 0); #endif } + +VkResult +genX(init_trtt_context_state)(struct anv_queue *queue) +{ +#if GFX_VER >= 12 + struct anv_device *device = queue->device; + struct anv_trtt *trtt = &device->trtt; + + uint32_t cmds[128]; + struct anv_batch batch = { + .start = cmds, + .next = cmds, + .end = (void *)cmds + sizeof(cmds), + }; + + anv_batch_write_reg(&batch, GENX(GFX_TRTT_INVAL), trtt_inval) { + trtt_inval.InvalidTileDetectionValue = ANV_TRTT_L1_INVALID_TILE_VAL; + } + anv_batch_write_reg(&batch, GENX(GFX_TRTT_NULL), trtt_null) { + trtt_null.NullTileDetectionValue = ANV_TRTT_L1_NULL_TILE_VAL; + } + anv_batch_write_reg(&batch, 
GENX(GFX_TRTT_VA_RANGE), trtt_va_range) { + trtt_va_range.TRVAMaskValue = 0xF; + trtt_va_range.TRVADataValue = 0xF; + } + + uint64_t l3_addr = trtt->l3_addr; + assert((l3_addr & 0xFFF) == 0); + anv_batch_write_reg(&batch, GENX(GFX_TRTT_L3_BASE_LOW), trtt_base_low) { + trtt_base_low.TRVAL3PointerLowerAddress = + (l3_addr & 0xFFFFF000) >> 12; + } + anv_batch_write_reg(&batch, GENX(GFX_TRTT_L3_BASE_HIGH), + trtt_base_high) { + trtt_base_high.TRVAL3PointerUpperAddress = + (l3_addr >> 32) & 0xFFFF; + } + /* Enabling TR-TT needs to be done after setting up the other registers. + */ + anv_batch_write_reg(&batch, GENX(GFX_TRTT_CR), trtt_cr) { + trtt_cr.TRTTEnable = true; + } + + anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe); + assert(batch.next <= batch.end); + + VkResult res = anv_queue_submit_simple_batch(queue, &batch, false); + if (res != VK_SUCCESS) + return res; + +#endif + return VK_SUCCESS; +} diff --git a/src/intel/vulkan/i915/anv_batch_chain.c b/src/intel/vulkan/i915/anv_batch_chain.c index 925c40e09ca..1ea0697f2bd 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.c +++ b/src/intel/vulkan/i915/anv_batch_chain.c @@ -338,6 +338,31 @@ get_context_and_exec_flags(struct anv_queue *queue, device->context_id; } +static VkResult +anv_execbuf_add_trtt_bos(struct anv_device *device, + struct anv_execbuf *execbuf) +{ + struct anv_trtt *trtt = &device->trtt; + VkResult result = VK_SUCCESS; + + /* If l3_addr is zero we're not using TR-TT, there's no bo to add. */ + if (!trtt->l3_addr) + return VK_SUCCESS; + + pthread_mutex_lock(&trtt->mutex); + + for (int i = 0; i < trtt->num_page_table_bos; i++) { + result = anv_execbuf_add_bo(device, execbuf, trtt->page_table_bos[i], + NULL, 0); + if (result != VK_SUCCESS) + goto out; + } + +out: + pthread_mutex_unlock(&trtt->mutex); + return result; +} + static VkResult setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, struct anv_queue *queue, @@ -401,7 +426,8 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, return result; /* Add the BOs for all user allocated memory objects because we can't - * track after binding updates of VK_EXT_descriptor_indexing. + * track after binding updates of VK_EXT_descriptor_indexing and due to how + * sparse resources work. */ list_for_each_entry(struct anv_device_memory, mem, &device->memory_objects, link) { @@ -410,6 +436,10 @@ setup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, return result; } + result = anv_execbuf_add_trtt_bos(device, execbuf); + if (result != VK_SUCCESS) + return result; + /* Add all the private BOs from images because we can't track after binding * updates of VK_EXT_descriptor_indexing. 
*/ @@ -954,6 +984,73 @@ fail: return result; } +VkResult +i915_execute_trtt_batch(struct anv_queue *queue, struct anv_bo *batch_bo, + uint32_t batch_size) +{ + struct anv_device *device = queue->device; + struct anv_trtt *trtt = &device->trtt; + struct anv_execbuf execbuf = { + .alloc = &device->vk.alloc, + .alloc_scope = VK_SYSTEM_ALLOCATION_SCOPE_DEVICE, + }; + VkResult result; + + result = anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, + 0); + if (result != VK_SUCCESS) + goto out; + + for (int i = 0; i < trtt->num_page_table_bos; i++) { + result = anv_execbuf_add_bo(device, &execbuf, trtt->page_table_bos[i], + NULL, EXEC_OBJECT_WRITE); + if (result != VK_SUCCESS) + goto out; + } + + result = anv_execbuf_add_bo(device, &execbuf, batch_bo, NULL, 0); + if (result != VK_SUCCESS) + goto out; + + if (INTEL_DEBUG(DEBUG_SUBMIT)) + anv_i915_debug_submit(&execbuf); + + uint64_t exec_flags = 0; + uint32_t context_id; + get_context_and_exec_flags(queue, false, &exec_flags, &context_id); + + execbuf.execbuf = (struct drm_i915_gem_execbuffer2) { + .buffers_ptr = (uintptr_t) execbuf.objects, + .buffer_count = execbuf.bo_count, + .batch_start_offset = 0, + .batch_len = batch_size, + .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | exec_flags, + .rsvd1 = context_id, + .rsvd2 = 0, + }; + + int ret = queue->device->info->no_hw ? 0 : + anv_gem_execbuffer(device, &execbuf.execbuf); + if (ret) { + result = vk_device_set_lost(&device->vk, + "trtt anv_gem_execbuffer failed: %m"); + goto out; + } + + /* TODO: we can get rid of this wait once we can properly handle the buffer + * lifetimes. + */ + result = anv_device_wait(device, batch_bo, INT64_MAX); + if (result != VK_SUCCESS) { + result = vk_device_set_lost(&device->vk, + "trtt anv_device_wait failed: %m"); + } + +out: + anv_execbuf_finish(&execbuf); + return result; +} + VkResult i915_queue_exec_trace(struct anv_queue *queue, struct anv_utrace_submit *submit) diff --git a/src/intel/vulkan/i915/anv_batch_chain.h b/src/intel/vulkan/i915/anv_batch_chain.h index f46f19c90bc..eff38ce2ee2 100644 --- a/src/intel/vulkan/i915/anv_batch_chain.h +++ b/src/intel/vulkan/i915/anv_batch_chain.h @@ -29,6 +29,7 @@ #include "vk_sync.h" +struct anv_device; struct anv_queue; struct anv_bo; struct anv_cmd_buffer; @@ -41,6 +42,11 @@ i915_queue_exec_trace(struct anv_queue *queue, VkResult i915_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo, uint32_t batch_bo_size, bool is_companion_rcs_batch); + +VkResult +i915_execute_trtt_batch(struct anv_queue *queue, struct anv_bo *batch_bo, + uint32_t batch_size); + VkResult i915_queue_exec_locked(struct anv_queue *queue, uint32_t wait_count, diff --git a/src/intel/vulkan/i915/anv_kmd_backend.c b/src/intel/vulkan/i915/anv_kmd_backend.c index fe9be942ca8..887a9dbf6c5 100644 --- a/src/intel/vulkan/i915/anv_kmd_backend.c +++ b/src/intel/vulkan/i915/anv_kmd_backend.c @@ -277,6 +277,7 @@ anv_i915_kmd_backend_get(void) .vm_bind_bo = i915_vm_bind_bo, .vm_unbind_bo = i915_vm_bind_bo, .execute_simple_batch = i915_execute_simple_batch, + .execute_trtt_batch = i915_execute_trtt_batch, .queue_exec_locked = i915_queue_exec_locked, .queue_exec_trace = i915_queue_exec_trace, .bo_alloc_flags_to_bo_flags = i915_bo_alloc_flags_to_bo_flags, diff --git a/src/intel/vulkan/xe/anv_batch_chain.c b/src/intel/vulkan/xe/anv_batch_chain.c index baf04db3cb3..187be25e5ca 100644 --- a/src/intel/vulkan/xe/anv_batch_chain.c +++ b/src/intel/vulkan/xe/anv_batch_chain.c @@ -178,6 +178,51 @@ xe_exec_print_debug(struct anv_queue 
*queue, uint32_t cmd_buffer_count, is_companion_rcs_cmd_buffer); } +VkResult +xe_execute_trtt_batch(struct anv_queue *queue, struct anv_bo *batch_bo, + uint32_t batch_size) +{ + struct anv_device *device = queue->device; + VkResult result = VK_SUCCESS; + + uint32_t syncobj_handle; + if (drmSyncobjCreate(device->fd, 0, &syncobj_handle)) + return vk_errorf(device, VK_ERROR_UNKNOWN, "Unable to create sync obj"); + + struct drm_xe_sync sync = { + .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, + .handle = syncobj_handle, + }; + struct drm_xe_exec exec = { + .exec_queue_id = queue->exec_queue_id, + .num_batch_buffer = 1, + .address = batch_bo->offset, + .num_syncs = 1, + .syncs = (uintptr_t)&sync, + }; + + if (!device->info->no_hw) { + if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec)) { + result = vk_device_set_lost(&device->vk, "XE_EXEC failed: %m"); + goto exec_error; + } + } + + /* FIXME: we shouldn't need this wait, figure out a way to remove it. */ + struct drm_syncobj_wait wait = { + .handles = (uintptr_t)&syncobj_handle, + .timeout_nsec = INT64_MAX, + .count_handles = 1, + }; + if (intel_ioctl(device->fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait)) + result = vk_device_set_lost(&device->vk, "DRM_IOCTL_SYNCOBJ_WAIT failed: %m"); + +exec_error: + drmSyncobjDestroy(device->fd, syncobj_handle); + + return result; +} + VkResult xe_queue_exec_utrace_locked(struct anv_queue *queue, struct anv_utrace_submit *utrace_submit) diff --git a/src/intel/vulkan/xe/anv_batch_chain.h b/src/intel/vulkan/xe/anv_batch_chain.h index f664f9673dd..d11dd11316a 100644 --- a/src/intel/vulkan/xe/anv_batch_chain.h +++ b/src/intel/vulkan/xe/anv_batch_chain.h @@ -28,6 +28,7 @@ #include "vulkan/vulkan_core.h" #include "vk_sync.h" +struct anv_device; struct anv_queue; struct anv_bo; struct anv_cmd_buffer; @@ -38,6 +39,10 @@ VkResult xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo, uint32_t batch_bo_size, bool is_companion_rcs_batch); VkResult +xe_execute_trtt_batch(struct anv_queue *queue, struct anv_bo *batch_bo, + uint32_t batch_size); + +VkResult xe_queue_exec_locked(struct anv_queue *queue, uint32_t wait_count, const struct vk_sync_wait *waits, diff --git a/src/intel/vulkan/xe/anv_kmd_backend.c b/src/intel/vulkan/xe/anv_kmd_backend.c index 5063b19ec2c..a80bfb57c3b 100644 --- a/src/intel/vulkan/xe/anv_kmd_backend.c +++ b/src/intel/vulkan/xe/anv_kmd_backend.c @@ -220,6 +220,7 @@ anv_xe_kmd_backend_get(void) .vm_bind_bo = xe_vm_bind_bo, .vm_unbind_bo = xe_vm_unbind_bo, .execute_simple_batch = xe_execute_simple_batch, + .execute_trtt_batch = xe_execute_trtt_batch, .queue_exec_locked = xe_queue_exec_locked, .queue_exec_trace = xe_queue_exec_utrace_locked, .bo_alloc_flags_to_bo_flags = xe_bo_alloc_flags_to_bo_flags,
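
The ANV_TRTT_L1_NULL_TILE_VAL / ANV_TRTT_L1_INVALID_TILE_VAL values and the L1 writes in genX(write_trtt_entries) imply a simple L1 entry encoding: each entry is a dword holding the 64KB-aligned destination address shifted right by 16 bits, and the values 0 and 1 are reserved as the null/invalid markers programmed into GFX_TRTT_NULL and GFX_TRTT_INVAL (L3/L2 entries instead use bit 1 and bit 0 as their null/invalid markers). A minimal sketch of that encoding; the helper names are illustrative and not part of the patch:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define TRTT_L1_NULL_TILE    0u   /* matches ANV_TRTT_L1_NULL_TILE_VAL */
#define TRTT_L1_INVALID_TILE 1u   /* matches ANV_TRTT_L1_INVALID_TILE_VAL */

/* Encode a 64KB-aligned destination address the way the L1 MI_STORE_DATA_IMM
 * in genX(write_trtt_entries) does: keep bits 47:16 as a dword.
 */
static uint32_t
trtt_l1_encode(uint64_t dest_addr)
{
   assert((dest_addr & 0xFFFF) == 0);
   return (uint32_t)((dest_addr >> 16) & 0xFFFFFFFF);
}

/* Entries 0 and 1 decode to graphics addresses 0 and 64KB, which anv never
 * uses because its first vma starts at 2MB, so they are safe to reserve.
 */
static bool
trtt_l1_is_reserved(uint32_t entry)
{
   return entry == TRTT_L1_NULL_TILE || entry == TRTT_L1_INVALID_TILE;
}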
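
trtt_make_page_table_bo() and trtt_get_page_table_bo() amount to a bump allocator: 4KB page tables are carved out of 2MB "trtt-page-table" BOs, and once a BO is exhausted the current-BO pointer is cleared so the next request allocates a fresh one. A simplified model of that logic (no error handling, no page_table_bos bookkeeping); alloc_bo_base() stands in for anv_device_alloc_bo() and is assumed to return the GPU address of a new 2MB BO:

#include <stdint.h>

#define PAGE_TABLE_SIZE    4096ull
#define PAGE_TABLE_BO_SIZE (2ull * 1024 * 1024)

struct bump_state {
   uint64_t cur_bo_base;   /* 0 means "no current BO" */
   uint64_t next_offset;
};

static uint64_t
next_page_table_addr(struct bump_state *s, uint64_t (*alloc_bo_base)(void))
{
   if (s->cur_bo_base == 0) {
      s->cur_bo_base = alloc_bo_base();
      s->next_offset = 0;
   }

   uint64_t addr = s->cur_bo_base + s->next_offset;

   s->next_offset += PAGE_TABLE_SIZE;
   if (s->next_offset >= PAGE_TABLE_BO_SIZE)
      s->cur_bo_base = 0;   /* force a new BO on the next request */

   return addr;
}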
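
The l3_mirror/l2_mirror allocations in anv_trtt_init_context_state() and the indexing in anv_trtt_bind_add() define the CPU-side mirror layout: the L3 mirror is one 4KB page (512 qword entries) and the L2 mirror packs the 512 possible L2 tables back to back, so the entry for a given address lives at l3_index * 512 + l2_index. A small sketch of that layout, with illustrative names:

#include <stdint.h>

#define TRTT_L3_ENTRIES 512   /* one 4KB page of qwords            */
#define TRTT_L2_TABLES  512   /* at most one L2 table per L3 entry */
#define TRTT_L2_ENTRIES 512

struct trtt_mirror {
   uint64_t *l3;   /* TRTT_L3_ENTRIES entries                 */
   uint64_t *l2;   /* TRTT_L2_TABLES * TRTT_L2_ENTRIES entries */
};

/* Mirrors the l2_mirror[l3_index * 512 + l2_index] indexing used by
 * anv_trtt_bind_add().
 */
static uint64_t *
trtt_mirror_l2_slot(struct trtt_mirror *m, int l3_index, int l2_index)
{
   return &m->l2[l3_index * TRTT_L2_ENTRIES + l2_index];
}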
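
The shifts at the top of anv_trtt_bind_add() spell out the TR-TT virtual address layout: bits 15:0 are the offset inside a 64KB tile, bits 25:16 index the 1024-entry L1 table, bits 34:26 the 512-entry L2 table, and bits 43:35 the 512-entry L3 table. A standalone sketch that decomposes an address the same way; the example address assumes the TR-TT window sits at the top of the 48-bit address space, as suggested by the GFX_TRTT_VA_RANGE programming (mask and data both 0xF):

#include <stdint.h>
#include <stdio.h>

struct trtt_indices {
   int l3, l2, l1;
};

/* Same bit-slicing as anv_trtt_bind_add(). */
static struct trtt_indices
trtt_decompose(uint64_t trtt_addr)
{
   return (struct trtt_indices) {
      .l3 = (trtt_addr >> 35) & 0x1FF,   /* bits 43:35 */
      .l2 = (trtt_addr >> 26) & 0x1FF,   /* bits 34:26 */
      .l1 = (trtt_addr >> 16) & 0x3FF,   /* bits 25:16 */
   };
}

int main(void)
{
   /* Hypothetical address: 64KB tile index 3 within the TR-TT range. */
   uint64_t va = 0xF00000000000ull + 3 * 64 * 1024;
   struct trtt_indices idx = trtt_decompose(va);

   printf("l3=%d l2=%d l1=%d\n", idx.l3, idx.l2, idx.l1);   /* 0 0 3 */
   return 0;
}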
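
The capacity estimate in anv_sparse_bind_trtt() can be sanity-checked with concrete numbers. For a hypothetical single 256MB bind: 256MB / 64KB = 4096 L1 entries, and since each L1 table covers 1024 tiles (64MB), the range can touch at most 5 L1 tables, each of which may need both an L2 and an L3 entry written, hence the (pages / 1024 + 1) * 2 term plus the initial 1:

#include <stdio.h>

int main(void)
{
   long long size = 256ll * 1024 * 1024;                   /* hypothetical bind */
   int pages = size / (64 * 1024);                         /* 4096 64KB tiles   */
   int l1_binds_capacity = pages;                          /* 4096              */
   int l3l2_binds_capacity = 1 + (pages / 1024 + 1) * 2;   /* 11                */

   printf("l1=%d l3l2=%d\n", l1_binds_capacity, l3l2_binds_capacity);
   return 0;
}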
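
The batch_size computed at the top of genX(write_trtt_entries) follows from the command sizes: an MI_STORE_DATA_IMM with StoreQword set is 5 dwords (20 bytes), the dword form used for L1 entries is 4 dwords (16 bytes), and the trailing 8 bytes leave room for MI_BATCH_BUFFER_END plus the 8-byte alignment applied in anv_queue_submit_trtt_batch(). A sketch of that worst-case size, assuming those command lengths:

#include <stddef.h>

static inline size_t
trtt_batch_size(int l3l2_binds_len, int l1_binds_len)
{
   return (size_t)l3l2_binds_len * 20 +   /* qword MI_STORE_DATA_IMM   */
          (size_t)l1_binds_len * 16 +     /* dword MI_STORE_DATA_IMM   */
          8;                              /* MI_BATCH_BUFFER_END + pad */
}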
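
genX(init_trtt_context_state) splits the 4KB-aligned L3 table address across two registers: GFX_TRTT_L3_BASE_LOW takes bits 31:12 and GFX_TRTT_L3_BASE_HIGH takes bits 47:32, with GFX_TRTT_CR enabling TR-TT only after everything else is programmed. A small sketch of the field packing; the function name is illustrative:

#include <assert.h>
#include <stdint.h>

static void
trtt_l3_base_fields(uint64_t l3_addr, uint32_t *low, uint32_t *high)
{
   assert((l3_addr & 0xFFF) == 0);        /* page tables are 4KB aligned */
   *low  = (l3_addr & 0xFFFFF000) >> 12;  /* bits 31:12 */
   *high = (l3_addr >> 32) & 0xFFFF;      /* bits 47:32 */
}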