diff --git a/meson.build b/meson.build
index 393f3ac1b93..d1a5cc2cb4c 100644
--- a/meson.build
+++ b/meson.build
@@ -95,6 +95,7 @@ with_vulkan_overlay_layer = get_option('vulkan-layers').contains('overlay')
 with_vulkan_device_select_layer = get_option('vulkan-layers').contains('device-select')
 with_vulkan_screenshot_layer = get_option('vulkan-layers').contains('screenshot')
 with_vulkan_vram_report_limit_layer = get_option('vulkan-layers').contains('vram-report-limit')
+with_vulkan_anti_lag_layer = get_option('vulkan-layers').contains('anti-lag')
 with_tools = get_option('tools')
 if with_tools.contains('all')
   with_tools = [
diff --git a/meson.options b/meson.options
index c3c02c4c94f..cd0e56cc429 100644
--- a/meson.options
+++ b/meson.options
@@ -299,7 +299,7 @@ option(
   type : 'array',
   value : [],
   choices : [
-    'device-select', 'intel-nullhw', 'overlay', 'screenshot',
+    'device-select', 'intel-nullhw', 'overlay', 'screenshot', 'anti-lag',
     'vram-report-limit',
   ],
   description : 'List of vulkan layers to build'
diff --git a/src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json b/src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json
new file mode 100644
index 00000000000..4e2ab794c9e
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json
@@ -0,0 +1,26 @@
+{
+    "file_format_version": "1.2.1",
+    "layer": {
+        "name": "VK_LAYER_MESA_anti_lag",
+        "type": "GLOBAL",
+        "library_path": "libVkLayer_MESA_anti_lag.so",
+        "api_version": "1.4.303",
+        "implementation_version": "1",
+        "description": "Open-source implementation of the VK_AMD_anti_lag extension.",
+        "functions": {
+            "vkNegotiateLoaderLayerInterfaceVersion": "anti_lag_NegotiateLoaderLayerInterfaceVersion"
+        },
+        "device_extensions": [
+            {
+                "name": "VK_AMD_anti_lag",
+                "spec_version": "1",
+                "entrypoints": [
+                    "vkAntiLagUpdateAMD"
+                ]
+            }
+        ],
+        "disable_environment": {
+            "DISABLE_LAYER_MESA_ANTI_LAG": "1"
+        }
+    }
+}
\ No newline at end of file
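For context, this is roughly how an application opts in to the extension that this manifest advertises. A minimal sketch, not part of the patch; the helper name and queue setup are hypothetical:

#include <vulkan/vulkan_core.h>

/* Hypothetical helper: create a device with VK_AMD_anti_lag enabled. */
VkResult create_device_with_anti_lag(VkPhysicalDevice phys,
                                     const VkDeviceQueueCreateInfo *queue_info)
{
   VkPhysicalDeviceAntiLagFeaturesAMD anti_lag = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD,
      .antiLag = VK_TRUE,
   };
   const char *ext = VK_AMD_ANTI_LAG_EXTENSION_NAME;
   VkDeviceCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
      .pNext = &anti_lag, /* feature chain inspected by the layer's CreateDevice hook */
      .queueCreateInfoCount = 1,
      .pQueueCreateInfos = queue_info,
      .enabledExtensionCount = 1,
      .ppEnabledExtensionNames = &ext,
   };
   VkDevice dev;
   return vkCreateDevice(phys, &info, NULL, &dev);
}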
diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.c b/src/vulkan/anti-lag-layer/anti_lag_layer.c
new file mode 100644
index 00000000000..6c21e074024
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "anti_lag_layer.h"
+#include <assert.h>
+#include "util/os_time.h"
+#include "util/simple_mtx.h"
+#include "vulkan/vulkan_core.h"
+#include "ringbuffer.h"
+#include "vk_alloc.h"
+#include "vk_util.h"
+
+static bool
+evaluate_frame(device_context *ctx, frame *frame, bool force_wait)
+{
+   if (frame->state != FRAME_PRESENT) {
+      /* This frame is not finished yet. */
+      assert(!force_wait);
+      return false;
+   }
+
+   int query_flags = VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT;
+   const uint32_t frame_idx = ringbuffer_index(ctx->frames, frame);
+
+   /* Before we commit to completing a frame, all submits on all queues must have completed. */
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      queue_context *queue_ctx = &ctx->queues[i];
+      ringbuffer_lock(queue_ctx->queries);
+      uint64_t expected_signal_value = queue_ctx->semaphore_value - queue_ctx->queries.size +
+                                       queue_ctx->submissions_per_frame[frame_idx];
+      ringbuffer_unlock(queue_ctx->queries);
+
+      if (force_wait) {
+         /* Wait for the timeline semaphore of the frame to be signaled. */
+         struct VkSemaphoreWaitInfo wait_info = {
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+            .semaphoreCount = 1,
+            .pSemaphores = &queue_ctx->semaphore,
+            .pValues = &expected_signal_value,
+         };
+         ctx->vtable.WaitSemaphores(ctx->device, &wait_info, UINT64_MAX);
+      } else {
+         /* Return early if the last timeline semaphore of the frame has not been signaled yet. */
+         uint64_t signal_value;
+         ctx->vtable.GetSemaphoreCounterValue(ctx->device, queue_ctx->semaphore, &signal_value);
+         if (signal_value < expected_signal_value)
+            return false;
+      }
+   }
+
+   /* For each queue, retrieve timestamp query results. */
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      queue_context *queue_ctx = &ctx->queues[i];
+
+      /* As we hold a global mtx and this is the only place where queries are freed,
+       * we don't need to lock the query ringbuffer here in order to read the first entry.
+       */
+      struct query *query = ringbuffer_first(queue_ctx->queries);
+      uint32_t query_idx = ringbuffer_index(queue_ctx->queries, query);
+      int num_timestamps =
+         MIN2(queue_ctx->submissions_per_frame[frame_idx], MAX_QUERIES - query_idx);
+
+      while (num_timestamps > 0) {
+         /* Retrieve timestamp results from this queue. */
+         ctx->vtable.GetQueryPoolResults(ctx->device, queue_ctx->queryPool, query_idx,
+                                         num_timestamps, num_timestamps * sizeof(struct query),
+                                         &query->begin_gpu_ts, sizeof(struct query), query_flags);
+
+         ringbuffer_lock(queue_ctx->queries);
+         for (unsigned j = 0; j < num_timestamps; j++) {
+
+            /* Calibrate device timestamps. */
+            query->begin_gpu_ts =
+               ctx->calibration.delta +
+               (uint64_t)(query->begin_gpu_ts * ctx->calibration.timestamp_period);
+            if (query->begin_gpu_ts > query->submit_cpu_ts)
+               frame->min_delay =
+                  MIN2(frame->min_delay, query->begin_gpu_ts - query->submit_cpu_ts);
+
+            /* Check if we can reset half of the query pool at once. */
+            uint32_t next_idx = ringbuffer_index(queue_ctx->queries, query) + 1;
+            const bool reset = next_idx == MAX_QUERIES || next_idx == MAX_QUERIES / 2;
+            if (reset) {
+               ringbuffer_unlock(queue_ctx->queries);
+               ctx->vtable.ResetQueryPool(ctx->device, queue_ctx->queryPool,
+                                          next_idx - MAX_QUERIES / 2, MAX_QUERIES / 2);
+               ringbuffer_lock(queue_ctx->queries);
+            }
+
+            /* Free query. */
+            ringbuffer_free(queue_ctx->queries, query);
+            queue_ctx->submissions_per_frame[frame_idx]--;
+
+            query = ringbuffer_first(queue_ctx->queries);
+         }
+
+         /* Ensure that the total number of queries across all frames is correct. */
+         ASSERTED uint32_t count = 0;
+         for (unsigned i = 0; i < MAX_FRAMES; i++)
+            count += queue_ctx->submissions_per_frame[i];
+         assert(count == queue_ctx->queries.size);
+
+         query_idx = ringbuffer_index(queue_ctx->queries, query);
+         num_timestamps =
+            MIN2(queue_ctx->submissions_per_frame[frame_idx], MAX_QUERIES - query_idx);
+
+         ringbuffer_unlock(queue_ctx->queries);
+      }
+   }
+
+   frame->min_delay++; /* wraps UINT64_MAX to 0 in case there were no submissions. */
+
+   return true;
+}
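Why the expected_signal_value above identifies the frame's last submission, written out once (my reading of the code, not a comment from the patch):

/* Per queue, under the query ringbuffer lock:
 *
 *   semaphore_value         counts every submission ever tagged on this queue,
 *   queries.size            counts tagged submissions not yet evaluated,
 *   submissions_per_frame   counts not-yet-evaluated submissions of THIS frame.
 *
 * Frames are evaluated oldest-first, so this frame's pending queries are the
 * oldest live ones. Its last submission therefore signalled the value:
 *
 *   expected = semaphore_value - queries.size + submissions_per_frame[frame]
 */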
+
+static bool
+calibrate_timestamps(device_context *ctx)
+{
+   uint64_t ts[2];
+   uint64_t deviation;
+
+   VkCalibratedTimestampInfoKHR info[2] = {
+      {
+         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
+         .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
+      },
+      {
+         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
+         .timeDomain = VK_TIME_DOMAIN_DEVICE_KHR,
+      },
+   };
+
+   VkResult result = ctx->vtable.GetCalibratedTimestampsKHR(ctx->device, 2, info, ts, &deviation);
+   if (result == VK_SUCCESS) {
+      /* We take a moving average in order to reduce variance. */
+      int64_t new_delta = ts[0] - (int64_t)(ts[1] * ctx->calibration.timestamp_period);
+
+      if (ctx->calibration.delta == 0) {
+         ctx->calibration.delta = new_delta;
+      } else {
+         int64_t diff = new_delta - ctx->calibration.delta;
+         ctx->calibration.delta += diff / 8;
+      }
+
+      /* Take a new calibrated timestamp every second. */
+      ctx->calibration.recalibrate_when = ts[0] + 1000000000ull;
+   }
+
+   return result == VK_SUCCESS;
+}
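The `diff / 8` update above is an exponentially weighted moving average with smoothing factor 1/8. The same rule in isolation, as a sketch (function name is illustrative):

#include <stdint.h>

/* EWMA with alpha = 1/8: delta_new = (7/8) * delta_old + (1/8) * sample,
 * written incrementally exactly as calibrate_timestamps() does. */
static int64_t ewma_update(int64_t delta, int64_t sample)
{
   return delta + (sample - delta) / 8;
}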
+
+static void
+begin_next_frame(device_context *ctx)
+{
+   frame *next_frame;
+   if (ctx->active_frame) {
+      assert(ctx->active_frame->state == FRAME_SUBMIT);
+      ctx->active_frame->state = FRAME_PRESENT;
+      next_frame = ringbuffer_next(ctx->frames, ctx->active_frame);
+   } else {
+      next_frame = ringbuffer_last(ctx->frames);
+   }
+
+   /* If there is a frame ready, it becomes active. */
+   if (next_frame->state == FRAME_INPUT) {
+      next_frame->state = FRAME_SUBMIT;
+      ctx->active_frame = next_frame;
+   } else {
+      ctx->active_frame = NULL;
+   }
+}
+
+static void
+anti_lag_disable(device_context *ctx)
+{
+   ringbuffer_lock(ctx->frames);
+   while (ctx->frames.size) {
+      /* Pass force_wait=true, so that all pending timestamp queries get completed. */
+      begin_next_frame(ctx);
+      frame *frame = ringbuffer_first(ctx->frames);
+      evaluate_frame(ctx, frame, true);
+      frame->state = FRAME_INVALID;
+      ringbuffer_free(ctx->frames, frame);
+   }
+   assert(!ctx->active_frame);
+   ringbuffer_unlock(ctx->frames);
+}
+
+#define TARGET_DELAY 4000000ll /* 4 ms */
+/**
+ * Returns the amount of time that we want the next frame to be delayed.
+ *
+ * The algorithm used by this function is very simplistic and only aims
+ * to minimize the delay between calls to vkQueueSubmit or vkQueueSubmit2
+ * and the start of execution of the submission.
+ */
+static int64_t
+get_wait_time(device_context *ctx)
+{
+   /* Take the previously evaluated frame's delay as baseline. */
+   int64_t imposed_delay = ctx->base_delay;
+   int64_t adaptation = 0;
+
+   ringbuffer_lock(ctx->frames);
+   /* In case our ringbuffer is completely full and no frame is in PRESENT stage,
+    * just move the oldest frame to PRESENT stage, and force-wait.
+    */
+   bool force_wait = ctx->frames.size == MAX_FRAMES;
+   frame *next_frame = ringbuffer_first(ctx->frames);
+   if (force_wait && next_frame->state != FRAME_PRESENT)
+      begin_next_frame(ctx);
+
+   /* Also force-wait for the oldest frame if there are already two frames in PRESENT stage. */
+   force_wait |= ringbuffer_next(ctx->frames, next_frame)->state == FRAME_PRESENT;
+   ringbuffer_unlock(ctx->frames);
+
+   /* Take newly evaluated frames into consideration. */
+   while (evaluate_frame(ctx, next_frame, force_wait)) {
+
+      if (next_frame->min_delay < TARGET_DELAY / 2 && ctx->adaptation <= 0) {
+         /* If there is no delay between submission and GPU start, halve the base delay and
+          * set the delay for this frame to zero, in order to account for sudden changes.
+          */
+         ctx->base_delay = ctx->base_delay / 2;
+         adaptation = -ctx->base_delay;
+      } else {
+         /* We use a kind of exponentially weighted moving average here, in
+          * order to determine a base delay. We use a smoothing factor of roughly
+          * 3%, but don't discount the previous value. This helps keep the delay
+          * slightly below the 4 ms target, most of the time.
+          */
+         int64_t diff = (int64_t)next_frame->min_delay - TARGET_DELAY;
+         ctx->base_delay = MAX2(0, ctx->base_delay + diff / 32); /* corresponds to ~3 % */
+
+         /* As the base delay gets adjusted rather slowly, we additionally use half of the
+          * diff as adaptation delay to account for sudden changes. A quarter of the adaptation
+          * is then subtracted for the next frame, so that we can avoid overcompensation.
+          */
+         adaptation = diff / 2 - ctx->adaptation / 4;
+      }
+
+      /* We only need space for one frame. */
+      force_wait = false;
+
+      ringbuffer_lock(ctx->frames);
+      next_frame->state = FRAME_INVALID;
+      ringbuffer_free(ctx->frames, next_frame);
+      next_frame = ringbuffer_first(ctx->frames);
+      ringbuffer_unlock(ctx->frames);
+   }
+   imposed_delay = ctx->base_delay + adaptation;
+   ctx->adaptation = adaptation;
+
+   if (imposed_delay > 100000000) {
+      /* This corresponds to <10 FPS. Something might have gone wrong. */
+      calibrate_timestamps(ctx);
+      ctx->base_delay = ctx->adaptation = imposed_delay = 0;
+   }
+
+   return MAX2(0, imposed_delay);
+}
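One controller step with made-up numbers, to make the update rule concrete (values are illustrative, not from the patch):

/* Assume base_delay = 2 ms, previous adaptation = 1 ms, and a frame that
 * measured min_delay = 12 ms against the 4 ms target:
 *
 *   diff       = 12 ms - 4 ms            = 8 ms
 *   base_delay = 2 ms + 8 ms / 32        = 2.25 ms
 *   adaptation = 8 ms / 2 - 1 ms / 4     = 3.75 ms
 *   imposed    = base_delay + adaptation = 6 ms
 *
 * So the next frame is delayed by 6 ms, pulling CPU submission closer to
 * the point where the GPU can actually start executing it. */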
+
+static void
+reset_frame(frame *frame)
+{
+   assert(frame->state == FRAME_INVALID);
+   frame->frame_idx = 0;
+   frame->frame_start_time = 0;
+   frame->min_delay = UINT64_MAX;
+   frame->state = FRAME_INPUT;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+anti_lag_AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD *pData)
+{
+   if (pData == NULL)
+      return;
+
+   device_context *ctx = get_device_context(device);
+   if (pData->mode == VK_ANTI_LAG_MODE_OFF_AMD) {
+      /* The application requests to disable anti-lag. */
+      simple_mtx_lock(&ctx->mtx);
+      anti_lag_disable(ctx);
+      simple_mtx_unlock(&ctx->mtx);
+      return;
+   }
+
+   uint64_t frame_idx = 0;
+   int64_t now = os_time_get_nano();
+   int64_t imposed_delay = 0;
+   int64_t last_frame_begin = 0;
+
+   if (pData->pPresentationInfo) {
+      /* The same frameIndex value should be used with VK_ANTI_LAG_STAGE_INPUT_AMD before
+       * the frame begins and with VK_ANTI_LAG_STAGE_PRESENT_AMD when the frame ends.
+       */
+      frame_idx = pData->pPresentationInfo->frameIndex;
+
+      /* This marks the end of the current frame. */
+      if (pData->pPresentationInfo->stage == VK_ANTI_LAG_STAGE_PRESENT_AMD) {
+         /* If there is already a new frame pending, any submission that happens afterwards
+          * gets associated with the new frame.
+          */
+         ringbuffer_lock(ctx->frames);
+         /* Check that the currently active frame is indeed the frame we are ending now. */
+         while (ctx->active_frame && ctx->active_frame->frame_idx <= frame_idx) {
+            begin_next_frame(ctx);
+         }
+         ringbuffer_unlock(ctx->frames);
+         return;
+      }
+   }
+
+   /* Lock this function, in order to avoid race conditions on frame allocation. */
+   simple_mtx_lock(&ctx->mtx);
+
+   /* VK_ANTI_LAG_STAGE_INPUT_AMD: This marks the beginning of a new frame.
+    * Evaluate previous frames in order to determine the wait time.
+    */
+   imposed_delay = get_wait_time(ctx);
+   int64_t next_deadline = now + imposed_delay;
+
+   /* Ensure maxFPS adherence. */
+   if (pData->maxFPS) {
+      int64_t frametime_period = 1000000000u / pData->maxFPS;
+      last_frame_begin = ringbuffer_last(ctx->frames)->frame_start_time;
+      next_deadline = MAX2(next_deadline, last_frame_begin + frametime_period);
+   }
+
+   /* Recalibrate every now and then. */
+   if (next_deadline > ctx->calibration.recalibrate_when)
+      calibrate_timestamps(ctx);
+
+   /* Sleep until the deadline is met. */
+   os_time_nanosleep_until(next_deadline);
+
+   /* Initialize new frame. */
+   ringbuffer_lock(ctx->frames);
+   frame *new_frame = ringbuffer_alloc(ctx->frames);
+   reset_frame(new_frame);
+   new_frame->frame_start_time = next_deadline;
+   new_frame->imposed_delay = imposed_delay;
+   new_frame->frame_idx = frame_idx;
+
+   /* Immediately set the frame active if there is no other frame already active. */
+   if (!ctx->active_frame)
+      begin_next_frame(ctx);
+
+   ringbuffer_unlock(ctx->frames);
+   simple_mtx_unlock(&ctx->mtx);
+}
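For reference, the calling pattern this entry point expects from an application looks roughly like this. A frame-loop sketch with hypothetical handles; vkAntiLagUpdateAMD has to be fetched through vkGetDeviceProcAddr:

#include <vulkan/vulkan_core.h>

/* Sketch of an application frame using VK_AMD_anti_lag. */
void app_frame(VkDevice dev, PFN_vkAntiLagUpdateAMD pfnAntiLagUpdateAMD, uint64_t frame_idx)
{
   VkAntiLagPresentationInfoAMD present_info = {
      .sType = VK_STRUCTURE_TYPE_ANTI_LAG_PRESENTATION_INFO_AMD,
      .stage = VK_ANTI_LAG_STAGE_INPUT_AMD,
      .frameIndex = frame_idx,
   };
   VkAntiLagDataAMD data = {
      .sType = VK_STRUCTURE_TYPE_ANTI_LAG_DATA_AMD,
      .mode = VK_ANTI_LAG_MODE_ON_AMD,
      .maxFPS = 0, /* no frame-rate cap */
      .pPresentationInfo = &present_info,
   };
   pfnAntiLagUpdateAMD(dev, &data); /* may sleep: this is where the layer imposes the delay */
   /* ...sample input, record and submit work... */
   present_info.stage = VK_ANTI_LAG_STAGE_PRESENT_AMD;
   pfnAntiLagUpdateAMD(dev, &data); /* marks the end of the frame, before vkQueuePresentKHR */
   /* ...vkQueuePresentKHR(...)... */
}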
+
+static queue_context *
+get_queue_context(device_context *ctx, VkQueue queue)
+{
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      if (ctx->queues[i].queue == queue)
+         return &ctx->queues[i];
+   }
+
+   return NULL;
+}
+
+static struct query *
+allocate_query(device_context *ctx, queue_context *queue_ctx)
+{
+   if (!ctx->active_frame)
+      return NULL;
+
+   /* Allow for a single frame to use at most half of the query pool. */
+   uint32_t frame_idx = ringbuffer_index(ctx->frames, ctx->active_frame);
+   if (queue_ctx->submissions_per_frame[frame_idx] > MAX_QUERIES / 2)
+      return NULL;
+
+   /* Check that the next query index has been reset properly:
+    *
+    * We use some double-buffering here in order to reduce the number of
+    * vkResetQueryPool calls.
+    * Return NULL if the next query-index allocation crosses into the half
+    * which still contains active queries.
+    */
+   if (queue_ctx->queries.size > MAX_QUERIES / 2) {
+      struct query *last_query = ringbuffer_last(queue_ctx->queries);
+      uint32_t next_idx = ringbuffer_index(queue_ctx->queries, last_query) + 1;
+      if (next_idx == MAX_QUERIES || next_idx == MAX_QUERIES / 2)
+         return NULL;
+   }
+
+   return ringbuffer_alloc(queue_ctx->queries);
+}
+
+static bool
+get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer *cmdbuffer)
+{
+   uint64_t now = os_time_get_nano();
+
+   /* Begin critical section. */
+   ringbuffer_lock(ctx->frames);
+   ringbuffer_lock(queue_ctx->queries);
+   struct query *query = allocate_query(ctx, queue_ctx);
+   if (query == NULL) {
+      ringbuffer_unlock(queue_ctx->queries);
+      ringbuffer_unlock(ctx->frames);
+      return false;
+   }
+
+   query->submit_cpu_ts = now;
+
+   /* Assign commandBuffer for timestamp. */
+   *cmdbuffer = query->cmdbuffer;
+
+   /* Increment timeline semaphore count. */
+   queue_ctx->semaphore_value++;
+
+   /* Add new submission entry for the current frame. */
+   assert(ctx->active_frame->state == FRAME_SUBMIT);
+   uint32_t frame_idx = ringbuffer_index(ctx->frames, ctx->active_frame);
+   queue_ctx->submissions_per_frame[frame_idx]++;
+
+   ringbuffer_unlock(queue_ctx->queries);
+   ringbuffer_unlock(ctx->frames);
+   return true;
+}
+
+static VkResult
+queue_submit2(device_context *ctx, VkQueue queue, uint32_t submitCount,
+              const VkSubmitInfo2 *pSubmits, VkFence fence, PFN_vkQueueSubmit2 queueSubmit2)
+{
+   queue_context *queue_ctx = get_queue_context(ctx, queue);
+   if (!ctx->active_frame || !queue_ctx)
+      return queueSubmit2(queue, submitCount, pSubmits, fence);
+
+   int first = -1;
+   VkCommandBuffer timestamp_cmdbuffer;
+   /* Check if any submission contains commandbuffers. */
+   for (unsigned i = 0; i < submitCount; i++) {
+      if (pSubmits[i].commandBufferInfoCount) {
+         first = i;
+         break;
+      }
+   }
+
+   /* Get timestamp commandbuffer. */
+   if (first == -1 || !get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer))
+      return queueSubmit2(queue, submitCount, pSubmits, fence);
+
+   VkSubmitInfo2 *submits;
+   VkCommandBufferSubmitInfo *cmdbuffers;
+   VkSemaphoreSubmitInfo *semaphores;
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &submits, VkSubmitInfo2, submitCount);
+   vk_multialloc_add(&ma, &cmdbuffers, VkCommandBufferSubmitInfo,
+                     pSubmits[first].commandBufferInfoCount + 1);
+   vk_multialloc_add(&ma, &semaphores, VkSemaphoreSubmitInfo,
+                     pSubmits[first].signalSemaphoreInfoCount + 1);
+   void *buf = vk_multialloc_zalloc(&ma, &ctx->alloc, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!buf)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   memcpy(submits, pSubmits, sizeof(VkSubmitInfo2) * submitCount);
+   VkSubmitInfo2 *submit_info = &submits[first];
+
+   /* Add commandbuffer to submission. */
+   cmdbuffers[0] = (VkCommandBufferSubmitInfo){
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+      .commandBuffer = timestamp_cmdbuffer,
+   };
+   memcpy(&cmdbuffers[1], submit_info->pCommandBufferInfos,
+          sizeof(VkCommandBufferSubmitInfo) * submit_info->commandBufferInfoCount);
+   submit_info->pCommandBufferInfos = cmdbuffers;
+   submit_info->commandBufferInfoCount++;
+
+   /* Add timeline semaphore to submission. */
+   memcpy(semaphores, submit_info->pSignalSemaphoreInfos,
+          sizeof(VkSemaphoreSubmitInfo) * submit_info->signalSemaphoreInfoCount);
+   semaphores[submit_info->signalSemaphoreInfoCount] = (VkSemaphoreSubmitInfo){
+      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+      .semaphore = queue_ctx->semaphore,
+      .value = queue_ctx->semaphore_value,
+      .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+   };
+   submit_info->pSignalSemaphoreInfos = semaphores;
+   submit_info->signalSemaphoreInfoCount++;
+
+   /* Submit with added timestamp query commandbuffer. */
+   VkResult res = queueSubmit2(queue, submitCount, submits, fence);
+   vk_free(&ctx->alloc, submits);
+   return res;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits,
+                         VkFence fence)
+{
+   device_context *ctx = get_device_context(queue);
+   return queue_submit2(ctx, queue, submitCount, pSubmits, fence, ctx->vtable.QueueSubmit2KHR);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits,
+                      VkFence fence)
+{
+   device_context *ctx = get_device_context(queue);
+   return queue_submit2(ctx, queue, submitCount, pSubmits, fence, ctx->vtable.QueueSubmit2);
+}
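The shape of a wrapped submission, summarized as a comment diagram (my illustration of what queue_submit2() builds, not part of the patch):

/*   original:  cmdbuffers [cb0, cb1]               signals [app_sem]
 *   wrapped:   cmdbuffers [timestamp_cb, cb0, cb1] signals [app_sem, layer_timeline]
 *
 * The pre-recorded timestamp command buffer is prepended, so its
 * vkCmdWriteTimestamp marks when the GPU actually starts working on this
 * submission; the appended timeline-semaphore signal tells the layer when
 * the submission has fully completed. */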
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits,
+                     VkFence fence)
+{
+   device_context *ctx = get_device_context(queue);
+   queue_context *queue_ctx = get_queue_context(ctx, queue);
+   if (!ctx->active_frame || !queue_ctx)
+      return ctx->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
+
+   int first = -1;
+   VkCommandBuffer timestamp_cmdbuffer;
+   /* Check if any submission contains commandbuffers. */
+   for (unsigned i = 0; i < submitCount; i++) {
+      if (pSubmits[i].commandBufferCount) {
+         first = i;
+         break;
+      }
+   }
+
+   /* Get timestamp commandbuffer. */
+   if (first == -1 || !get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer))
+      return ctx->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
+
+   VkSubmitInfo *submits;
+   VkCommandBuffer *cmdbuffers;
+   VkSemaphore *semaphores;
+   VkTimelineSemaphoreSubmitInfo *semaphore_info;
+   uint64_t *semaphore_values;
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &submits, VkSubmitInfo, submitCount);
+   vk_multialloc_add(&ma, &cmdbuffers, VkCommandBuffer, pSubmits[first].commandBufferCount + 1);
+   vk_multialloc_add(&ma, &semaphores, VkSemaphore, pSubmits[first].signalSemaphoreCount + 1);
+   vk_multialloc_add(&ma, &semaphore_info, VkTimelineSemaphoreSubmitInfo, 1);
+   vk_multialloc_add(&ma, &semaphore_values, uint64_t, pSubmits[first].signalSemaphoreCount + 1);
+   void *buf = vk_multialloc_zalloc(&ma, &ctx->alloc, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!buf)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   memcpy(submits, pSubmits, sizeof(VkSubmitInfo) * submitCount);
+   VkSubmitInfo *submit_info = &submits[first];
+
+   /* Add commandbuffer to submission. */
+   cmdbuffers[0] = timestamp_cmdbuffer;
+   memcpy(&cmdbuffers[1], submit_info->pCommandBuffers,
+          sizeof(VkCommandBuffer) * submit_info->commandBufferCount);
+   submit_info->pCommandBuffers = cmdbuffers;
+   submit_info->commandBufferCount++;
+
+   /* Add timeline semaphore to submission. */
+   const VkTimelineSemaphoreSubmitInfo *tlssi =
+      vk_find_struct_const(pSubmits[first].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
+   semaphores[0] = queue_ctx->semaphore;
+   memcpy(&semaphores[1], submit_info->pSignalSemaphores,
+          sizeof(VkSemaphore) * submit_info->signalSemaphoreCount);
+   submit_info->pSignalSemaphores = semaphores;
+   submit_info->signalSemaphoreCount++;
+   semaphore_values[0] = queue_ctx->semaphore_value;
+   if (tlssi) {
+      *semaphore_info = *tlssi; /* save original values */
+      memcpy(&semaphore_values[1], tlssi->pSignalSemaphoreValues,
+             sizeof(uint64_t) * tlssi->signalSemaphoreValueCount);
+      ((VkTimelineSemaphoreSubmitInfo *)tlssi)->pSignalSemaphoreValues = semaphore_values;
+      ((VkTimelineSemaphoreSubmitInfo *)tlssi)->signalSemaphoreValueCount =
+         submit_info->signalSemaphoreCount;
+   } else {
+      *semaphore_info = (VkTimelineSemaphoreSubmitInfo){
+         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+         .pNext = submit_info->pNext,
+         .signalSemaphoreValueCount = submit_info->signalSemaphoreCount,
+         .pSignalSemaphoreValues = semaphore_values,
+      };
+      submit_info->pNext = semaphore_info;
+   }
+
+   /* Submit with added timestamp query commandbuffer. */
+   VkResult res = ctx->vtable.QueueSubmit(queue, submitCount, submits, fence);
+   if (tlssi)
+      *(VkTimelineSemaphoreSubmitInfo *)tlssi = *semaphore_info; /* restore */
+   vk_free(&ctx->alloc, buf);
+   return res;
+}
diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.h b/src/vulkan/anti-lag-layer/anti_lag_layer.h
new file mode 100644
index 00000000000..31abb0f9aee
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef ANTI_LAG_LAYER_H
+#define ANTI_LAG_LAYER_H
+
+#include "util/simple_mtx.h"
+#include "vulkan/vk_layer.h"
+#include "vulkan/vulkan_core.h"
+#include "ringbuffer.h"
+
+#define MAX_FRAMES 8
+#define MAX_QUERIES 256
+
+enum frame_state {
+   FRAME_INVALID = 0,
+   FRAME_INPUT,   /* Frame is in input stage. */
+   FRAME_SUBMIT,  /* All current queueSubmit calls are associated with this frame. */
+   FRAME_PRESENT, /* Frame is in present stage and latencies can be evaluated.
*/ +}; + +typedef struct frame { + uint64_t frame_idx; + uint64_t frame_start_time; + uint64_t min_delay; + uint64_t imposed_delay; + enum frame_state state; +} frame; + +struct query { + uint64_t begin_gpu_ts; + uint64_t submit_cpu_ts; + VkCommandBuffer cmdbuffer; +}; + +typedef struct queue_context { + VkQueue queue; + uint32_t queue_family_idx; + VkCommandPool cmdPool; + VkQueryPool queryPool; + VkSemaphore semaphore; + uint64_t semaphore_value; + uint8_t submissions_per_frame[MAX_FRAMES]; + RINGBUFFER_DECLARE(queries, struct query, MAX_QUERIES); +} queue_context; + +typedef struct device_context { + + struct DeviceDispatchTable { +#define DECLARE_HOOK(fn) PFN_vk##fn fn + DECLARE_HOOK(GetDeviceProcAddr); + DECLARE_HOOK(SetDeviceLoaderData); + DECLARE_HOOK(DestroyDevice); + DECLARE_HOOK(QueueSubmit); + DECLARE_HOOK(QueueSubmit2); + DECLARE_HOOK(QueueSubmit2KHR); + DECLARE_HOOK(GetDeviceQueue); + DECLARE_HOOK(CreateCommandPool); + DECLARE_HOOK(DestroyCommandPool); + DECLARE_HOOK(CreateQueryPool); + DECLARE_HOOK(ResetQueryPool); + DECLARE_HOOK(DestroyQueryPool); + DECLARE_HOOK(GetQueryPoolResults); + DECLARE_HOOK(AllocateCommandBuffers); + DECLARE_HOOK(FreeCommandBuffers); + DECLARE_HOOK(BeginCommandBuffer); + DECLARE_HOOK(EndCommandBuffer); + DECLARE_HOOK(GetCalibratedTimestampsKHR); + DECLARE_HOOK(CmdWriteTimestamp); + DECLARE_HOOK(CreateSemaphore); + DECLARE_HOOK(DestroySemaphore); + DECLARE_HOOK(GetSemaphoreCounterValue); + DECLARE_HOOK(WaitSemaphores); +#undef DECLARE_HOOK + } vtable; + + VkDevice device; + VkAllocationCallbacks alloc; + simple_mtx_t mtx; + + struct { + int64_t delta; + uint64_t recalibrate_when; + float timestamp_period; + } calibration; + + RINGBUFFER_DECLARE(frames, frame, MAX_FRAMES); + frame *active_frame; + int64_t base_delay; + int64_t adaptation; + + unsigned num_queues; + queue_context queues[]; +} device_context; + +device_context *get_device_context(const void *object); + +void anti_lag_AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD *pData); +VkResult anti_lag_QueueSubmit2KHR(VkQueue queue, uint32_t submitCount, + const VkSubmitInfo2 *pSubmits, VkFence fence); +VkResult anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits, + VkFence fence); +VkResult anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, + VkFence fence); + +VkResult anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct); + +#endif /* ANTI_LAG_LAYER_H */ diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c new file mode 100644 index 00000000000..d2ca4a7dd44 --- /dev/null +++ b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c @@ -0,0 +1,899 @@ +/* + * Copyright © 2025 Valve Corporation + * + * SPDX-License-Identifier: MIT + */ + +#include "util/simple_mtx.h" +#include "vulkan/vk_layer.h" +#include "vulkan/vulkan_core.h" +#include "anti_lag_layer.h" +#include "vk_alloc.h" +#include "vk_util.h" + +static uintptr_t +object_to_key(const void *object) +{ + return (uintptr_t)*(uintptr_t *)object; +} + +typedef struct instance_data { + struct InstanceDispatchTable { +#define DECLARE_HOOK(fn) PFN_vk##fn fn + DECLARE_HOOK(GetInstanceProcAddr); + DECLARE_HOOK(CreateInstance); + DECLARE_HOOK(DestroyInstance); + DECLARE_HOOK(CreateDevice); + DECLARE_HOOK(EnumerateDeviceExtensionProperties); + DECLARE_HOOK(GetPhysicalDeviceFeatures2KHR); + DECLARE_HOOK(GetPhysicalDeviceFeatures2); + 
DECLARE_HOOK(GetPhysicalDeviceProperties); + DECLARE_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsEXT); + DECLARE_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsKHR); + DECLARE_HOOK(GetPhysicalDeviceQueueFamilyProperties); +#undef DECLARE_HOOK + } vtable; + + VkInstance instance; + uint32_t apiVersion; + VkAllocationCallbacks alloc; + struct instance_data *next; +} instance_data; + +static void +init_instance_vtable(instance_data *ctx, PFN_vkGetInstanceProcAddr gpa) +{ + ctx->vtable.GetInstanceProcAddr = gpa; +#define INIT_HOOK(fn) ctx->vtable.fn = (PFN_vk##fn)gpa(ctx->instance, "vk" #fn) + INIT_HOOK(CreateInstance); + INIT_HOOK(DestroyInstance); + INIT_HOOK(CreateDevice); + INIT_HOOK(EnumerateDeviceExtensionProperties); + INIT_HOOK(GetPhysicalDeviceFeatures2KHR); + INIT_HOOK(GetPhysicalDeviceFeatures2); + INIT_HOOK(GetPhysicalDeviceProperties); + INIT_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsEXT); + INIT_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsKHR); + INIT_HOOK(GetPhysicalDeviceQueueFamilyProperties); +#undef INIT_HOOK +} + +static simple_mtx_t instance_mtx = SIMPLE_MTX_INITIALIZER; +static instance_data *instance_list = NULL; + +static void +add_instance(instance_data *instance) +{ + simple_mtx_lock(&instance_mtx); + instance_data **ptr = &instance_list; + while (*ptr != NULL) + ptr = &(*ptr)->next; + *ptr = instance; + simple_mtx_unlock(&instance_mtx); +} + +static instance_data * +remove_instance(const void *object) +{ + uintptr_t key = object_to_key(object); + simple_mtx_lock(&instance_mtx); + instance_data **ptr = &instance_list; + while (*ptr && key != object_to_key((*ptr)->instance)) + ptr = &(*ptr)->next; + + instance_data *ctx = *ptr; + *ptr = ctx ? ctx->next : NULL; + simple_mtx_unlock(&instance_mtx); + return ctx; +} + +static instance_data * +get_instance_data(const void *object) +{ + uintptr_t key = object_to_key(object); + simple_mtx_lock(&instance_mtx); + instance_data *ctx = instance_list; + while (ctx && key != object_to_key(ctx->instance)) + ctx = ctx->next; + simple_mtx_unlock(&instance_mtx); + return ctx; +} + +static VKAPI_ATTR VkResult VKAPI_CALL +anti_lag_CreateInstance(const VkInstanceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkInstance *pInstance) +{ + VkLayerInstanceCreateInfo *chain_info = (VkLayerInstanceCreateInfo *)(pCreateInfo->pNext); + while (chain_info && !(chain_info->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO && + chain_info->function == VK_LAYER_LINK_INFO)) { + chain_info = (VkLayerInstanceCreateInfo *)(chain_info->pNext); + } + + assert(chain_info && chain_info->u.pLayerInfo); + PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = + chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; + PFN_vkCreateInstance fpCreateInstance = + (PFN_vkCreateInstance)fpGetInstanceProcAddr(NULL, "vkCreateInstance"); + if (fpCreateInstance == NULL) + return VK_ERROR_INITIALIZATION_FAILED; + + /* Advance the link info for the next element on the chain. */ + chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; + + /* Create Instance. */ + VkResult result = fpCreateInstance(pCreateInfo, pAllocator, pInstance); + if (result != VK_SUCCESS) + return result; + + /* Create Instance context. */ + const VkAllocationCallbacks *alloc = pAllocator ? 
pAllocator : vk_default_allocator(); + void *buf = vk_alloc(alloc, sizeof(instance_data), alignof(instance_data), + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!buf) { + PFN_vkDestroyInstance fpDestroyInstance = + (PFN_vkDestroyInstance)fpGetInstanceProcAddr(*pInstance, "vkDestroyInstance"); + fpDestroyInstance(*pInstance, alloc); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + instance_data *ctx = (instance_data *)buf; + ctx->apiVersion = pCreateInfo->pApplicationInfo && pCreateInfo->pApplicationInfo->apiVersion + ? pCreateInfo->pApplicationInfo->apiVersion + : VK_API_VERSION_1_0; + ctx->instance = *pInstance; + ctx->alloc = *alloc; + ctx->next = NULL; + init_instance_vtable(ctx, fpGetInstanceProcAddr); + add_instance(ctx); + + return VK_SUCCESS; +} + +static VKAPI_ATTR void VKAPI_CALL +anti_lag_DestroyInstance(VkInstance instance, const VkAllocationCallbacks *pAllocator) +{ + instance_data *ctx = remove_instance(instance); + if (ctx) { + ctx->vtable.DestroyInstance(instance, pAllocator); + vk_free(&ctx->alloc, ctx); + } +} + +typedef struct device_data { + VkDevice device; + PFN_vkGetDeviceProcAddr GetDeviceProcAddr; + device_context *ctx; /* NULL if anti-lag ext is not enabled. */ + struct device_data *next; +} device_data; + +static void +init_device_vtable(device_context *ctx, PFN_vkGetDeviceProcAddr gpa, PFN_vkSetDeviceLoaderData sld, + bool calibrated_timestamps_khr, bool host_query_reset_ext, + bool timeline_semaphore_khr) +{ + ctx->vtable.GetDeviceProcAddr = gpa; + ctx->vtable.SetDeviceLoaderData = sld; +#define INIT_HOOK(fn) ctx->vtable.fn = (PFN_vk##fn)gpa(ctx->device, "vk" #fn) +#define INIT_HOOK_ALIAS(fn, alias, cond) \ + ctx->vtable.fn = (PFN_vk##fn)gpa(ctx->device, cond ? "vk" #alias : "vk" #fn) + INIT_HOOK(DestroyDevice); + INIT_HOOK(QueueSubmit); + INIT_HOOK(QueueSubmit2); + INIT_HOOK(QueueSubmit2KHR); + INIT_HOOK(GetDeviceQueue); + INIT_HOOK(CreateCommandPool); + INIT_HOOK(DestroyCommandPool); + INIT_HOOK(CreateQueryPool); + INIT_HOOK_ALIAS(ResetQueryPool, ResetQueryPoolEXT, host_query_reset_ext); + INIT_HOOK(DestroyQueryPool); + INIT_HOOK(GetQueryPoolResults); + INIT_HOOK(AllocateCommandBuffers); + INIT_HOOK(FreeCommandBuffers); + INIT_HOOK(BeginCommandBuffer); + INIT_HOOK(EndCommandBuffer); + INIT_HOOK_ALIAS(GetCalibratedTimestampsKHR, GetCalibratedTimestampsEXT, !calibrated_timestamps_khr); + INIT_HOOK(CmdWriteTimestamp); + INIT_HOOK(CreateSemaphore); + INIT_HOOK(DestroySemaphore); + INIT_HOOK_ALIAS(GetSemaphoreCounterValue, GetSemaphoreCounterValueKHR, timeline_semaphore_khr); + INIT_HOOK_ALIAS(WaitSemaphores, WaitSemaphoresKHR, timeline_semaphore_khr); +#undef INIT_HOOK +#undef INIT_HOOK_ALIAS +} + +static simple_mtx_t device_mtx = SIMPLE_MTX_INITIALIZER; +static device_data *device_list = NULL; + +static void +add_device(device_data *device) +{ + simple_mtx_lock(&device_mtx); + device_data **ptr = &device_list; + while (*ptr != NULL) + ptr = &(*ptr)->next; + *ptr = device; + simple_mtx_unlock(&device_mtx); +} + +static device_data * +remove_device(const void *object) +{ + uintptr_t key = object_to_key(object); + simple_mtx_lock(&device_mtx); + device_data **ptr = &device_list; + while (*ptr && key != object_to_key((*ptr)->device)) + ptr = &(*ptr)->next; + + device_data *ctx = *ptr; + *ptr = ctx ? 
ctx->next : NULL; + simple_mtx_unlock(&device_mtx); + return ctx; +} + +static device_data * +get_device_data(const void *object) +{ + uintptr_t key = object_to_key(object); + simple_mtx_lock(&device_mtx); + device_data *ctx = device_list; + while (ctx && key != object_to_key(ctx->device)) + ctx = ctx->next; + simple_mtx_unlock(&device_mtx); + return ctx; +} + +device_context * +get_device_context(const void *object) +{ + device_data *data = get_device_data(object); + assert(data && data->ctx); + return data->ctx; +} + +static VkLayerDeviceCreateInfo * +get_device_chain_info(const VkDeviceCreateInfo *pCreateInfo, VkLayerFunction func) +{ + vk_foreach_struct_const (item, pCreateInfo->pNext) { + if (item->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && + ((VkLayerDeviceCreateInfo *)item)->function == func) + return (VkLayerDeviceCreateInfo *)item; + } + return NULL; +} + +static bool +should_enable_layer(instance_data *ctx, VkPhysicalDevice physicalDevice, + VkPhysicalDeviceAntiLagFeaturesAMD ext_feature) +{ + /* The extension is not requested by the application. */ + if (!ext_feature.antiLag) + return false; + + /* Ensure that the underlying implementation does not expose VK_AMD_anti_lag itself. */ + ext_feature.antiLag = false; + VkPhysicalDeviceFeatures2 features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = &ext_feature, + }; + + if (ctx->vtable.GetPhysicalDeviceFeatures2KHR) { + ctx->vtable.GetPhysicalDeviceFeatures2KHR(physicalDevice, &features); + return !ext_feature.antiLag; + } + + if (ctx->vtable.GetPhysicalDeviceFeatures2) { + ctx->vtable.GetPhysicalDeviceFeatures2(physicalDevice, &features); + return !ext_feature.antiLag; + } + + return false; +} + +static bool +check_calibrated_timestamps(instance_data *data, VkPhysicalDevice physicalDevice, bool *has_khr) +{ + VkResult res; + uint32_t count = 0; + res = data->vtable.EnumerateDeviceExtensionProperties(physicalDevice, NULL, &count, NULL); + VkExtensionProperties *extensions = + vk_alloc(&data->alloc, count * sizeof(VkExtensionProperties), alignof(VkExtensionProperties), + VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!extensions) + return false; + + res |= data->vtable.EnumerateDeviceExtensionProperties(physicalDevice, NULL, &count, extensions); + + *has_khr = false; + bool has_ext = false; + if (res == VK_SUCCESS) { + for (unsigned i = 0; i < count; i++) { + if (strcmp(extensions[i].extensionName, VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0) + *has_khr = true; + if (strcmp(extensions[i].extensionName, VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0) + has_ext = true; + } + } + + vk_free(&data->alloc, extensions); + return *has_khr || has_ext; +} + +/* Initialize per-queue context: + * + * This includes creating one CommandPool and one QueryPool per Queue as well as + * recording one CommandBuffer per timestamp query. 
+ */ +static VkResult +init_queue_context(device_context *ctx, queue_context *queue_ctx) +{ +#define CHECK_RESULT(res, label) \ + if (res != VK_SUCCESS) { \ + goto label; \ + } + + VkResult result; + + /* Create command pool */ + struct VkCommandPoolCreateInfo pool_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = NULL, + .flags = 0, + .queueFamilyIndex = queue_ctx->queue_family_idx, + }; + result = + ctx->vtable.CreateCommandPool(ctx->device, &pool_info, &ctx->alloc, &queue_ctx->cmdPool); + CHECK_RESULT(result, fail_cmdpool) + + /* Create query pool */ + VkQueryPoolCreateInfo query_pool_info = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .queryType = VK_QUERY_TYPE_TIMESTAMP, + .queryCount = MAX_QUERIES, + }; + result = ctx->vtable.CreateQueryPool(ctx->device, &query_pool_info, &ctx->alloc, + &queue_ctx->queryPool); + CHECK_RESULT(result, fail_querypool) + ctx->vtable.ResetQueryPool(ctx->device, queue_ctx->queryPool, 0, MAX_QUERIES); + ringbuffer_init(queue_ctx->queries); + + /* Create timeline semaphore */ + VkSemaphoreTypeCreateInfo timelineCreateInfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, + .pNext = NULL, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, + .initialValue = 0, + }; + VkSemaphoreCreateInfo createInfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &timelineCreateInfo, + .flags = 0, + }; + result = + ctx->vtable.CreateSemaphore(ctx->device, &createInfo, &ctx->alloc, &queue_ctx->semaphore); + CHECK_RESULT(result, fail_semaphore); + + for (unsigned j = 0; j < MAX_QUERIES; j++) { + struct query *query = &queue_ctx->queries.data[j]; + + /* Allocate commandBuffer for timestamp. */ + VkCommandBufferAllocateInfo buffer_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = queue_ctx->cmdPool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + result = ctx->vtable.AllocateCommandBuffers(ctx->device, &buffer_info, &query->cmdbuffer); + CHECK_RESULT(result, fail) + result = ctx->vtable.SetDeviceLoaderData(ctx->device, query->cmdbuffer); + CHECK_RESULT(result, fail) + + /* Record commandbuffer. 
*/ + VkCommandBufferBeginInfo beginInfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + }; + + result = ctx->vtable.BeginCommandBuffer(query->cmdbuffer, &beginInfo); + CHECK_RESULT(result, fail) + ctx->vtable.CmdWriteTimestamp(query->cmdbuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + queue_ctx->queryPool, j); + result = ctx->vtable.EndCommandBuffer(query->cmdbuffer); + CHECK_RESULT(result, fail) + } + +#undef CHECK_RESULT + return result; + +fail: + ctx->vtable.DestroySemaphore(ctx->device, queue_ctx->semaphore, &ctx->alloc); +fail_semaphore: + ctx->vtable.DestroyQueryPool(ctx->device, queue_ctx->queryPool, &ctx->alloc); +fail_querypool: + ctx->vtable.DestroyCommandPool(ctx->device, queue_ctx->cmdPool, &ctx->alloc); +fail_cmdpool: + for (queue_context *qctx = ctx->queues; qctx != queue_ctx; qctx++) { + ctx->vtable.DestroyQueryPool(ctx->device, qctx->queryPool, &ctx->alloc); + ctx->vtable.DestroyCommandPool(ctx->device, qctx->cmdPool, &ctx->alloc); + } + + return result; +} + +static VKAPI_ATTR VkResult VKAPI_CALL +anti_lag_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo, + const VkAllocationCallbacks *pAllocator, VkDevice *pDevice) +{ + instance_data *instance_ctx = get_instance_data(physicalDevice); + VkLayerDeviceCreateInfo *chain_info = get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO); + PFN_vkGetDeviceProcAddr fpGetDeviceProcAddr = chain_info->u.pLayerInfo->pfnNextGetDeviceProcAddr; + PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr = + chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr; + PFN_vkCreateDevice fpCreateDevice = + (PFN_vkCreateDevice)fpGetInstanceProcAddr(instance_ctx->instance, "vkCreateDevice"); + if (fpCreateDevice == NULL) + return VK_ERROR_INITIALIZATION_FAILED; + + /* Advance the link info for the next element on the chain. */ + chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext; + + const VkAllocationCallbacks *alloc = pAllocator ? pAllocator : &instance_ctx->alloc; + device_data *data; + VkResult result; + + /* Only allocate a context and add to dispatch if the extension is enabled. */ + const VkPhysicalDeviceAntiLagFeaturesAMD *ext_features = + vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD); + bool enable = ext_features && should_enable_layer(instance_ctx, physicalDevice, *ext_features); + if (enable) { + /* Count queues with sufficient timestamp valid bits. */ + // TODO: make it work with less than 64 valid bits + unsigned num_queue_families = 0; + unsigned num_queues = 0; + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) + num_queue_families = + MAX2(num_queue_families, pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex + 1); + VkQueueFamilyProperties *queue_family_props = + vk_alloc(alloc, num_queue_families * sizeof(VkQueueFamilyProperties), + alignof(VkQueueFamilyProperties), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!queue_family_props) + return VK_ERROR_OUT_OF_HOST_MEMORY; + + instance_ctx->vtable.GetPhysicalDeviceQueueFamilyProperties( + physicalDevice, &num_queue_families, queue_family_props); + for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { + uint32_t queue_family_idx = pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex; + if (queue_family_props[queue_family_idx].timestampValidBits == 64 && + (queue_family_props[queue_family_idx].queueFlags & + (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))) { + num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount; + } + } + + /* Allocate the context. 
*/ + device_context *ctx; + queue_context *queues; + VK_MULTIALLOC(ma); + vk_multialloc_add(&ma, &data, device_data, 1); + vk_multialloc_add(&ma, &ctx, struct device_context, 1); + vk_multialloc_add(&ma, &queues, queue_context, num_queues); + void *buf = vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + if (!buf) { + vk_free(alloc, queue_family_props); + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + VkPhysicalDeviceProperties properties; + instance_ctx->vtable.GetPhysicalDeviceProperties(physicalDevice, &properties); + + /* Ensure that calibrated timestamps and host query reset extensions are enabled. */ + bool has_calibrated_timestamps = false; + bool has_calibrated_timestamps_khr = false; + bool has_vk12 = instance_ctx->apiVersion >= VK_API_VERSION_1_2 && + properties.apiVersion >= VK_API_VERSION_1_2; + bool has_host_query_reset = has_vk12; + bool has_host_query_reset_ext = false; + bool has_timeline_semaphore = has_vk12; + bool has_timeline_semaphore_khr = false; + for (unsigned i = 0; i < pCreateInfo->enabledExtensionCount; i++) { + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0) + has_calibrated_timestamps = has_calibrated_timestamps_khr = true; + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0) + has_calibrated_timestamps = true; + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME) == 0) + has_host_query_reset = has_host_query_reset_ext = true; + if (strcmp(pCreateInfo->ppEnabledExtensionNames[i], + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME) == 0) + has_timeline_semaphore = has_timeline_semaphore_khr = true; + } + + /* Add missing extensions. */ + VkDeviceCreateInfo create_info = *pCreateInfo; + const char **ext_names = NULL; + uint32_t num_extra_extensions = + !has_calibrated_timestamps + !has_host_query_reset + !has_timeline_semaphore; + if (num_extra_extensions) { + ext_names = vk_alloc( + alloc, (pCreateInfo->enabledExtensionCount + num_extra_extensions) * sizeof(char *), + alignof(char *), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); + if (!ext_names) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + goto fail; + } + + memcpy(ext_names, pCreateInfo->ppEnabledExtensionNames, + sizeof(char *) * pCreateInfo->enabledExtensionCount); + + if (!has_timeline_semaphore) { + has_timeline_semaphore_khr = true; + ext_names[create_info.enabledExtensionCount++] = + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME; + } + if (!has_host_query_reset) { + has_host_query_reset_ext = true; + ext_names[create_info.enabledExtensionCount++] = VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME; + } + if (!has_calibrated_timestamps) { + check_calibrated_timestamps(instance_ctx, physicalDevice, + &has_calibrated_timestamps_khr); + ext_names[create_info.enabledExtensionCount++] = + has_calibrated_timestamps_khr ? VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME + : VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME; + } + create_info.ppEnabledExtensionNames = ext_names; + } + + /* Ensure that hostQueryReset feature is enabled. 
*/
+      const VkPhysicalDeviceVulkan12Features *vk12 =
+         vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
+      const VkPhysicalDeviceHostQueryResetFeatures *query_reset =
+         vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES);
+      const VkPhysicalDeviceTimelineSemaphoreFeatures *timeline_semaphore =
+         vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES);
+      uint32_t prev_hostQueryReset;
+      uint32_t prev_timelineSemaphore;
+      if (vk12) {
+         prev_hostQueryReset = vk12->hostQueryReset;
+         prev_timelineSemaphore = vk12->timelineSemaphore;
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->hostQueryReset = VK_TRUE;
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->timelineSemaphore = VK_TRUE;
+      } else {
+         if (query_reset) {
+            prev_hostQueryReset = query_reset->hostQueryReset;
+            ((VkPhysicalDeviceHostQueryResetFeatures *)query_reset)->hostQueryReset = VK_TRUE;
+         } else {
+            VkPhysicalDeviceHostQueryResetFeatures *feat =
+               alloca(sizeof(VkPhysicalDeviceHostQueryResetFeatures));
+            *feat = (VkPhysicalDeviceHostQueryResetFeatures){
+               .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES,
+               .pNext = (void *)create_info.pNext,
+               .hostQueryReset = VK_TRUE,
+            };
+            create_info.pNext = feat;
+         }
+         if (timeline_semaphore) {
+            prev_timelineSemaphore = timeline_semaphore->timelineSemaphore;
+            ((VkPhysicalDeviceTimelineSemaphoreFeatures *)timeline_semaphore)->timelineSemaphore =
+               VK_TRUE;
+         } else {
+            VkPhysicalDeviceTimelineSemaphoreFeatures *feat =
+               alloca(sizeof(VkPhysicalDeviceTimelineSemaphoreFeatures));
+            *feat = (VkPhysicalDeviceTimelineSemaphoreFeatures){
+               .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
+               .pNext = (void *)create_info.pNext,
+               .timelineSemaphore = VK_TRUE,
+            };
+            create_info.pNext = feat;
+         }
+      }
+
+      /* Create Device. */
+      result = fpCreateDevice(physicalDevice, &create_info, pAllocator, pDevice);
+
+      if (vk12) {
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->hostQueryReset = prev_hostQueryReset;
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->timelineSemaphore = prev_timelineSemaphore;
+      } else {
+         if (query_reset)
+            ((VkPhysicalDeviceHostQueryResetFeatures *)query_reset)->hostQueryReset =
+               prev_hostQueryReset;
+         if (timeline_semaphore)
+            ((VkPhysicalDeviceTimelineSemaphoreFeatures *)timeline_semaphore)->timelineSemaphore =
+               prev_timelineSemaphore;
+      }
+      if (ext_names)
+         vk_free(alloc, ext_names);
+
+      if (result != VK_SUCCESS)
+         goto fail;
+
+      /* Initialize Context. */
+      data->ctx = ctx;
+      ctx->device = *pDevice;
+      chain_info = get_device_chain_info(pCreateInfo, VK_LOADER_DATA_CALLBACK);
+      PFN_vkSetDeviceLoaderData fpSetDeviceLoaderData =
+         (PFN_vkSetDeviceLoaderData)chain_info->u.pfnSetDeviceLoaderData;
+      init_device_vtable(ctx, fpGetDeviceProcAddr, fpSetDeviceLoaderData,
+                         has_calibrated_timestamps_khr, has_host_query_reset_ext,
+                         has_timeline_semaphore_khr);
+      simple_mtx_init(&ctx->mtx, mtx_plain);
+      ctx->num_queues = num_queues;
+      ctx->alloc = *alloc;
+      ctx->calibration.timestamp_period = properties.limits.timestampPeriod;
+      ringbuffer_init(ctx->frames);
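The queue filter that follows relies on two per-family properties. The same check as a standalone sketch (the helper name is illustrative, not part of the patch):

#include <stdbool.h>
#include <vulkan/vulkan_core.h>

/* True if a queue family can carry the layer's timestamp queries:
 * full 64-bit timestamps and GRAPHICS or COMPUTE capability. */
static bool family_usable(const VkQueueFamilyProperties *props)
{
   return props->timestampValidBits == 64 &&
          (props->queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) != 0;
}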
+
+      /* Initialize Queue contexts. */
+      unsigned idx = 0;
+      for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+         /* Skip queue families without sufficient timestamp valid bits.
+          * Also skip queue families which cannot do GRAPHICS or COMPUTE, since they
+          * are always heavily async in nature (DMA transfers and sparse, for example).
+          * Video is also irrelevant here since it should never be a critical path
+          * in a game that wants anti-lag. */
+         uint32_t queue_family_idx = pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex;
+         if (queue_family_props[queue_family_idx].timestampValidBits != 64 ||
+             !(queue_family_props[queue_family_idx].queueFlags &
+               (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
+            continue;
+
+         for (unsigned j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) {
+            VkQueue queue;
+            ctx->vtable.GetDeviceQueue(*pDevice, queue_family_idx, j, &queue);
+            ctx->queues[idx].queue = queue;
+            ctx->queues[idx].queue_family_idx = queue_family_idx;
+            result = init_queue_context(ctx, &ctx->queues[idx]);
+            idx++;
+            if (result != VK_SUCCESS)
+               goto fail;
+         }
+      }
+      assert(idx == num_queues);
+fail:
+      vk_free(alloc, queue_family_props);
+   } else {
+      data = (device_data *)vk_alloc(alloc, sizeof(device_data), alignof(device_data),
+                                     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+      if (!data)
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+      result = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice);
+      data->ctx = NULL;
+   }
+
+   if (result == VK_SUCCESS) {
+      data->device = *pDevice;
+      data->GetDeviceProcAddr = fpGetDeviceProcAddr;
+      data->next = NULL;
+      add_device(data);
+   } else {
+      vk_free(alloc, data);
+   }
+
+   return result;
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+anti_lag_DestroyDevice(VkDevice pDevice, const VkAllocationCallbacks *pAllocator)
+{
+   device_data *data = remove_device(pDevice);
+   assert(data && data->ctx);
+   device_context *ctx = data->ctx;
+
+   /* Destroy per-queue context.
+    * The application must ensure that no work is active on the device.
+    */
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      queue_context *queue_ctx = &ctx->queues[i];
+      ctx->vtable.DestroyQueryPool(ctx->device, queue_ctx->queryPool, &ctx->alloc);
+      ctx->vtable.DestroyCommandPool(ctx->device, queue_ctx->cmdPool, &ctx->alloc);
+      ctx->vtable.DestroySemaphore(ctx->device, queue_ctx->semaphore, &ctx->alloc);
+   }
+
+   ctx->vtable.DestroyDevice(pDevice, pAllocator);
+   vk_free(&ctx->alloc, data);
+}
+
+static bool
+is_anti_lag_supported(VkPhysicalDevice physicalDevice)
+{
+   instance_data *data = get_instance_data(physicalDevice);
+   VkPhysicalDeviceProperties properties;
+   data->vtable.GetPhysicalDeviceProperties(physicalDevice, &properties);
+   if (properties.limits.timestampPeriod == 0.0 || !properties.limits.timestampComputeAndGraphics)
+      return false;
+
+   /* Check whether calibrated timestamps are supported. */
+   bool has_khr;
+   if (!check_calibrated_timestamps(data, physicalDevice, &has_khr))
+      return false;
+
+   /* Check whether timeline semaphores and host query reset are supported.
*/ + VkPhysicalDeviceTimelineSemaphoreFeatures timeline_semaphore = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, + .timelineSemaphore = VK_FALSE, + }; + VkPhysicalDeviceHostQueryResetFeatures query_reset = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES, + .pNext = &timeline_semaphore, + .hostQueryReset = VK_FALSE, + }; + VkPhysicalDeviceFeatures2 features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + .pNext = &query_reset, + }; + if (data->vtable.GetPhysicalDeviceFeatures2KHR) + data->vtable.GetPhysicalDeviceFeatures2KHR(physicalDevice, &features); + else if (data->vtable.GetPhysicalDeviceFeatures2) + data->vtable.GetPhysicalDeviceFeatures2(physicalDevice, &features); + if (!timeline_semaphore.timelineSemaphore || !query_reset.hostQueryReset) + return false; + + /* Check that DEVICE and CLOCK_MONOTONIC time domains are available. */ + VkResult res; + uint32_t count = 0; + PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsKHR ctd = + has_khr ? data->vtable.GetPhysicalDeviceCalibrateableTimeDomainsKHR + : data->vtable.GetPhysicalDeviceCalibrateableTimeDomainsEXT; + res = ctd(physicalDevice, &count, NULL); + VkTimeDomainKHR *time_domains = alloca(count * sizeof(VkTimeDomainKHR)); + res |= ctd(physicalDevice, &count, time_domains); + if (res != VK_SUCCESS) + return false; + + bool has_device_domain = false; + bool has_host_domain = false; + for (unsigned i = 0; i < count; i++) { + has_device_domain |= time_domains[i] == VK_TIME_DOMAIN_DEVICE_KHR; + has_host_domain |= time_domains[i] == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR; + } + + return has_device_domain && has_host_domain; +} + +static VKAPI_ATTR VkResult VKAPI_CALL +anti_lag_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, const char *pLayerName, + uint32_t *pPropertyCount, + VkExtensionProperties *pProperties) +{ + instance_data *instance_data = get_instance_data(physicalDevice); + + if (pLayerName && strcmp(pLayerName, "VK_LAYER_MESA_anti_lag") == 0) { + if (!is_anti_lag_supported(physicalDevice)) { + *pPropertyCount = 0; + return VK_SUCCESS; + } + + VK_OUTARRAY_MAKE_TYPED(VkExtensionProperties, out, pProperties, pPropertyCount); + vk_outarray_append_typed(VkExtensionProperties, &out, prop) + { + *prop = + (VkExtensionProperties){VK_AMD_ANTI_LAG_EXTENSION_NAME, VK_AMD_ANTI_LAG_SPEC_VERSION}; + } + return vk_outarray_status(&out); + } + + return instance_data->vtable.EnumerateDeviceExtensionProperties(physicalDevice, pLayerName, + pPropertyCount, pProperties); +} + +static VKAPI_ATTR void VKAPI_CALL +anti_lag_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2 *pFeatures) +{ + instance_data *ctx = get_instance_data(physicalDevice); + ctx->vtable.GetPhysicalDeviceFeatures2(physicalDevice, pFeatures); + VkPhysicalDeviceAntiLagFeaturesAMD *anti_lag_features = + vk_find_struct(pFeatures->pNext, PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD); + + if (anti_lag_features) { + anti_lag_features->antiLag |= is_anti_lag_supported(physicalDevice); + } +} + +static VKAPI_ATTR void VKAPI_CALL +anti_lag_GetPhysicalDeviceFeatures2KHR(VkPhysicalDevice physicalDevice, + VkPhysicalDeviceFeatures2 *pFeatures) +{ + instance_data *ctx = get_instance_data(physicalDevice); + ctx->vtable.GetPhysicalDeviceFeatures2KHR(physicalDevice, pFeatures); + VkPhysicalDeviceAntiLagFeaturesAMD *anti_lag_features = + vk_find_struct(pFeatures->pNext, PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD); + + if (anti_lag_features) { + anti_lag_features->antiLag |= 
is_anti_lag_supported(physicalDevice); + } +} + +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +anti_lag_GetInstanceProcAddr(VkInstance instance, const char *pName); + +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +anti_lag_GetDeviceProcAddr(VkDevice device, const char *pName); + +#define ADD_HOOK(fn) {"vk" #fn, (PFN_vkVoidFunction)anti_lag_##fn} +static const struct { + const char *name; + PFN_vkVoidFunction ptr; +} instance_funcptr_map[] = { + ADD_HOOK(GetInstanceProcAddr), + ADD_HOOK(CreateInstance), + ADD_HOOK(DestroyInstance), + ADD_HOOK(EnumerateDeviceExtensionProperties), + ADD_HOOK(CreateDevice), + ADD_HOOK(GetPhysicalDeviceFeatures2), + ADD_HOOK(GetPhysicalDeviceFeatures2KHR), +}; + +static const struct { + const char *name; + PFN_vkVoidFunction ptr; +} device_funcptr_map[] = { + ADD_HOOK(GetDeviceProcAddr), + ADD_HOOK(DestroyDevice), + ADD_HOOK(AntiLagUpdateAMD), + ADD_HOOK(QueueSubmit), + ADD_HOOK(QueueSubmit2), + ADD_HOOK(QueueSubmit2KHR), +}; +#undef ADD_HOOK + +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +anti_lag_GetInstanceProcAddr(VkInstance instance, const char *pName) +{ + if (!pName) + return NULL; + + PFN_vkVoidFunction result = NULL; + if (instance) { + instance_data *ctx = get_instance_data(instance); + if (ctx) + result = ctx->vtable.GetInstanceProcAddr(instance, pName); + } + + /* Only hook instance functions which are exposed by the underlying impl. + * Ignore instance parameter for vkCreateInstance and vkCreateDevice. + */ + if (result || strcmp(pName, "vkCreateInstance") == 0 || strcmp(pName, "vkCreateDevice") == 0) { + for (uint32_t i = 0; i < ARRAY_SIZE(instance_funcptr_map); i++) { + if (strcmp(pName, instance_funcptr_map[i].name) == 0) + return instance_funcptr_map[i].ptr; + } + } + + return result; +} + +static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL +anti_lag_GetDeviceProcAddr(VkDevice device, const char *pName) +{ + if (!pName || !device) + return NULL; + + device_data *data = get_device_data(device); + PFN_vkVoidFunction result = data->GetDeviceProcAddr(device, pName); + + /* Only hook device functions if the Layer extension is enabled. 
*/ + if (data->ctx && (result || strcmp(pName, "vkAntiLagUpdateAMD") == 0)) { + for (uint32_t i = 0; i < ARRAY_SIZE(device_funcptr_map); i++) { + if (strcmp(pName, device_funcptr_map[i].name) == 0) + return device_funcptr_map[i].ptr; + } + } + + return result; +} + +PUBLIC VKAPI_ATTR VkResult VKAPI_CALL +anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct) +{ + assert(pVersionStruct != NULL); + assert(pVersionStruct->sType == LAYER_NEGOTIATE_INTERFACE_STRUCT); + + if (pVersionStruct->loaderLayerInterfaceVersion >= 2) { + pVersionStruct->loaderLayerInterfaceVersion = 2; + pVersionStruct->pfnGetInstanceProcAddr = anti_lag_GetInstanceProcAddr; + pVersionStruct->pfnGetDeviceProcAddr = anti_lag_GetDeviceProcAddr; + pVersionStruct->pfnGetPhysicalDeviceProcAddr = NULL; + } + + return VK_SUCCESS; +} diff --git a/src/vulkan/anti-lag-layer/meson.build b/src/vulkan/anti-lag-layer/meson.build new file mode 100644 index 00000000000..264c55c8e75 --- /dev/null +++ b/src/vulkan/anti-lag-layer/meson.build @@ -0,0 +1,26 @@ +# Copyright © 2025 Valve Corporation +# SPDX-License-Identifier: MIT + +vklayer_files = files( + 'anti_lag_layer.c', + 'anti_lag_layer_interface.c', +) + +shared_library( + 'VkLayer_MESA_anti_lag', + vklayer_files, + c_args : [no_override_init_args], + gnu_symbol_visibility : 'hidden', + dependencies : [ + idep_vulkan_util, idep_mesautil, + ], + include_directories : [inc_include, inc_util, inc_src], + link_args : cc.get_supported_link_arguments(['-Wl,-Bsymbolic-functions', '-Wl,-z,relro']), + install : true +) + +install_data( + files('VkLayer_MESA_anti_lag.json'), + install_dir : join_paths(get_option('datadir'), 'vulkan', 'implicit_layer.d'), + install_tag : 'runtime', +) diff --git a/src/vulkan/anti-lag-layer/ringbuffer.h b/src/vulkan/anti-lag-layer/ringbuffer.h new file mode 100644 index 00000000000..1747b7e720f --- /dev/null +++ b/src/vulkan/anti-lag-layer/ringbuffer.h @@ -0,0 +1,58 @@ +/* + * Copyright © 2025 Valve Corporation + * + * SPDX-License-Identifier: MIT + */ + +#ifndef RINGBUFFER_H +#define RINGBUFFER_H + +#include "util/macros.h" + +#define RINGBUFFER_DECLARE(name, type, N) \ + struct { \ + type data[N]; \ + uint32_t head; \ + uint32_t tail; \ + uint32_t size; \ + simple_mtx_t mtx; \ + } name + +#define ringbuffer_init(buffer) \ + (buffer.head = buffer.tail = buffer.size = 0, simple_mtx_init(&buffer.mtx, mtx_plain)) + +#define ringbuffer_lock(buffer) simple_mtx_lock(&buffer.mtx) +#define ringbuffer_unlock(buffer) simple_mtx_unlock(&buffer.mtx) + +static inline uint32_t +__ringbuffer_add_wrap(uint32_t *val, uint32_t *size, uint32_t N) +{ + uint32_t prev = *val; + *val = (*val + 1) % N; + *size = *size + 1; + assert(*size <= N); + return prev; +} + +#define ringbuffer_alloc(buffer) \ + (buffer.size == ARRAY_SIZE(buffer.data) \ + ? 
NULL \ + : &buffer.data[__ringbuffer_add_wrap(&buffer.head, &buffer.size, ARRAY_SIZE(buffer.data))]) + +#define ringbuffer_free(buffer, elem) \ + assert(elem == NULL || elem == &buffer.data[buffer.tail]); \ + buffer.size--; \ + assert(buffer.size < ARRAY_SIZE(buffer.data)); \ + buffer.tail = (buffer.tail + 1) % ARRAY_SIZE(buffer.data) + +#define ringbuffer_first(buffer) (&buffer.data[buffer.tail]) + +#define ringbuffer_last(buffer) \ + (&buffer.data[(buffer.head + ARRAY_SIZE(buffer.data) - 1) % ARRAY_SIZE(buffer.data)]) + +#define ringbuffer_index(buffer, elem) (elem - buffer.data) + +#define ringbuffer_next(buffer, elem) \ + (&buffer.data[(ringbuffer_index(buffer, elem) + 1) % ARRAY_SIZE(buffer.data)]) + +#endif /* RINGBUFFER_H */ diff --git a/src/vulkan/meson.build b/src/vulkan/meson.build index 3225b5f4a9d..cf62ecc6ae7 100644 --- a/src/vulkan/meson.build +++ b/src/vulkan/meson.build @@ -98,3 +98,6 @@ endif if with_vulkan_vram_report_limit_layer subdir('vram-report-limit-layer') endif +if with_vulkan_anti_lag_layer + subdir('anti-lag-layer') +endif
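
A quick sketch of how the ringbuffer macros above compose, mirroring how queue_context declares its query ring. A toy under the assumption that util/simple_mtx.h and util/macros.h are on the include path; not part of the patch:

#include <assert.h>
#include "util/simple_mtx.h"
#include "ringbuffer.h"

struct item {
   int payload;
};

/* RINGBUFFER_DECLARE defines an anonymous struct variable named 'ring'. */
static RINGBUFFER_DECLARE(ring, struct item, 4);

static void example(void)
{
   ringbuffer_init(ring);
   ringbuffer_lock(ring);
   struct item *it = ringbuffer_alloc(ring); /* NULL once all 4 slots are live */
   if (it) {
      it->payload = 42;
      assert(ringbuffer_index(ring, it) == 0);
      ringbuffer_free(ring, it); /* frees the oldest entry (FIFO), i.e. 'it' here */
   }
   ringbuffer_unlock(ring);
}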