From 3fdc14b089262ed98aa5a8ccbc77191ac126d20b Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Thu, 4 Dec 2025 13:52:02 +0100
Subject: [PATCH] wsi/wayland: Implement EXT_present_timing on Wayland.

Only weakness right now is that we cannot implement VRR vs FRR query reliably.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
Reviewed-by: Derek Foreman <derek.foreman@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38770>
---
 src/vulkan/wsi/wsi_common_wayland.c | 198 +++++++++++++++++++++++++---
 1 file changed, 181 insertions(+), 17 deletions(-)

diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c
index 7aab7bec38a..29149fbf4a1 100644
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -254,6 +254,8 @@ struct wsi_wl_swapchain {
       bool has_hdr_metadata;
    } color;
 
+   struct wsi_image_timing_request timing_request;
+
    struct wsi_wl_image images[0];
 };
 VK_DEFINE_NONDISP_HANDLE_CASTS(wsi_wl_swapchain, base.base, VkSwapchainKHR,
@@ -1789,7 +1791,8 @@ wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
 static VkResult
 wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface,
                                   struct wsi_device *wsi_device,
-                                  bool *has_wp_presentation)
+                                  bool *has_wp_presentation, clockid_t *clock_id,
+                                  bool *has_commit_timing, bool *has_fifo)
 {
    VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
    struct wsi_wayland *wsi =
@@ -1800,7 +1803,17 @@ wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface,
                            wsi_device->sw, "mesa check wp_presentation"))
       return VK_ERROR_SURFACE_LOST_KHR;
 
-   *has_wp_presentation = !!display.wp_presentation_notwrapped;
+   if (has_wp_presentation)
+      *has_wp_presentation = !!display.wp_presentation_notwrapped;
+
+   if (clock_id)
+      *clock_id = display.presentation_clock_id;
+
+   if (has_commit_timing)
+      *has_commit_timing = !!display.commit_timing_manager;
+
+   if (has_fifo)
+      *has_fifo = !!display.fifo_manager;
 
    wsi_wl_display_finish(&display);
 
@@ -1893,7 +1906,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface,
          bool has_feedback;
 
          result = wsi_wl_surface_check_presentation(surface, wsi_device,
-                                                    &has_feedback);
+                                                    &has_feedback, NULL, NULL, NULL);
          if (result != VK_SUCCESS)
             return result;
 
@@ -1906,7 +1919,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface,
          bool has_feedback;
 
          result = wsi_wl_surface_check_presentation(surface, wsi_device,
-                                                    &has_feedback);
+                                                    &has_feedback, NULL, NULL, NULL);
          if (result != VK_SUCCESS)
             return result;
 
@@ -1916,11 +1929,45 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface,
 
       case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
          VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;
+         bool has_feedback, has_commit_timing, has_fifo;
 
          wait->presentStageQueries = 0;
          wait->presentTimingSupported = VK_FALSE;
          wait->presentAtAbsoluteTimeSupported = VK_FALSE;
          wait->presentAtRelativeTimeSupported = VK_FALSE;
+
+         clockid_t clock_id;
+
+         result = wsi_wl_surface_check_presentation(surface, wsi_device,
+                                                    &has_feedback, &clock_id,
+                                                    &has_commit_timing, &has_fifo);
+
+         if (result != VK_SUCCESS)
+            return result;
+
+         if (!has_feedback)
+            break;
+
+         /* We could deal with esoteric clock domains by exposing VK_TIME_DOMAIN_SWAPCHAIN or PRESENT_STAGE_LOCAL,
+          * but that requires a lot more scaffolding, and there's no need to add extra complexity if we can
+          * get away with this. */
+         if (clock_id != CLOCK_MONOTONIC && clock_id != CLOCK_MONOTONIC_RAW)
+            break;
+
+         /* Presentation timing spec talks about the reported time targeting "pixel being visible".
+          * From presentation-time spec: "Note, that if the display path has a non-zero latency,
+          * the time instant specified by this counter may differ from the timestamp's."
+          * No compositor I know of reports where it takes display latency into account,
+          * so it's a little unclear if we should actually be reporting PIXEL_OUT or PIXEL_VISIBLE.
+          * Choose PIXEL_OUT for now since no known compositor out there actually implements
+          * PIXEL_VISIBLE as intended, and we don't want to promise something we cannot hold. */
+         wait->presentTimingSupported = VK_TRUE;
+         wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT;
+
+         /* We cannot reliably implement FIFO guarantee + absolute time without the FIFO barrier.
+          * Presentation timing is only defined to work with FIFO (and its variants like RELAXED and LATEST_READY). */
+         wait->presentAtAbsoluteTimeSupported = has_commit_timing && has_fifo;
+
          break;
       }
 
@@ -2414,6 +2461,7 @@ struct wsi_wl_present_id {
     * which uses frame callback to signal DRI3 COMPLETE. */
    struct wl_callback *frame;
    uint64_t present_id;
+   uint64_t timing_serial;
    struct mesa_trace_flow flow;
    uint64_t submission_time;
    const VkAllocationCallbacks *alloc;
@@ -2421,6 +2469,7 @@ struct wsi_wl_present_id {
    uint64_t target_time;
    uint64_t correction;
    struct wl_list link;
+   bool user_target_time;
 };
 
 static struct wsi_image *
@@ -2451,6 +2500,14 @@ wsi_wl_swapchain_set_present_mode(struct wsi_swapchain *wsi_chain,
    chain->base.present_mode = mode;
 }
 
+static void
+wsi_wl_swapchain_set_timing_request(struct wsi_swapchain *wsi_chain,
+                                    const struct wsi_image_timing_request *request)
+{
+   struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain;
+   chain->timing_request = *request;
+}
+
 static VkResult
 dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_time)
 {
@@ -2524,6 +2581,15 @@ dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_
    return VK_SUCCESS;
 }
 
+static void
+wsi_wl_swapchain_poll_timing_request(struct wsi_swapchain *wsi_chain)
+{
+   /* Timing requests must complete in finite time, and if we're not calling present wait
+    * or queue present regularly, timing requests will never come back. */
+   struct timespec instant = {0};
+   dispatch_present_id_queue(wsi_chain, &instant);
+}
+
 static bool
 wsi_wl_swapchain_present_id_completes_in_finite_time_locked(struct wsi_wl_swapchain *chain,
                                                             uint64_t present_id)
@@ -2825,6 +2891,20 @@ presentation_handle_presented(void *data,
    struct wsi_wl_swapchain *chain = id->chain;
    uint64_t target_time = id->target_time;
 
+   /* In v1 of presentation time, we can know if we're likely running VRR, given refresh is 0.
+    * However, we cannot know what the base refresh rate is without some kind of external information.
+    * We also cannot know if we're actually driving the display in a VRR fashion.
+    * In v2, we should always know the "base refresh" rate, but that means we cannot know if we're driving
+    * the display VRR or FRR. We could try to deduce it based on timestamps, but that is too brittle.
+    * There is a v3 proposal that adds this information more formally so we don't have to guess.
+    * Knowing VRR or FRR is not mission critical for most use cases, so just report "Unknown" for now. */
+   wsi_swapchain_present_timing_update_refresh_rate(&chain->base, refresh, 0, 0);
+
+   /* Notify this before present wait to reduce latency of presentation timing requests
+    * if the application is driving its queries based off present waits. */
+   if (id->timing_serial)
+      wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, presentation_time);
+
    mtx_lock(&chain->present_ids.lock);
    chain->present_ids.refresh_nsec = refresh;
    if (!chain->present_ids.valid_refresh_nsec) {
@@ -2836,7 +2916,9 @@ presentation_handle_presented(void *data,
    if (presentation_time > chain->present_ids.displayed_time)
       chain->present_ids.displayed_time = presentation_time;
 
-   if (target_time && presentation_time > target_time)
+   /* If we have user-defined target time it can be arbitrarily early, and we don't
+    * want to start compensating for that error if application stops requesting specific time. */
+   if (!id->user_target_time && target_time && presentation_time > target_time)
       chain->present_ids.display_time_error = presentation_time - target_time;
    else
       chain->present_ids.display_time_error = 0;
@@ -2851,6 +2933,15 @@ presentation_handle_discarded(void *data)
    struct wsi_wl_present_id *id = data;
    struct wsi_wl_swapchain *chain = id->chain;
 
+   /* From Vulkan spec:
+    * "Timing information for some present stages may have a time value of 0,
+    * indicating that results for that present stage are not available."
+    * Worst case we can simply take a timestamp of clock_id and pretend, but
+    * applications may start to latch onto that timestamp as ground truth, which
+    * is obviously not correct. */
+   if (id->timing_serial)
+      wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, 0);
+
    mtx_lock(&chain->present_ids.lock);
    if (!chain->present_ids.valid_refresh_nsec) {
       /* We've started occluded, so make up some safe values to throttle us */
@@ -2905,6 +2996,29 @@ static const struct wl_callback_listener frame_listener = {
    frame_handle_done,
 };
 
+static bool
+set_application_driven_timestamp(struct wsi_wl_swapchain *chain,
+                                 uint64_t *timestamp,
+                                 uint64_t *correction)
+{
+   if (chain->timing_request.serial && chain->timing_request.time) {
+      /* Absolute time is requested before we have been able to report a reasonable refresh rate
+       * to application. This is valid, but we should not try to perform any rounding.
+       * NEAREST_REFRESH_CYCLE flag cannot be honored because it's impossible to know at this time. */
+      struct timespec target_ts;
+      timespec_from_nsec(&target_ts, chain->timing_request.time);
+      wp_commit_timer_v1_set_timestamp(chain->commit_timer,
+                                       (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec,
+                                       target_ts.tv_nsec);
+      *timestamp = chain->timing_request.time;
+      *correction = 0;
+      chain->present_ids.last_target_time = chain->timing_request.time;
+      return true;
+   } else {
+      return false;
+   }
+}
+
 /* The present_ids lock must be held */
 static bool
 set_timestamp(struct wsi_wl_swapchain *chain,
@@ -2918,7 +3032,7 @@ set_timestamp(struct wsi_wl_swapchain *chain,
    int32_t error = 0;
 
    if (!chain->present_ids.valid_refresh_nsec)
-      return false;
+      return set_application_driven_timestamp(chain, timestamp, correction);
 
    displayed_time = chain->present_ids.displayed_time;
    refresh = chain->present_ids.refresh_nsec;
@@ -2928,7 +3042,7 @@ set_timestamp(struct wsi_wl_swapchain *chain,
     * timestamps at all, so bail out.
     */
    if (!refresh)
-      return false;
+      return set_application_driven_timestamp(chain, timestamp, correction);
 
    /* We assume we're being fed at the display's refresh rate, but
     * if that doesn't happen our timestamps fall into the past.
@@ -2946,6 +3060,10 @@ set_timestamp(struct wsi_wl_swapchain *chain,
       error = chain->present_ids.display_time_error -
               chain->present_ids.display_time_correction;
 
+   /* If we're driving timestamps from application, this is somewhat redundant
+    * but it will drain out any accumulated display_time_error over time.
+    * Accumulated errors are expected since application might not
+    * align the target time perfectly against a refresh cycle. */
    target = chain->present_ids.last_target_time;
    if (error > 0)  {
       target += (error / refresh) * refresh;
@@ -2955,19 +3073,41 @@ set_timestamp(struct wsi_wl_swapchain *chain,
    }
 
    chain->present_ids.display_time_correction += *correction;
-   target = next_phase_locked_time(displayed_time,
-                                   refresh,
-                                   target);
-  /* Take back 500 us as a safety margin, to ensure we don't miss our
-   * target due to round-off error.
-   */
-   timespec_from_nsec(&target_ts, target - 500000);
+
+   if (chain->timing_request.serial && chain->timing_request.time) {
+      target = chain->timing_request.time;
+      chain->present_ids.last_target_time = target;
+      *timestamp = target;
+
+      if (chain->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT)
+         target -= chain->present_ids.refresh_nsec / 2;
+
+      /* Without the flag, the application is supposed to deal with any safety margins on its own. */
+      timespec_from_nsec(&target_ts, target);
+
+      /* If we're using commit timing path, we always have FIFO protocol, so we don't have to
+       * consider scenarios where application is passing a very low present time.
+       * I.e., there is no need to max() the application timestamp against our estimated next refresh cycle.
+       * If the surface is occluded, it's possible to render at a higher rate than display refresh rate,
+       * but that's okay. Those presents will be discarded anyway, and we won't report odd timestamps to application. */
+   } else {
+      target = next_phase_locked_time(displayed_time,
+                                      refresh,
+                                      target);
+
+      chain->present_ids.last_target_time = target;
+      *timestamp = target;
+
+      /* Take back 500 us as a safety margin, to ensure we don't miss our
+       * target due to round-off error.
+       */
+      timespec_from_nsec(&target_ts, target - 500000);
+   }
+
    wp_commit_timer_v1_set_timestamp(chain->commit_timer,
                                     (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec,
                                     target_ts.tv_nsec);
 
-   chain->present_ids.last_target_time = target;
-   *timestamp = target;
    return true;
 }
 
@@ -3069,13 +3209,15 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
    }
 
    if (present_id > 0 || (mode_fifo && chain->commit_timer) ||
-       util_perfetto_is_tracing_enabled()) {
+       util_perfetto_is_tracing_enabled() || chain->timing_request.serial) {
       struct wsi_wl_present_id *id =
          vk_zalloc(chain->wsi_wl_surface->display->wsi_wl->alloc, sizeof(*id), sizeof(uintptr_t),
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
       id->chain = chain;
       id->present_id = present_id;
       id->alloc = chain->wsi_wl_surface->display->wsi_wl->alloc;
+      id->timing_serial = chain->timing_request.serial;
+      id->user_target_time = chain->timing_request.time != 0;
 
       mtx_lock(&chain->present_ids.lock);
 
@@ -3203,6 +3345,8 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
                                         wsi_wl_surface->display->queue);
    }
 
+   memset(&chain->timing_request, 0, sizeof(chain->timing_request));
+
    return VK_SUCCESS;
 }
 
@@ -3437,6 +3581,20 @@ wsi_wl_swapchain_destroy(struct wsi_swapchain *wsi_chain,
    return VK_SUCCESS;
 }
 
+static VkTimeDomainKHR
+clock_id_to_vk_time_domain(clockid_t id)
+{
+   switch (id) {
+      case CLOCK_MONOTONIC:
+         return VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR;
+      case CLOCK_MONOTONIC_RAW:
+         return VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
+      default:
+         /* Default fallback. Will not be used. */
+         return VK_TIME_DOMAIN_DEVICE_KHR;
+   }
+}
+
 static VkResult
 wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
                                 VkDevice device,
@@ -3615,6 +3773,12 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    chain->base.queue_present = wsi_wl_swapchain_queue_present;
    chain->base.release_images = wsi_wl_swapchain_release_images;
    chain->base.set_present_mode = wsi_wl_swapchain_set_present_mode;
+   chain->base.set_timing_request = wsi_wl_swapchain_set_timing_request;
+   chain->base.poll_timing_request = wsi_wl_swapchain_poll_timing_request;
+   if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) {
+      chain->base.present_timing.time_domain =
+            clock_id_to_vk_time_domain(wsi_wl_surface->display->presentation_clock_id);
+   }
    chain->base.wait_for_present = wsi_wl_swapchain_wait_for_present;
    chain->base.wait_for_present2 = wsi_wl_swapchain_wait_for_present2;
    chain->base.present_mode = present_mode;