wsi/x11: Add Xwl support for present timing.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no> Reviewed-and-tested-by: Mario Kleiner <mario.kleiner.de@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39551>
2026-06-21 21:08:23 +02:00 · 2026-02-19 13:10:49 +01:00 · 2026-02-19 13:10:49 +01:00 · c3e3e758dc
commit c3e3e758dc
parent 1546dabd4a
1 changed files with 79 additions and 16 deletions
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@ -917,15 +917,29 @@ x11_surface_get_capabilities2(VkIcdSurfaceBase *icd_surface,
         xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
         struct wsi_x11_connection *wsi_conn = wsi_x11_get_connection(wsi_device, conn);

+         wait->presentTimingSupported = VK_TRUE;
+
         if (wsi_conn->is_xwayland) {
-            /* Follow-up commit supports Xwl. */
-            wait->presentTimingSupported = VK_FALSE;
-            wait->presentStageQueries = 0;
+            /* Wayland COMPLETE is tied to fence callback, so that's what we'll report.
+             * For pure frame pacing support, this is likely fine. */
+            wait->presentStageQueries = VK_PRESENT_STAGE_REQUEST_DEQUEUED_BIT_EXT;
+
+            /* Xwayland cannot get a reliable refresh rate estimate since MSC is not tied to monitor refresh at all.
+             * However, it's pragmatically very important to expose some baseline Xwl support since
+             * a large amount of applications (mostly games) rely on X11 APIs.
+             *
+             * Relative timings are easier to deal with since errors against an absolute timer are more or less expected,
+             * and it's sufficient for implementing present intervals in GL/D3D, etc, but likely not for
+             * tight A/V sync in e.g. media players, but those should be using Wayland when available anyway.
+             * As per-spec the timing request we provide should correlate with PIXEL_VISIBLE_BIT stage,
+             * but when we only observe dequeue, that's not really possible, but relative timings don't have that problem.
+             *
+             * There is PRESENT_CAPABILITY_UST, which would help, but xserver does not implement it at all.
+             */
            wait->presentAtAbsoluteTimeSupported = VK_FALSE;
-            wait->presentAtRelativeTimeSupported = VK_FALSE;
+            wait->presentAtRelativeTimeSupported = VK_TRUE;
         } else {
            /* COMPLETE should be tied to page flip on native X11. */
-            wait->presentTimingSupported = VK_TRUE;
            wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT;
            wait->presentAtAbsoluteTimeSupported = VK_TRUE;
            wait->presentAtRelativeTimeSupported = VK_TRUE;
@ -1267,6 +1281,7 @@ struct x11_swapchain {
   VkResult                                     present_progress_error;

   struct wsi_image_timing_request              timing_request;
+   bool                                         has_reliable_msc;

   struct x11_image                             images[0];
 };
@ -1319,6 +1334,15 @@ static void x11_present_update_refresh_cycle_estimate(struct x11_swapchain *swap
         (swapchain->present_timing_window_index + 1) % X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE;
   struct x11_present_timing_entry *entry = &swapchain->present_timing_window[swapchain->present_timing_window_index];

+   if (!swapchain->has_reliable_msc) {
+      /* If we don't have reliable MSC, we always trust the fallback RANDR query.
+       * We have no idea if we're FRR or VRR. */
+      wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, randr_refresh_ns, 0, 0);
+      entry->msc = msc;
+      entry->ust = ust;
+      return;
+   }
+
   /* Try to get an initial estimate as quickly as possible, we will refine it over time. */
   if (entry->msc == 0)
      entry = &swapchain->present_timing_window[1];
@ -2304,6 +2328,20 @@ x11_present_compute_target_msc(struct x11_swapchain *chain,
   /* Present timing is only defined to work with FIFO modes, so we can rely on having
    * reliable relative timings, since we block for COMPLETE to come through before we queue up more presents. */
   if (relative) {
+      /* If application is trying to drive us at refresh rate, FIFO will take care of it.
+       * Don't end up in a situation where we sleep and miss the deadline by mistake. */
+      if (!chain->has_reliable_msc) {
+         uint64_t relative_threshold;
+         if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT)
+            relative_threshold = 3 * chain->base.present_timing.refresh_duration / 2;
+         else
+            relative_threshold = chain->base.present_timing.refresh_duration;
+
+         if (request->time <= relative_threshold) {
+            mtx_unlock(&chain->base.present_timing.lock);
+            return minimum_msc;
+         }
+      }
      target_ns = 1000 * (int64_t)entry->ust + (int64_t)request->time;
   } else {
      target_ns = (int64_t)request->time;
@ -2314,28 +2352,52 @@ x11_present_compute_target_msc(struct x11_swapchain *chain,
   if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT)
      target_ns -= (int64_t)chain->base.present_timing.refresh_duration / 2;

+  /* Xwl cannot understand MSC that jumps by more than 1. It appears that if there are MSC jumps above 1,
+   * each MSC cycle is padded by 16.6ms or something like that.
+   * If we want to target specific time, we must sleep to achieve that until Xwl improves.
+   * Fortunately, we're on a submit thread, so that is mostly an acceptable solution. */
+
   if (entry->msc && chain->base.present_timing.refresh_duration != 0 &&
-       chain->msc_estimate_is_stable) {
+       chain->msc_estimate_is_stable && chain->has_reliable_msc) {
      /* If we can trust MSC to be a stable FRR heartbeat, we sync to that. */
      uint64_t delta_time_ns = MAX2(target_ns - 1000 * (int64_t)entry->ust, 0);
      uint64_t periods = (delta_time_ns + chain->base.present_timing.refresh_duration - 1) /
                         chain->base.present_timing.refresh_duration;
      mtx_unlock(&chain->base.present_timing.lock);
+
      minimum_msc = MAX2(minimum_msc, entry->msc + periods);
   } else {
-      /* If we don't have a stable estimate (e.g. true VRR) we just sleep until deadline.
+      /* If we don't have a stable estimate (e.g. true VRR, or Xwl) we just sleep until deadline.
       * This relies on timebase on os_time_nanosleep is MONOTONIC as well as UST being MONOTONIC. */

-      /* Very regular sleeping can trigger a strange feedback loop where MSC estimates becomes stable enough
-       * that we accept it as stable MSC. Perturb the rates enough to make it extremely unlikely
-       * we accept sleeping patterns as ground truth rate, introduce a 50 us error between each timestamp,
-       * which should avoid the 10 us check reliably. If sleep quantas are not as accurate, it's extremely unlikely
-       * we get a stable pace anyway. TODO: Is there a more reliable way? */
-      target_ns += 50000ll * (chain->present_timing_window_index & 1) - 25000;
-      target_ns = MAX2(target_ns, 0);
+      if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) {
+         if (!chain->has_reliable_msc && chain->base.present_timing.refresh_duration) {
+            uint64_t delta_time_ns = MAX2(target_ns - 1000 * (int64_t)entry->ust, 0);
+            uint64_t periods = delta_time_ns / chain->base.present_timing.refresh_duration;

-      /* If we're on VRR X11 and trying to target a specific cycle by sleeping, pull back the sleep a bit.
-       * We will be racing against time once we wake up to send the request to Xserver -> COMPLETE.
+            target_ns = 1000ull * entry->ust + periods * chain->base.present_timing.refresh_duration;
+
+            /* Set a minimum target that is very close to the real estimate.
+             * This way, we ensure that we don't regularly round estimates up in
+             * chain->next_present_ust_lower_bound. */
+            target_ns += 63 * chain->base.present_timing.refresh_duration / 64;
+         }
+      }
+
+      /* On Xwl we never accept MSC estimates as ground truth, so ignore this perturbation. */
+      if (chain->has_reliable_msc) {
+         /* Very regular sleeping can trigger a strange feedback loop where MSC estimates becomes stable enough
+          * that we accept it as stable MSC. Perturb the rates enough to make it extremely unlikely
+          * we accept sleeping patterns as ground truth rate, introduce a 50 us error between each timestamp,
+          * which should avoid the 20 us check reliably. If sleep quantas are not as accurate, it's extremely unlikely
+          * we get a stable pace anyway. TODO: Is there a more reliable way? */
+
+         target_ns += 50000ll * (chain->present_timing_window_index & 1) - 25000;
+         target_ns = MAX2(target_ns, 0);
+      }
+
+      /* If we're on Xwl or VRR X11 and trying to target a specific cycle by sleeping, pull back the sleep a bit.
+       * We will be racing against time once we wake up to send the request to Xwl -> Wayland -> frame callback -> COMPLETE.
       * If target_ns syncs well to a refresh cycle, we speculate that COMPLETE will come through at about target_ns. */

      /* To get proper pace on an actual VRR display, we will have to detect if we're presenting too early
@ -3156,6 +3218,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
   chain->has_dri3_modifiers = wsi_conn->has_dri3_modifiers;
   chain->has_mit_shm = wsi_conn->has_mit_shm;
   chain->has_async_may_tear = present_caps & XCB_PRESENT_CAPABILITY_ASYNC_MAY_TEAR;
+   chain->has_reliable_msc = !wsi_conn->is_xwayland;

   /* When images in the swapchain don't fit the window, X can still present them, but it won't
    * happen by flip, only by copy. So this is a suboptimal copy, because if the client would change