vulkan/wsi: Add common infrastructure for EXT_present_timing.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no> Reviewed-by: Derek Foreman <derek.foreman@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38770>
2026-05-16 07:38:14 +02:00 · 2025-12-01 16:25:31 +01:00 · 2025-12-01 16:25:31 +01:00 · 47d69664d8
commit 47d69664d8
parent f08a542756
3 changed files with 397 additions and 11 deletions
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@ -631,6 +631,12 @@ wsi_swapchain_finish(struct wsi_swapchain *chain)
      vk_free(&chain->alloc, chain->cmd_pools);
   }

+   if (chain->present_timing.active) {
+      mtx_destroy(&chain->present_timing.lock);
+      if (chain->present_timing.timings)
+         vk_free(&chain->alloc, chain->present_timing.timings);
+   }
+
   vk_object_base_finish(&chain->base);
 }

@ -1116,6 +1122,23 @@ wsi_CreateSwapchainKHR(VkDevice _device,

   *pSwapchain = wsi_swapchain_to_handle(swapchain);

+   if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) {
+      swapchain->present_timing.active = true;
+      mtx_init(&swapchain->present_timing.lock, 0);
+
+      if (swapchain->poll_early_refresh) {
+         /* If we can query the display directly, we should report something reasonable on first query
+          * before we even present the first time. */
+         uint64_t refresh_ns = swapchain->poll_early_refresh(swapchain);
+         if (refresh_ns) {
+            swapchain->present_timing.refresh_duration = refresh_ns;
+            /* None of the APIs can know a-priori if we're driving the display VRR or not. */
+            swapchain->present_timing.refresh_interval = 0;
+            swapchain->present_timing.refresh_counter++;
+         }
+      }
+   }
+
   return VK_SUCCESS;
 }

@ -1172,6 +1195,266 @@ wsi_ReleaseSwapchainImagesKHR(VkDevice _device,
   return VK_SUCCESS;
 }

+static VkResult wsi_common_allocate_timing_request(
+      struct wsi_swapchain *swapchain, const VkPresentTimingInfoEXT *timing, uint64_t present_id)
+{
+   VkResult vr = VK_SUCCESS;
+   mtx_lock(&swapchain->present_timing.lock);
+
+   if (swapchain->present_timing.timings_count >= swapchain->present_timing.timings_capacity) {
+      vr = VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT;
+      goto err;
+   }
+
+   struct wsi_presentation_timing *wsi_timing =
+         &swapchain->present_timing.timings[swapchain->present_timing.timings_count++];
+
+   memset(wsi_timing, 0, sizeof(*wsi_timing));
+   wsi_timing->serial = ++swapchain->present_timing.serial;
+   wsi_timing->target_time = timing->targetTime;
+   wsi_timing->present_id = present_id;
+   wsi_timing->requested_feedback = timing->presentStageQueries;
+
+   /* Ignore the time domain since we have a static domain. */
+
+err:
+   mtx_unlock(&swapchain->present_timing.lock);
+   return vr;
+}
+
+void
+wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain,
+                                               uint64_t timing_serial,
+                                               uint64_t timestamp)
+{
+   assert(chain->present_timing.active);
+   mtx_lock(&chain->present_timing.lock);
+
+   for (size_t i = 0; i < chain->present_timing.timings_count; i++) {
+      if (chain->present_timing.timings[i].serial == timing_serial) {
+         chain->present_timing.timings[i].complete_time = timestamp;
+         chain->present_timing.timings[i].complete = VK_TRUE;
+         break;
+      }
+   }
+
+   mtx_unlock(&chain->present_timing.lock);
+}
+
+void
+wsi_swapchain_present_timing_update_refresh_rate(struct wsi_swapchain *chain,
+                                                 uint64_t refresh_duration,
+                                                 uint64_t refresh_interval,
+                                                 int minimum_delta_for_update)
+{
+   mtx_lock(&chain->present_timing.lock);
+
+   int64_t duration_delta = llabs((int64_t)refresh_duration - (int64_t)chain->present_timing.refresh_duration);
+   int64_t interval_delta = llabs((int64_t)refresh_interval - (int64_t)chain->present_timing.refresh_interval);
+
+   /* When the refresh rate is an estimate, the value may fluctuate slightly frame to frame,
+    * don't spam refresh counter updates unless there is a meaningful delta.
+    * Applications that use absolute timings are expected to recalibrate based on feedback. */
+   if (duration_delta > minimum_delta_for_update || interval_delta > minimum_delta_for_update ||
+       chain->present_timing.refresh_counter == 0) {
+      /* We'll report this updated refresh counter in feedback,
+       * so that application knows to requery the refresh rate. */
+      chain->present_timing.refresh_counter++;
+      chain->present_timing.refresh_duration = refresh_duration;
+      chain->present_timing.refresh_interval = refresh_interval;
+   }
+
+   mtx_unlock(&chain->present_timing.lock);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+wsi_GetPastPresentationTimingEXT(
+      VkDevice                                    device,
+      const VkPastPresentationTimingInfoEXT*      pPastPresentationTimingInfo,
+      VkPastPresentationTimingPropertiesEXT*      pPastPresentationTimingProperties)
+{
+   VK_FROM_HANDLE(wsi_swapchain, swapchain, pPastPresentationTimingInfo->swapchain);
+   VkResult vr = VK_SUCCESS;
+   bool out_of_order = (pPastPresentationTimingInfo->flags &
+         VK_PAST_PRESENTATION_TIMING_ALLOW_OUT_OF_ORDER_RESULTS_BIT_EXT) != 0;
+
+   if (swapchain->poll_timing_request)
+      swapchain->poll_timing_request(swapchain);
+
+   mtx_lock(&swapchain->present_timing.lock);
+
+   pPastPresentationTimingProperties->timingPropertiesCounter = swapchain->present_timing.refresh_counter;
+   pPastPresentationTimingProperties->timeDomainsCounter = 1;
+
+   /* This implementation always returns results in-order, so can ignore the out-of-order flag.
+    * TODO: Honor the partial results flag. */
+
+   uint32_t done_count = 0;
+   for (uint32_t i = 0; i < swapchain->present_timing.timings_count; i++) {
+      /* If different presents request different kinds of state, we may get completion out of order.
+       * If flag is not set, we cannot report frame N until we have completed all frames M < N. */
+      if (swapchain->present_timing.timings[i].complete)
+         done_count++;
+      else if (!out_of_order)
+         break;
+   }
+
+   /* We don't remove timing info from queue until it is consumed. */
+   if (!pPastPresentationTimingProperties->pPresentationTimings) {
+      pPastPresentationTimingProperties->presentationTimingCount = done_count;
+      mtx_unlock(&swapchain->present_timing.lock);
+      return VK_SUCCESS;
+   }
+
+   VK_OUTARRAY_MAKE_TYPED(VkPastPresentationTimingEXT, timings,
+                          pPastPresentationTimingProperties->pPresentationTimings,
+                          &pPastPresentationTimingProperties->presentationTimingCount);
+
+   uint32_t new_timings_count = 0;
+   bool stop_timing_removal = false;
+
+   for (uint32_t i = 0; i < swapchain->present_timing.timings_count; i++) {
+      const struct wsi_presentation_timing *in_timing = &swapchain->present_timing.timings[i];
+
+      if (!swapchain->present_timing.timings[i].complete || stop_timing_removal) {
+         /* Keep output ordered to be compliant without having to re-sort every time.
+          * Queue depth for timestamps is expected to be small. */
+         swapchain->present_timing.timings[new_timings_count++] = swapchain->present_timing.timings[i];
+         if (!out_of_order)
+            stop_timing_removal = true;
+         continue;
+      }
+
+      vk_outarray_append_typed(VkPastPresentationTimingEXT, &timings, timing) {
+         timing->targetTime = swapchain->present_timing.timings[i].target_time;
+         timing->presentId = in_timing->present_id;
+         timing->timeDomain = swapchain->present_timing.time_domain;
+         timing->timeDomainId = 0;
+         timing->reportComplete = in_timing->complete;
+
+         /* No INCOMPLETE is reported here. Failures are silent.
+          * However, application already knows upper bound for stage count based on the query,
+          * so this should never fail. */
+
+         /* CTS expects that presentStageCount is overwritten (from 0 to something), not checked as an upper bound.
+          * VUID 12230 and 12231 require that presentStageCount is conservatively allocated.
+          * However, given the VUs, this is invalid usage. */
+         timing->presentStageCount = UINT32_MAX;
+
+         VK_OUTARRAY_MAKE_TYPED(VkPresentStageTimeEXT, stages, timing->pPresentStages, &timing->presentStageCount);
+
+         if (in_timing->requested_feedback & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) {
+            vk_outarray_append_typed(VkPresentStageTimeEXT, &stages, stage) {
+               stage->stage = VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT;
+               stage->time = in_timing->queue_done_time;
+            }
+         }
+
+         if (in_timing->requested_feedback & ~VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) {
+            vk_outarray_append_typed(VkPresentStageTimeEXT, &stages, stage) {
+               stage->stage = in_timing->requested_feedback & ~VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT;
+               /* It is expected that implementation will only expose one timing value. */
+               assert(util_bitcount(stage->stage) == 1);
+               stage->time = in_timing->complete_time;
+            }
+         }
+      }
+   }
+
+   swapchain->present_timing.timings_count = new_timings_count;
+   vr = vk_outarray_status(&timings);
+
+   /* This function is fully atomic within implementation, so have to be thread safe. */
+   mtx_unlock(&swapchain->present_timing.lock);
+   return vr;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+wsi_GetSwapchainTimeDomainPropertiesEXT(
+      VkDevice                                    device,
+      VkSwapchainKHR                              swapchain_,
+      VkSwapchainTimeDomainPropertiesEXT*         pSwapchainTimeDomainProperties,
+      uint64_t*                                   pTimeDomainsCounter)
+{
+   /* We don't change time domains. Everything is static. */
+   if (pTimeDomainsCounter)
+      *pTimeDomainsCounter = 1;
+
+   /* This style is a bit goofy and doesn't map cleanly to anything. */
+   if (!pSwapchainTimeDomainProperties->pTimeDomainIds && !pSwapchainTimeDomainProperties->pTimeDomains) {
+      pSwapchainTimeDomainProperties->timeDomainCount = 1;
+      return VK_SUCCESS;
+   } else if (pSwapchainTimeDomainProperties->timeDomainCount == 0) {
+      return VK_INCOMPLETE;
+   }
+
+   /* The proposal document requires that this domain is supported, but the spec does not make that clear.
+    * CTS also tests that. */
+   pSwapchainTimeDomainProperties->timeDomainCount = 1;
+   if (pSwapchainTimeDomainProperties->pTimeDomains)
+      *pSwapchainTimeDomainProperties->pTimeDomains = VK_TIME_DOMAIN_PRESENT_STAGE_LOCAL_EXT;
+   if (pSwapchainTimeDomainProperties->pTimeDomainIds)
+      *pSwapchainTimeDomainProperties->pTimeDomainIds = 0;
+
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+wsi_GetSwapchainTimingPropertiesEXT(
+      VkDevice                                    device,
+      VkSwapchainKHR                              swapchain_,
+      VkSwapchainTimingPropertiesEXT*             pSwapchainTimingProperties,
+      uint64_t*                                   pSwapchainTimingPropertiesCounter)
+{
+   VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_);
+
+   mtx_lock(&swapchain->present_timing.lock);
+   /* If we don't have data yet (i.e., counter is 0), should return VK_NOT_READY.
+    * CTS does not like that however, so just return VK_SUCCESS. */
+   pSwapchainTimingProperties->refreshInterval = swapchain->present_timing.refresh_interval;
+   pSwapchainTimingProperties->refreshDuration = swapchain->present_timing.refresh_duration;
+   if (pSwapchainTimingPropertiesCounter)
+      *pSwapchainTimingPropertiesCounter = swapchain->present_timing.refresh_counter;
+   mtx_unlock(&swapchain->present_timing.lock);
+   return VK_SUCCESS;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+wsi_SetSwapchainPresentTimingQueueSizeEXT(
+      VkDevice                                    device,
+      VkSwapchainKHR                              swapchain_,
+      uint32_t                                    size)
+{
+   VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_);
+   assert(swapchain->present_timing.active);
+   VkResult vr = VK_SUCCESS;
+
+   mtx_lock(&swapchain->present_timing.lock);
+
+   if (size < swapchain->present_timing.timings_count) {
+      vr = VK_NOT_READY;
+      goto error;
+   }
+
+   if (size > swapchain->present_timing.timings_capacity) {
+      void *new_ptr = vk_realloc(&swapchain->alloc, swapchain->present_timing.timings,
+                 sizeof(*swapchain->present_timing.timings) * size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+      if (new_ptr) {
+         swapchain->present_timing.timings = new_ptr;
+         swapchain->present_timing.timings_capacity = size;
+      } else {
+         vr = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto error;
+      }
+   } else {
+      swapchain->present_timing.timings_capacity = size;
+   }
+
+error:
+   mtx_unlock(&swapchain->present_timing.lock);
+   return vr;
+}
+
 VkDeviceMemory
 wsi_common_get_memory(VkSwapchainKHR _swapchain, uint32_t index)
 {
@ -1180,6 +1463,14 @@ wsi_common_get_memory(VkSwapchainKHR _swapchain, uint32_t index)
   return swapchain->get_wsi_image(swapchain, index)->memory;
 }

+VkTimeDomainKHR
+wsi_common_get_time_domain(VkSwapchainKHR _swapchain, VkPresentStageFlagBitsEXT stage, uint64_t time_domain_id)
+{
+   VK_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+   return stage == VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT ?
+         VK_TIME_DOMAIN_DEVICE_KHR : swapchain->present_timing.time_domain;
+}
+
 VKAPI_ATTR VkResult VKAPI_CALL
 wsi_GetSwapchainImagesKHR(VkDevice device,
                          VkSwapchainKHR _swapchain,
@ -1525,6 +1816,38 @@ wsi_common_queue_present(const struct wsi_device *wsi,
      vk_find_struct_const(pPresentInfo->pNext, PRESENT_ID_2_KHR);
   const VkSwapchainPresentFenceInfoKHR *present_fence_info =
      vk_find_struct_const(pPresentInfo->pNext, SWAPCHAIN_PRESENT_FENCE_INFO_KHR);
+   const VkPresentTimingsInfoEXT *present_timings_info =
+         vk_find_struct_const(pPresentInfo->pNext, PRESENT_TIMINGS_INFO_EXT);
+
+   if (present_timings_info) {
+      /* If we fail a present due to full queue, it's a little unclear from
+       * spec if we should treat it as OUT_OF_DATE or OUT_OF_HOST_MEMORY for
+       * purposes of signaling. Validation layers and at least one other implementation
+       * in the wild seems to treat it as OUT_OF_DATE, so do that. */
+      for (uint32_t i = 0; i < present_timings_info->swapchainCount; i++) {
+         const VkPresentTimingInfoEXT *info = &present_timings_info->pTimingInfos[i];
+         VK_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
+         if (results[i] != VK_SUCCESS || !swapchain->set_timing_request || info->presentStageQueries == 0)
+            continue;
+
+         assert(swapchain->present_timing.active);
+
+         /* EXT_present_timing is defined to only work with present_id2.
+          * It's only used when reporting back timings. */
+         results[i] = wsi_common_allocate_timing_request(
+               swapchain, info, present_ids2 ? present_ids2->pPresentIds[i] : 0);
+
+         /* Application is responsible for allocating sufficient size here.
+          * We fail with VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT if application is bugged. */
+         if (results[i] == VK_SUCCESS) {
+            swapchain->set_timing_request(swapchain, &(struct wsi_image_timing_request) {
+               .serial = swapchain->present_timing.serial,
+               .time = info->targetTime,
+               .flags = info->flags,
+            });
+         }
+      }
+   }

   /* Gather up all the semaphores and fences we need to signal per-image */
   STACK_ARRAY(struct wsi_image_signal_info, image_signal_infos,
@ -1620,14 +1943,28 @@ wsi_common_queue_present(const struct wsi_device *wsi,
         struct wsi_image *image =
            swapchain->get_wsi_image(swapchain, image_index);

+         bool separate_queue_blit = swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT &&
+                                    swapchain->blit.queue != NULL;
+
+         /* For TIMING_QUEUE_FULL_EXT, ensure sync objects are signaled,
+          * but don't do any real work. */
+         if (results[i] == VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT ||
+               (!separate_queue_blit && results[i] == VK_SUCCESS)) {
+            for (uint32_t j = 0; j < image_signal_infos[i].semaphore_count; j++) {
+               signal_semaphore_infos[signal_semaphore_count++] =
+                     image_signal_infos[i].semaphore_infos[j];
+            }
+            for (uint32_t j = 0; j < image_signal_infos[i].fence_count; j++)
+               fences[fence_count++] = image_signal_infos[i].fences[j];
+         }
+
         if (results[i] != VK_SUCCESS)
            continue;

         /* If we're blitting on another swapchain, just signal the blit
          * semaphore for now.
          */
-         if (swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT &&
-             swapchain->blit.queue != NULL) {
+         if (separate_queue_blit) {
            /* Create the blit semaphore if needed */
            if (swapchain->blit.semaphores[image_index] == VK_NULL_HANDLE) {
               const VkSemaphoreCreateInfo sem_info = {
@ -1658,13 +1995,6 @@ wsi_common_queue_present(const struct wsi_device *wsi,
                  image->blit.cmd_buffers[queue->queue_family_index],
            };
         }
-
-         for (uint32_t j = 0; j < image_signal_infos[i].semaphore_count; j++) {
-            signal_semaphore_infos[signal_semaphore_count++] =
-               image_signal_infos[i].semaphore_infos[j];
-         }
-         for (uint32_t j = 0; j < image_signal_infos[i].fence_count; j++)
-            fences[fence_count++] = image_signal_infos[i].fences[j];
      }

      const VkSubmitInfo2 submit_info = {
@ -1701,8 +2031,10 @@ wsi_common_queue_present(const struct wsi_device *wsi,
      if (results[i] != VK_SUCCESS)
         continue;

-      if (swapchain->blit.type == WSI_SWAPCHAIN_NO_BLIT ||
-          swapchain->blit.queue == NULL)
+      bool separate_queue_blit = swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT &&
+                                 swapchain->blit.queue != NULL;
+
+      if (!separate_queue_blit)
         continue;

      const VkSemaphoreSubmitInfo blit_semaphore_info = {
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@ -294,6 +294,11 @@ wsi_common_queue_present(const struct wsi_device *wsi,
                         struct vk_queue *queue,
                         const VkPresentInfoKHR *pPresentInfo);

+VkTimeDomainKHR
+wsi_common_get_time_domain(VkSwapchainKHR _swapchain,
+                           VkPresentStageFlagBitsEXT stage,
+                           uint64_t time_domain_id);
+
 static inline bool
 wsi_common_is_swapchain_image(const VkImageCreateInfo *pCreateInfo)
 {
--- a/src/vulkan/wsi/wsi_common_private.h
+++ b/src/vulkan/wsi/wsi_common_private.h
@ -190,6 +190,22 @@ struct wsi_image {
   void *cpu_map;
 };

+struct wsi_presentation_timing {
+   uint64_t present_id;
+   uint64_t target_time;
+   uint64_t serial;
+   uint64_t queue_done_time; /* GPU timestamp based. */
+   uint64_t complete_time; /* Best effort timestamp we get from backend. */
+   VkPresentStageFlagsEXT requested_feedback;
+   VkBool32 complete;
+};
+
+struct wsi_image_timing_request {
+   uint64_t                    serial;
+   uint64_t                    time;
+   VkPresentTimingInfoFlagsEXT flags;
+};
+
 struct wsi_swapchain {
   struct vk_object_base base;

@ -238,6 +254,26 @@ struct wsi_swapchain {
      struct vk_queue *queue;
   } blit;

+   struct {
+      mtx_t lock;
+      bool active;
+
+      struct wsi_presentation_timing *timings;
+      size_t timings_capacity;
+      size_t timings_count;
+
+      size_t serial;
+
+      /* Maps to Vulkan spec definitions. */
+      uint64_t refresh_duration;
+      uint64_t refresh_interval;
+      /* When 0, we don't know yet. Every time the refresh rate changes,
+       * increase this counter. This counter must also be passed in GetPastTimings. */
+      uint64_t refresh_counter;
+
+      VkTimeDomainKHR time_domain;
+   } present_timing;
+
   bool capture_key_pressed;

   /* Command pools, one per queue family */
@ -267,6 +303,10 @@ struct wsi_swapchain {
                            VkPresentModeKHR mode);
   void (*set_hdr_metadata)(struct wsi_swapchain *swap_chain,
                            const VkHdrMetadataEXT* pMetadata);
+   void (*set_timing_request)(struct wsi_swapchain *swap_chain,
+                            const struct wsi_image_timing_request *request);
+   void (*poll_timing_request)(struct wsi_swapchain *swap_chain);
+   uint64_t (*poll_early_refresh)(struct wsi_swapchain *swap_chain);
 };

 bool
@ -378,6 +418,15 @@ VkResult
 wsi_swapchain_wait_for_present_semaphore(const struct wsi_swapchain *chain,
                                         uint64_t present_id, uint64_t timeout);

+void
+wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain,
+                                               uint64_t timing_serial, uint64_t timestamp);
+
+void
+wsi_swapchain_present_timing_update_refresh_rate(struct wsi_swapchain *chain,
+                                                 uint64_t refresh_duration, uint64_t refresh_interval,
+                                                 int minimum_delta_for_update);
+
 #ifdef HAVE_LIBDRM
 VkResult
 wsi_prepare_signal_dma_buf_from_semaphore(struct wsi_swapchain *chain,