From 57226c44c5b7e61bbb24d80dc50c063f6b4b7e42 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 5 Dec 2025 13:55:38 +0100 Subject: [PATCH 01/17] loader: Separate out X11 specific screen queries from dri_helper.h. Allows these helpers to be used for X11 WSI as well. Signed-off-by: Hans-Kristian Arntzen --- src/egl/drivers/dri2/platform_x11.c | 11 ---- src/loader/loader_dri_helper.h | 43 +-------------- src/loader/loader_dri_helper_screen.h | 76 +++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 53 deletions(-) create mode 100644 src/loader/loader_dri_helper_screen.h diff --git a/src/egl/drivers/dri2/platform_x11.c b/src/egl/drivers/dri2/platform_x11.c index 25d6ed20754..9dd6a956313 100644 --- a/src/egl/drivers/dri2/platform_x11.c +++ b/src/egl/drivers/dri2/platform_x11.c @@ -792,17 +792,6 @@ dri2_fourcc_for_depth(struct dri2_egl_display *dri2_dpy, uint32_t depth) } } -static int -box_intersection_area(int16_t a_x, int16_t a_y, int16_t a_width, - int16_t a_height, int16_t b_x, int16_t b_y, - int16_t b_width, int16_t b_height) -{ - int w = MIN2(a_x + a_width, b_x + b_width) - MAX2(a_x, b_x); - int h = MIN2(a_y + a_height, b_y + b_height) - MAX2(a_y, b_y); - - return (w < 0 || h < 0) ? 
0 : w * h; -} - EGLBoolean dri2_x11_get_msc_rate(_EGLDisplay *display, _EGLSurface *surface, EGLint *numerator, EGLint *denominator) diff --git a/src/loader/loader_dri_helper.h b/src/loader/loader_dri_helper.h index 169e36b5d80..5837dde16cd 100644 --- a/src/loader/loader_dri_helper.h +++ b/src/loader/loader_dri_helper.h @@ -29,36 +29,7 @@ #include #include "util/format/u_formats.h" -#ifdef HAVE_X11_PLATFORM -#include -#include -#include - -struct loader_crtc_info { - xcb_randr_crtc_t id; - xcb_timestamp_t timestamp; - - int16_t x, y; - uint16_t width, height; - - unsigned refresh_numerator; - unsigned refresh_denominator; -}; - -struct loader_screen_resources { - mtx_t mtx; - - xcb_connection_t *conn; - xcb_screen_t *screen; - - xcb_timestamp_t config_timestamp; - - /* Number of CRTCs with an active mode set */ - unsigned num_crtcs; - struct loader_crtc_info *crtcs; -}; -#endif - +#include "loader_dri_helper_screen.h" /** * These formats are endian independent they result in the same layout @@ -110,16 +81,4 @@ loader_pipe_format_to_fourcc(enum pipe_format pipe); enum pipe_format loader_fourcc_to_pipe_format(uint32_t fourcc); -#ifdef HAVE_X11_PLATFORM -void -loader_init_screen_resources(struct loader_screen_resources *res, - xcb_connection_t *conn, - xcb_screen_t *screen); -bool -loader_update_screen_resources(struct loader_screen_resources *res); - -void -loader_destroy_screen_resources(struct loader_screen_resources *res); -#endif - #endif /* LOADER_DRI_HELPER_H */ diff --git a/src/loader/loader_dri_helper_screen.h b/src/loader/loader_dri_helper_screen.h new file mode 100644 index 00000000000..290e8dd111c --- /dev/null +++ b/src/loader/loader_dri_helper_screen.h @@ -0,0 +1,76 @@ +/* + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear 
in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#ifndef LOADER_DRI_HELPER_SCREEN_H +#define LOADER_DRI_HELPER_SCREEN_H + +#ifdef HAVE_X11_PLATFORM +#include +#include +#include + +struct loader_crtc_info { + xcb_randr_crtc_t id; + xcb_timestamp_t timestamp; + + int16_t x, y; + uint16_t width, height; + + unsigned refresh_numerator; + unsigned refresh_denominator; +}; + +struct loader_screen_resources { + mtx_t mtx; + + xcb_connection_t *conn; + xcb_screen_t *screen; + + xcb_timestamp_t config_timestamp; + + /* Number of CRTCs with an active mode set */ + unsigned num_crtcs; + struct loader_crtc_info *crtcs; +}; + +void +loader_init_screen_resources(struct loader_screen_resources *res, + xcb_connection_t *conn, + xcb_screen_t *screen); +bool +loader_update_screen_resources(struct loader_screen_resources *res); + +void +loader_destroy_screen_resources(struct loader_screen_resources *res); + +#endif + +static inline int +box_intersection_area(int16_t a_x, int16_t a_y, int16_t a_width, + int16_t a_height, int16_t b_x, int16_t b_y, + int16_t b_width, int16_t b_height) +{ + int w = MIN2(a_x + a_width, b_x + b_width) - MAX2(a_x, b_x); + int h = MIN2(a_y 
+ a_height, b_y + b_height) - MAX2(a_y, b_y); + + return (w < 0 || h < 0) ? 0 : w * h; +} + +#endif From f9acc4f54123eb689812e6ce6f88939a47b2a9e0 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 1 Dec 2025 16:25:31 +0100 Subject: [PATCH 02/17] vulkan/wsi: Add common infrastructure for EXT_present_timing. Signed-off-by: Hans-Kristian Arntzen --- src/vulkan/wsi/wsi_common.c | 340 +++++++++++++++++++++++++++- src/vulkan/wsi/wsi_common_private.h | 49 ++++ 2 files changed, 378 insertions(+), 11 deletions(-) diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c index b861bb75ed7..8ef935edb44 100644 --- a/src/vulkan/wsi/wsi_common.c +++ b/src/vulkan/wsi/wsi_common.c @@ -628,6 +628,12 @@ wsi_swapchain_finish(struct wsi_swapchain *chain) vk_free(&chain->alloc, chain->cmd_pools); } + if (chain->present_timing.active) { + mtx_destroy(&chain->present_timing.lock); + if (chain->present_timing.timings) + vk_free(&chain->alloc, chain->present_timing.timings); + } + vk_object_base_finish(&chain->base); } @@ -1112,6 +1118,23 @@ wsi_CreateSwapchainKHR(VkDevice _device, *pSwapchain = wsi_swapchain_to_handle(swapchain); + if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) { + swapchain->present_timing.active = true; + mtx_init(&swapchain->present_timing.lock, 0); + + if (swapchain->poll_early_refresh) { + /* If we can query the display directly, we should report something reasonable on first query + * before we even present the first time. */ + uint64_t refresh_ns = swapchain->poll_early_refresh(swapchain); + if (refresh_ns) { + swapchain->present_timing.refresh_duration = refresh_ns; + /* None of the APIs can know a-priori if we're driving the display VRR or not. 
*/ + swapchain->present_timing.refresh_interval = 0; + swapchain->present_timing.refresh_counter++; + } + } + } + return VK_SUCCESS; } @@ -1168,6 +1191,261 @@ wsi_ReleaseSwapchainImagesKHR(VkDevice _device, return VK_SUCCESS; } +static VkResult wsi_common_allocate_timing_request( + struct wsi_swapchain *swapchain, const VkPresentTimingInfoEXT *timing, uint64_t present_id) +{ + VkResult vr = VK_SUCCESS; + mtx_lock(&swapchain->present_timing.lock); + + if (swapchain->present_timing.timings_count >= swapchain->present_timing.timings_capacity) { + vr = VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT; + goto err; + } + + struct wsi_presentation_timing *wsi_timing = + &swapchain->present_timing.timings[swapchain->present_timing.timings_count++]; + + memset(wsi_timing, 0, sizeof(*wsi_timing)); + wsi_timing->serial = ++swapchain->present_timing.serial; + wsi_timing->target_time = timing->targetTime; + wsi_timing->present_id = present_id; + wsi_timing->requested_feedback = timing->presentStageQueries; + + /* Ignore the time domain since we have a static domain. 
*/ + +err: + mtx_unlock(&swapchain->present_timing.lock); + return vr; +} + +void +wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain, + uint64_t timing_serial, + uint64_t timestamp) +{ + assert(chain->present_timing.active); + mtx_lock(&chain->present_timing.lock); + + for (size_t i = 0; i < chain->present_timing.timings_count; i++) { + if (chain->present_timing.timings[i].serial == timing_serial) { + chain->present_timing.timings[i].complete_time = timestamp; + chain->present_timing.timings[i].complete = VK_TRUE; + break; + } + } + + mtx_unlock(&chain->present_timing.lock); +} + +void +wsi_swapchain_present_timing_update_refresh_rate(struct wsi_swapchain *chain, + uint64_t refresh_duration, + uint64_t refresh_interval, + int minimum_delta_for_update) +{ + mtx_lock(&chain->present_timing.lock); + + int64_t duration_delta = llabs((int64_t)refresh_duration - (int64_t)chain->present_timing.refresh_duration); + int64_t interval_delta = llabs((int64_t)refresh_interval - (int64_t)chain->present_timing.refresh_interval); + + /* When the refresh rate is an estimate, the value may fluctuate slightly frame to frame, + * don't spam refresh counter updates unless there is a meaningful delta. + * Applications that use absolute timings are expected to recalibrate based on feedback. */ + if (duration_delta > minimum_delta_for_update || interval_delta > minimum_delta_for_update || + chain->present_timing.refresh_counter == 0) { + /* We'll report this updated refresh counter in feedback, + * so that application knows to requery the refresh rate. 
*/ + chain->present_timing.refresh_counter++; + chain->present_timing.refresh_duration = refresh_duration; + chain->present_timing.refresh_interval = refresh_interval; + } + + mtx_unlock(&chain->present_timing.lock); +} + +VKAPI_ATTR VkResult VKAPI_CALL +wsi_GetPastPresentationTimingEXT( + VkDevice device, + const VkPastPresentationTimingInfoEXT* pPastPresentationTimingInfo, + VkPastPresentationTimingPropertiesEXT* pPastPresentationTimingProperties) +{ + VK_FROM_HANDLE(wsi_swapchain, swapchain, pPastPresentationTimingInfo->swapchain); + VkResult vr = VK_SUCCESS; + bool out_of_order = (pPastPresentationTimingInfo->flags & + VK_PAST_PRESENTATION_TIMING_ALLOW_OUT_OF_ORDER_RESULTS_BIT_EXT) != 0; + + if (swapchain->poll_timing_request) + swapchain->poll_timing_request(swapchain); + + mtx_lock(&swapchain->present_timing.lock); + + pPastPresentationTimingProperties->timingPropertiesCounter = swapchain->present_timing.refresh_counter; + pPastPresentationTimingProperties->timeDomainsCounter = 1; + + /* This implementation always returns results in-order, so can ignore the out-of-order flag. + * TODO: Honor the partial results flag. */ + + uint32_t done_count = 0; + for (uint32_t i = 0; i < swapchain->present_timing.timings_count; i++) { + /* If different presents request different kinds of state, we may get completion out of order. + * If flag is not set, we cannot report frame N until we have completed all frames M < N. */ + if (swapchain->present_timing.timings[i].complete) + done_count++; + else if (!out_of_order) + break; + } + + /* We don't remove timing info from queue until it is consumed. 
*/ + if (!pPastPresentationTimingProperties->pPresentationTimings) { + pPastPresentationTimingProperties->presentationTimingCount = done_count; + mtx_unlock(&swapchain->present_timing.lock); + return VK_SUCCESS; + } + + VK_OUTARRAY_MAKE_TYPED(VkPastPresentationTimingEXT, timings, + pPastPresentationTimingProperties->pPresentationTimings, + &pPastPresentationTimingProperties->presentationTimingCount); + + uint32_t new_timings_count = 0; + bool stop_timing_removal = false; + + for (uint32_t i = 0; i < swapchain->present_timing.timings_count; i++) { + const struct wsi_presentation_timing *in_timing = &swapchain->present_timing.timings[i]; + + if (!swapchain->present_timing.timings[i].complete || stop_timing_removal) { + /* Keep output ordered to be compliant without having to re-sort every time. + * Queue depth for timestamps is expected to be small. */ + swapchain->present_timing.timings[new_timings_count++] = swapchain->present_timing.timings[i]; + if (!out_of_order) + stop_timing_removal = true; + continue; + } + + vk_outarray_append_typed(VkPastPresentationTimingEXT, &timings, timing) { + timing->targetTime = swapchain->present_timing.timings[i].target_time; + timing->presentId = in_timing->present_id; + timing->timeDomain = swapchain->present_timing.time_domain; + timing->timeDomainId = 0; + timing->reportComplete = in_timing->complete; + + /* No INCOMPLETE is reported here. Failures are silent. + * However, application already knows upper bound for stage count based on the query, + * so this should never fail. 
*/ + VK_OUTARRAY_MAKE_TYPED(VkPresentStageTimeEXT, stages, timing->pPresentStages, &timing->presentStageCount); + + if (in_timing->requested_feedback & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) { + vk_outarray_append_typed(VkPresentStageTimeEXT, &stages, stage) { + stage->stage = VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT; + stage->time = in_timing->queue_done_time; + } + } + + if (in_timing->requested_feedback & ~VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) { + vk_outarray_append_typed(VkPresentStageTimeEXT, &stages, stage) { + stage->stage = in_timing->requested_feedback & ~VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT; + /* It is expected that implementation will only expose one timing value. */ + assert(util_bitcount(stage->stage) == 1); + stage->time = in_timing->complete_time; + } + } + } + } + + swapchain->present_timing.timings_count = new_timings_count; + vr = vk_outarray_status(&timings); + + /* This function is fully atomic within implementation, so have to be thread safe. */ + mtx_unlock(&swapchain->present_timing.lock); + return vr; +} + +VKAPI_ATTR VkResult VKAPI_CALL +wsi_GetSwapchainTimeDomainPropertiesEXT( + VkDevice device, + VkSwapchainKHR swapchain_, + VkSwapchainTimeDomainPropertiesEXT* pSwapchainTimeDomainProperties, + uint64_t* pTimeDomainsCounter) +{ + VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_); + + /* We don't change time domains. Everything is static. */ + if (pTimeDomainsCounter) + *pTimeDomainsCounter = 1; + + /* This style is a bit goofy and doesn't map cleanly to anything. 
*/ + if (!pSwapchainTimeDomainProperties->pTimeDomainIds && !pSwapchainTimeDomainProperties->pTimeDomains) { + pSwapchainTimeDomainProperties->timeDomainCount = 1; + return VK_SUCCESS; + } else if (pSwapchainTimeDomainProperties->timeDomainCount == 0) { + return VK_INCOMPLETE; + } + + pSwapchainTimeDomainProperties->timeDomainCount = 1; + if (pSwapchainTimeDomainProperties->pTimeDomains) + *pSwapchainTimeDomainProperties->pTimeDomains = swapchain->present_timing.time_domain; + if (pSwapchainTimeDomainProperties->pTimeDomainIds) + *pSwapchainTimeDomainProperties->pTimeDomainIds = 0; + + return VK_SUCCESS; +} + +VKAPI_ATTR VkResult VKAPI_CALL +wsi_GetSwapchainTimingPropertiesEXT( + VkDevice device, + VkSwapchainKHR swapchain_, + VkSwapchainTimingPropertiesEXT* pSwapchainTimingProperties, + uint64_t* pSwapchainTimingPropertiesCounter) +{ + VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_); + VkResult vr; + + mtx_lock(&swapchain->present_timing.lock); + /* If we don't have data yet, must return VK_NOT_READY. */ + vr = swapchain->present_timing.refresh_counter ? 
VK_SUCCESS : VK_NOT_READY; + pSwapchainTimingProperties->refreshInterval = swapchain->present_timing.refresh_interval; + pSwapchainTimingProperties->refreshDuration = swapchain->present_timing.refresh_duration; + if (pSwapchainTimingPropertiesCounter) + *pSwapchainTimingPropertiesCounter = swapchain->present_timing.refresh_counter; + mtx_unlock(&swapchain->present_timing.lock); + return vr; +} + +VKAPI_ATTR VkResult VKAPI_CALL +wsi_SetSwapchainPresentTimingQueueSizeEXT( + VkDevice device, + VkSwapchainKHR swapchain_, + uint32_t size) +{ + VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_); + assert(swapchain->present_timing.active); + VkResult vr = VK_SUCCESS; + + mtx_lock(&swapchain->present_timing.lock); + + if (size < swapchain->present_timing.timings_count) { + vr = VK_NOT_READY; + goto error; + } + + if (size > swapchain->present_timing.timings_capacity) { + void *new_ptr = vk_realloc(&swapchain->alloc, swapchain->present_timing.timings, + sizeof(*swapchain->present_timing.timings) * size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (new_ptr) { + swapchain->present_timing.timings = new_ptr; + swapchain->present_timing.timings_capacity = size; + } else { + vr = VK_ERROR_OUT_OF_HOST_MEMORY; + goto error; + } + } else { + swapchain->present_timing.timings_capacity = size; + } + +error: + mtx_unlock(&swapchain->present_timing.lock); + return vr; +} + VkDeviceMemory wsi_common_get_memory(VkSwapchainKHR _swapchain, uint32_t index) { @@ -1521,6 +1799,38 @@ wsi_common_queue_present(const struct wsi_device *wsi, vk_find_struct_const(pPresentInfo->pNext, PRESENT_ID_2_KHR); const VkSwapchainPresentFenceInfoKHR *present_fence_info = vk_find_struct_const(pPresentInfo->pNext, SWAPCHAIN_PRESENT_FENCE_INFO_KHR); + const VkPresentTimingsInfoEXT *present_timings_info = + vk_find_struct_const(pPresentInfo->pNext, PRESENT_TIMINGS_INFO_EXT); + + if (present_timings_info) { + /* If we fail a present due to full queue, it's a little unclear from + * spec if we should treat it as 
OUT_OF_DATE or OUT_OF_HOST_MEMORY for + * purposes of signaling. Validation layers and at least one other implementation + * in the wild seems to treat it as OUT_OF_DATE, so do that. */ + for (uint32_t i = 0; i < present_timings_info->swapchainCount; i++) { + const VkPresentTimingInfoEXT *info = &present_timings_info->pTimingInfos[i]; + VK_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]); + if (results[i] != VK_SUCCESS || !swapchain->set_timing_request) + continue; + + assert(swapchain->present_timing.active); + + /* EXT_present_timing is defined to only work with present_id2. + * It's only used when reporting back timings. */ + results[i] = wsi_common_allocate_timing_request( + swapchain, info, present_ids2 ? present_ids2->pPresentIds[i] : 0); + + /* Application is responsible for allocating sufficient size here. + * We fail with VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT if application is bugged. */ + if (results[i] == VK_SUCCESS) { + swapchain->set_timing_request(swapchain, &(struct wsi_image_timing_request) { + .serial = swapchain->present_timing.serial, + .time = info->targetTime, + .flags = info->flags, + }); + } + } + } /* Gather up all the semaphores and fences we need to signal per-image */ STACK_ARRAY(struct wsi_image_signal_info, image_signal_infos, @@ -1612,14 +1922,27 @@ wsi_common_queue_present(const struct wsi_device *wsi, struct wsi_image *image = swapchain->get_wsi_image(swapchain, image_index); + bool separate_queue_blit = swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT && + swapchain->blit.queue != NULL; + + /* For TIMING_QUEUE_FULL_EXT, ensure sync objects are signaled, + * but don't do any real work. 
*/ + if (results[i] == VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT || !separate_queue_blit) { + for (uint32_t j = 0; j < image_signal_infos[i].semaphore_count; j++) { + signal_semaphore_infos[signal_semaphore_count++] = + image_signal_infos[i].semaphore_infos[j]; + } + for (uint32_t j = 0; j < image_signal_infos[i].fence_count; j++) + fences[fence_count++] = image_signal_infos[i].fences[j]; + } + if (results[i] != VK_SUCCESS) continue; /* If we're blitting on another swapchain, just signal the blit * semaphore for now. */ - if (swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT && - swapchain->blit.queue != NULL) { + if (separate_queue_blit) { /* Create the blit semaphore if needed */ if (swapchain->blit.semaphores[image_index] == VK_NULL_HANDLE) { const VkSemaphoreCreateInfo sem_info = { @@ -1650,13 +1973,6 @@ wsi_common_queue_present(const struct wsi_device *wsi, image->blit.cmd_buffers[queue->queue_family_index], }; } - - for (uint32_t j = 0; j < image_signal_infos[i].semaphore_count; j++) { - signal_semaphore_infos[signal_semaphore_count++] = - image_signal_infos[i].semaphore_infos[j]; - } - for (uint32_t j = 0; j < image_signal_infos[i].fence_count; j++) - fences[fence_count++] = image_signal_infos[i].fences[j]; } const VkSubmitInfo2 submit_info = { @@ -1693,8 +2009,10 @@ wsi_common_queue_present(const struct wsi_device *wsi, if (results[i] != VK_SUCCESS) continue; - if (swapchain->blit.type == WSI_SWAPCHAIN_NO_BLIT || - swapchain->blit.queue == NULL) + bool separate_queue_blit = swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT && + swapchain->blit.queue != NULL; + + if (!separate_queue_blit) continue; const VkSemaphoreSubmitInfo blit_semaphore_info = { diff --git a/src/vulkan/wsi/wsi_common_private.h b/src/vulkan/wsi/wsi_common_private.h index 9fe64cfba95..fb9bfd8304b 100644 --- a/src/vulkan/wsi/wsi_common_private.h +++ b/src/vulkan/wsi/wsi_common_private.h @@ -190,6 +190,22 @@ struct wsi_image { void *cpu_map; }; +struct wsi_presentation_timing { + uint64_t present_id; + 
uint64_t target_time; + uint64_t serial; + uint64_t queue_done_time; /* GPU timestamp based. */ + uint64_t complete_time; /* Best effort timestamp we get from backend. */ + VkPresentStageFlagsEXT requested_feedback; + VkBool32 complete; +}; + +struct wsi_image_timing_request { + uint64_t serial; + uint64_t time; + VkPresentTimingInfoFlagsEXT flags; +}; + struct wsi_swapchain { struct vk_object_base base; @@ -237,6 +253,26 @@ struct wsi_swapchain { struct vk_queue *queue; } blit; + struct { + mtx_t lock; + bool active; + + struct wsi_presentation_timing *timings; + size_t timings_capacity; + size_t timings_count; + + size_t serial; + + /* Maps to Vulkan spec definitions. */ + uint64_t refresh_duration; + uint64_t refresh_interval; + /* When 0, we don't know yet. Every time the refresh rate changes, + * increase this counter. This counter must also be passed in GetPastTimings. */ + uint64_t refresh_counter; + + VkTimeDomainKHR time_domain; + } present_timing; + bool capture_key_pressed; /* Command pools, one per queue family */ @@ -266,6 +302,10 @@ struct wsi_swapchain { VkPresentModeKHR mode); void (*set_hdr_metadata)(struct wsi_swapchain *swap_chain, const VkHdrMetadataEXT* pMetadata); + void (*set_timing_request)(struct wsi_swapchain *swap_chain, + const struct wsi_image_timing_request *request); + void (*poll_timing_request)(struct wsi_swapchain *swap_chain); + uint64_t (*poll_early_refresh)(struct wsi_swapchain *swap_chain); }; bool @@ -377,6 +417,15 @@ VkResult wsi_swapchain_wait_for_present_semaphore(const struct wsi_swapchain *chain, uint64_t present_id, uint64_t timeout); +void +wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain, + uint64_t timing_serial, uint64_t timestamp); + +void +wsi_swapchain_present_timing_update_refresh_rate(struct wsi_swapchain *chain, + uint64_t refresh_duration, uint64_t refresh_interval, + int minimum_delta_for_update); + #ifdef HAVE_LIBDRM VkResult wsi_prepare_signal_dma_buf_from_semaphore(struct 
wsi_swapchain *chain, From 8244c68da9609ae7d8cb6bccb810ee3ab09d5cc5 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 1 Dec 2025 16:27:08 +0100 Subject: [PATCH 03/17] vulkan/wsi: Add no-op present timing support to most backends. Signed-off-by: Hans-Kristian Arntzen --- src/vulkan/wsi/wsi_common_display.c | 10 ++++++++++ src/vulkan/wsi/wsi_common_headless.c | 10 ++++++++++ src/vulkan/wsi/wsi_common_metal.c | 10 ++++++++++ src/vulkan/wsi/wsi_common_wayland.c | 10 ++++++++++ src/vulkan/wsi/wsi_common_win32.cpp | 10 ++++++++++ 5 files changed, 50 insertions(+) diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c index 4c9451a6d00..53464a7416b 100644 --- a/src/vulkan/wsi/wsi_common_display.c +++ b/src/vulkan/wsi/wsi_common_display.c @@ -1319,6 +1319,16 @@ wsi_display_surface_get_capabilities2(VkIcdSurfaceBase *icd_surface, break; } + case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: { + VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext; + + wait->presentStageQueries = 0; + wait->presentTimingSupported = VK_FALSE; + wait->presentAtAbsoluteTimeSupported = VK_FALSE; + wait->presentAtRelativeTimeSupported = VK_FALSE; + break; + } + default: /* Ignored */ break; diff --git a/src/vulkan/wsi/wsi_common_headless.c b/src/vulkan/wsi/wsi_common_headless.c index 46db5a49f09..807148b4f30 100644 --- a/src/vulkan/wsi/wsi_common_headless.c +++ b/src/vulkan/wsi/wsi_common_headless.c @@ -112,6 +112,16 @@ wsi_headless_surface_get_capabilities2(VkIcdSurfaceBase *surface, break; } + case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: { + VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext; + + wait->presentStageQueries = 0; + wait->presentTimingSupported = VK_FALSE; + wait->presentAtAbsoluteTimeSupported = VK_FALSE; + wait->presentAtRelativeTimeSupported = VK_FALSE; + break; + } + default: /* Ignored */ break; diff --git a/src/vulkan/wsi/wsi_common_metal.c b/src/vulkan/wsi/wsi_common_metal.c index 
857de8959e1..2b9176c6719 100644 --- a/src/vulkan/wsi/wsi_common_metal.c +++ b/src/vulkan/wsi/wsi_common_metal.c @@ -139,6 +139,16 @@ wsi_metal_surface_get_capabilities2(VkIcdSurfaceBase *surface, break; } + case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: { + VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext; + + wait->presentStageQueries = 0; + wait->presentTimingSupported = VK_FALSE; + wait->presentAtAbsoluteTimeSupported = VK_FALSE; + wait->presentAtRelativeTimeSupported = VK_FALSE; + break; + } + default: /* Ignored */ break; diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index 083777f42ac..38b9ed61ba1 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -1914,6 +1914,16 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface, break; } + case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: { + VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext; + + wait->presentStageQueries = 0; + wait->presentTimingSupported = VK_FALSE; + wait->presentAtAbsoluteTimeSupported = VK_FALSE; + wait->presentAtRelativeTimeSupported = VK_FALSE; + break; + } + default: /* Ignored */ break; diff --git a/src/vulkan/wsi/wsi_common_win32.cpp b/src/vulkan/wsi/wsi_common_win32.cpp index 03b84771b36..df41fd967c4 100644 --- a/src/vulkan/wsi/wsi_common_win32.cpp +++ b/src/vulkan/wsi/wsi_common_win32.cpp @@ -276,6 +276,16 @@ wsi_win32_surface_get_capabilities2(VkIcdSurfaceBase *surface, break; } + case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: { + VkPresentTimingSurfaceCapabilitiesEXT *wait = (VkPresentTimingSurfaceCapabilitiesEXT *)ext; + + wait->presentStageQueries = 0; + wait->presentTimingSupported = VK_FALSE; + wait->presentAtAbsoluteTimeSupported = VK_FALSE; + wait->presentAtRelativeTimeSupported = VK_FALSE; + break; + } + default: /* Ignored */ break; From 72acf1f8b13620a594140d6f0916a87c114c04a7 Mon Sep 17 00:00:00 2001 From: Hans-Kristian 
Arntzen Date: Mon, 1 Dec 2025 16:27:29 +0100 Subject: [PATCH 04/17] wsi/x11: Implement EXT_present_timing support. Implement basic support on Xwl due to limitations inherent to Xwl. Signed-off-by: Hans-Kristian Arntzen --- src/meson.build | 2 +- src/vulkan/wsi/meson.build | 4 + src/vulkan/wsi/wsi_common_x11.c | 536 +++++++++++++++++++++++++++++--- 3 files changed, 495 insertions(+), 47 deletions(-) diff --git a/src/meson.build b/src/meson.build index f27dae33631..1568cfa656e 100644 --- a/src/meson.build +++ b/src/meson.build @@ -49,7 +49,7 @@ endif if with_platform_x11 subdir('x11') endif -if with_gallium_or_lvp or with_gbm or with_platform_wayland +if with_gallium_or_lvp or with_gbm or with_platform_wayland or with_platform_x11 or with_platform_xcb subdir('loader') endif subdir('compiler') diff --git a/src/vulkan/wsi/meson.build b/src/vulkan/wsi/meson.build index 38a83d08eab..4fc2a9205f1 100644 --- a/src/vulkan/wsi/meson.build +++ b/src/vulkan/wsi/meson.build @@ -26,6 +26,10 @@ if with_platform_wayland files_vulkan_wsi += wp_files['color-management-v1'] endif +if with_platform_x11 or with_platform_xcb + links_vulkan_wsi += libloader +endif + if with_platform_windows files_vulkan_wsi += files('wsi_common_win32.cpp') platform_deps += dep_dxheaders diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c index 544ddceca1d..e7f3d0ac2d8 100644 --- a/src/vulkan/wsi/wsi_common_x11.c +++ b/src/vulkan/wsi/wsi_common_x11.c @@ -64,6 +64,7 @@ #include "wsi_common_entrypoints.h" #include "wsi_common_private.h" #include "wsi_common_queue.h" +#include "loader/loader_dri_helper_screen.h" #ifdef HAVE_SYS_SHM_H #include @@ -79,7 +80,14 @@ #define MAX_DAMAGE_RECTS 64 -struct wsi_x11_connection { +struct x11_icd_surface_key { + xcb_connection_t *conn; + xcb_window_t window; + uint32_t padding; /* Makes struct memcmp compatible. 
*/ +}; + +struct wsi_x11_icd_surface { + struct x11_icd_surface_key key; bool has_dri3; bool has_dri3_modifiers; bool has_dri3_explicit_sync; @@ -88,13 +96,80 @@ struct wsi_x11_connection { bool is_xwayland; bool has_mit_shm; bool has_xfixes; + + struct loader_screen_resources screen_resources; + bool screen_resources_valid; + mtx_t mtx; + + /* This holds the fallback for MSC rate, i.e. refresh rate. + * If we cannot get ahold of a stable estimate based on real feedback, + * we defer to using this. With multi-monitors and other potential effects affecting actual rates, + * we shouldn't trust this blindly. */ + uint64_t current_refresh_ns; }; +static uint64_t +x11_icd_surface_update_present_timing(struct wsi_x11_icd_surface *surface, uint32_t width, uint32_t height) +{ + uint64_t ret; + + if (!surface->screen_resources_valid) + return 0; + + mtx_lock(&surface->mtx); + loader_update_screen_resources(&surface->screen_resources); + + if (surface->screen_resources.num_crtcs == 0) { + surface->current_refresh_ns = 0; + goto out; + } + + surface->current_refresh_ns = + 1000000000ull * surface->screen_resources.crtcs[0].refresh_denominator / + surface->screen_resources.crtcs[0].refresh_numerator; + + /* Don't need to ponder multi-monitor. */ + if (surface->screen_resources.num_crtcs == 1) + goto out; + + /* Find the best matching screen for the window. 
*/ + xcb_translate_coordinates_cookie_t cookie = + xcb_translate_coordinates_unchecked(surface->key.conn, surface->key.window, + surface->screen_resources.screen->root, 0, 0); + xcb_translate_coordinates_reply_t *reply = + xcb_translate_coordinates_reply(surface->key.conn, cookie, NULL); + + if (!reply) + goto out; + + int area = 0; + + for (unsigned c = 0; c < surface->screen_resources.num_crtcs; c++) { + struct loader_crtc_info *crtc = &surface->screen_resources.crtcs[c]; + + int c_area = box_intersection_area( + reply->dst_x, reply->dst_y, width, height, crtc->x, + crtc->y, crtc->width, crtc->height); + + if (c_area > area) { + surface->current_refresh_ns = 1000000000ull * crtc->refresh_denominator / crtc->refresh_numerator; + area = c_area; + } + } + + free(reply); + +out: + ret = surface->current_refresh_ns; + mtx_unlock(&surface->mtx); + return ret; +} + struct wsi_x11 { struct wsi_interface base; mtx_t mutex; - /* Hash table of xcb_connection -> wsi_x11_connection mappings */ + /* Hash table of xcb_connection -> wsi_x11_icd_surface mappings */ struct hash_table *connections; }; @@ -224,9 +299,9 @@ wsi_x11_detect_xwayland(xcb_connection_t *conn, return is_xwayland; } -static struct wsi_x11_connection * -wsi_x11_connection_create(struct wsi_device *wsi_dev, - xcb_connection_t *conn) +static struct wsi_x11_icd_surface * +wsi_x11_icd_surface_create(struct wsi_device *wsi_dev, + xcb_connection_t *conn, xcb_window_t window) { xcb_query_extension_cookie_t dri3_cookie, pres_cookie, randr_cookie, amd_cookie, nv_cookie, shm_cookie, sync_cookie, @@ -241,16 +316,19 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev, bool has_dri3_v1_4 = false; bool has_present_v1_4 = false; - /* wsi_x11_get_connection may be called from a thread, but we will never end up here on a worker thread, + /* wsi_x11_get_icd_surface may be called from a thread, but we will never end up here on a worker thread, * since the connection will always be in the hash-map, * so we will not violate 
Vulkan's rule on allocation callbacks w.r.t. * when it is allowed to call the allocation callbacks. */ - struct wsi_x11_connection *wsi_conn = - vk_alloc(&wsi_dev->instance_alloc, sizeof(*wsi_conn), 8, + struct wsi_x11_icd_surface *wsi_conn = + vk_zalloc(&wsi_dev->instance_alloc, sizeof(*wsi_conn), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (!wsi_conn) return NULL; + wsi_conn->key.conn = conn; + wsi_conn->key.window = window; + sync_cookie = xcb_query_extension(conn, 4, "SYNC"); dri3_cookie = xcb_query_extension(conn, 4, "DRI3"); pres_cookie = xcb_query_extension(conn, 7, "Present"); @@ -378,6 +456,27 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev, } #endif + if (window) { + /* This state is only necessary for dealing with present timing, and if we fail, we simply won't expose support. */ + xcb_get_geometry_cookie_t geometry_cookie = xcb_get_geometry_unchecked(conn, window); + xcb_get_geometry_reply_t *geometry_reply = xcb_get_geometry_reply(conn, geometry_cookie, NULL); + if (geometry_reply) { + xcb_screen_iterator_t it = xcb_setup_roots_iterator(xcb_get_setup(conn)); + xcb_screen_t *screen; + + for (screen = it.data; it.rem != 0; xcb_screen_next(&it), screen = it.data) { + if (screen->root == geometry_reply->root) { + loader_init_screen_resources(&wsi_conn->screen_resources, conn, screen); + wsi_conn->screen_resources_valid = true; + mtx_init(&wsi_conn->mtx, 0); + break; + } + } + + free(geometry_reply); + } + } + free(dri3_reply); free(pres_reply); free(randr_reply); @@ -392,14 +491,18 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev, } static void -wsi_x11_connection_destroy(struct wsi_device *wsi_dev, - struct wsi_x11_connection *conn) +wsi_x11_icd_surface_destroy(struct wsi_device *wsi_dev, + struct wsi_x11_icd_surface *conn) { + if (conn->screen_resources_valid) { + loader_destroy_screen_resources(&conn->screen_resources); + mtx_destroy(&conn->mtx); + } vk_free(&wsi_dev->instance_alloc, conn); } static bool -wsi_x11_check_for_dri3(struct 
wsi_x11_connection *wsi_conn) +wsi_x11_check_for_dri3(struct wsi_x11_icd_surface *wsi_conn) { if (wsi_conn->has_dri3) return true; @@ -418,35 +521,37 @@ wsi_x11_check_for_dri3(struct wsi_x11_connection *wsi_conn) * * If the allocation fails NULL is returned. */ -static struct wsi_x11_connection * -wsi_x11_get_connection(struct wsi_device *wsi_dev, - xcb_connection_t *conn) +static struct wsi_x11_icd_surface * +wsi_x11_get_icd_surface(struct wsi_device *wsi_dev, + xcb_connection_t *conn, xcb_window_t window) { struct wsi_x11 *wsi = (struct wsi_x11 *)wsi_dev->wsi[VK_ICD_WSI_PLATFORM_XCB]; mtx_lock(&wsi->mutex); - struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn); + struct x11_icd_surface_key key = { .conn = conn, .window = window }; + + struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, &key); if (!entry) { /* We're about to make a bunch of blocking calls. Let's drop the * mutex for now so we don't block up too badly. */ mtx_unlock(&wsi->mutex); - struct wsi_x11_connection *wsi_conn = - wsi_x11_connection_create(wsi_dev, conn); + struct wsi_x11_icd_surface *wsi_conn = + wsi_x11_icd_surface_create(wsi_dev, conn, window); if (!wsi_conn) return NULL; mtx_lock(&wsi->mutex); - entry = _mesa_hash_table_search(wsi->connections, conn); + entry = _mesa_hash_table_search(wsi->connections, &wsi_conn->key); if (entry) { /* Oops, someone raced us to it */ - wsi_x11_connection_destroy(wsi_dev, wsi_conn); + wsi_x11_icd_surface_destroy(wsi_dev, wsi_conn); } else { - entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn); + entry = _mesa_hash_table_insert(wsi->connections, &wsi_conn->key, wsi_conn); } } @@ -593,8 +698,8 @@ wsi_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice, if (!(wsi_device->queue_supports_blit & BITFIELD64_BIT(queueFamilyIndex))) return false; - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection(wsi_device, connection); + struct wsi_x11_icd_surface *wsi_conn = + 
wsi_x11_get_icd_surface(wsi_device, connection, 0); if (!wsi_conn) return false; @@ -669,8 +774,8 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface, xcb_connection_t *conn = x11_surface_get_connection(icd_surface); xcb_window_t window = x11_surface_get_window(icd_surface); - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection(wsi_device, conn); + struct wsi_x11_icd_surface *wsi_conn = + wsi_x11_get_icd_surface(wsi_device, conn, window); if (!wsi_conn) return VK_ERROR_OUT_OF_HOST_MEMORY; @@ -722,7 +827,7 @@ x11_get_min_image_count(const struct wsi_device *wsi_device, bool is_xwayland) static unsigned x11_get_min_image_count_for_present_mode(struct wsi_device *wsi_device, - struct wsi_x11_connection *wsi_conn, + struct wsi_x11_icd_surface *wsi_conn, VkPresentModeKHR present_mode); static VkResult @@ -734,8 +839,8 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, xcb_connection_t *conn = x11_surface_get_connection(icd_surface); xcb_window_t window = x11_surface_get_window(icd_surface); struct wsi_x11_vk_surface *surface = (struct wsi_x11_vk_surface*)icd_surface; - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection(wsi_device, conn); + struct wsi_x11_icd_surface *wsi_conn = + wsi_x11_get_icd_surface(wsi_device, conn, window); xcb_get_geometry_cookie_t geom_cookie; xcb_generic_error_t *err; xcb_get_geometry_reply_t *geom; @@ -863,6 +968,52 @@ x11_surface_get_capabilities2(VkIcdSurfaceBase *icd_surface, break; } + case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: { + VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext; + + xcb_connection_t *conn = x11_surface_get_connection(icd_surface); + xcb_window_t window = x11_surface_get_window(icd_surface); + struct wsi_x11_icd_surface *wsi_conn = wsi_x11_get_icd_surface(wsi_device, conn, window); + + wait->presentStageQueries = 0; + wait->presentTimingSupported = VK_FALSE; + wait->presentAtAbsoluteTimeSupported = VK_FALSE; + wait->presentAtRelativeTimeSupported = 
VK_FALSE; + + /* If we cannot query modes for a screen, it's not possible to get reliable timings. */ + if (!wsi_conn->screen_resources_valid) + break; + + wait->presentTimingSupported = VK_TRUE; + + if (wsi_conn->is_xwayland) { + /* Wayland COMPLETE is tied to frame callback, so that's what we'll report. + * For pure frame pacing support, this is likely fine. */ + wait->presentStageQueries = VK_PRESENT_STAGE_REQUEST_DEQUEUED_BIT_EXT; + + /* Xwayland cannot get a reliable refresh rate estimate since MSC is not tied to monitor refresh at all. + * However, it's pragmatically very important to expose some baseline Xwl support since + * a large amount of applications (mostly games) rely on X11 APIs. + * + * Relative timings are easier to deal with since errors against an absolute timer are more or less expected, + * and it's sufficient for implementing present intervals in GL/D3D, etc, but likely not for + * tight A/V sync in e.g. media players, but those should be using Wayland when available anyway. + * As per spec, the timing request we provide should correlate with PIXEL_VISIBLE_BIT stage, + * but when we only observe dequeue, that's not really possible, but relative timings don't have that problem. + * + * There is PRESENT_CAPABILITY_UST, which would help, but xserver does not implement it at all. + */ + wait->presentAtRelativeTimeSupported = VK_TRUE; + } else { + /* COMPLETE should be tied to page flip on native X11. 
*/ + wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT; + wait->presentAtAbsoluteTimeSupported = VK_TRUE; + wait->presentAtRelativeTimeSupported = VK_TRUE; + } + + break; + } + default: /* Ignored */ break; @@ -1092,6 +1243,7 @@ wsi_CreateXlibSurfaceKHR(VkInstance _instance, struct x11_image_pending_completion { uint32_t serial; uint64_t signal_present_id; + uint64_t timing_serial; }; struct x11_image { @@ -1108,6 +1260,7 @@ struct x11_image { VkPresentModeKHR present_mode; xcb_rectangle_t rects[MAX_DAMAGE_RECTS]; int rectangle_count; + struct wsi_image_timing_request timing_request; /* In IMMEDIATE and MAILBOX modes, we can have multiple pending presentations per image. * We need to keep track of them when considering present ID. */ @@ -1125,12 +1278,19 @@ struct x11_image { #endif }; +struct x11_present_timing_entry { + uint64_t msc; + uint64_t ust; +}; +#define X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE 16 + struct x11_swapchain { struct wsi_swapchain base; bool has_dri3_modifiers; bool has_mit_shm; bool has_async_may_tear; + bool has_reliable_msc; xcb_connection_t * conn; xcb_window_t window; @@ -1144,9 +1304,13 @@ struct x11_swapchain { xcb_special_event_t * special_event; uint64_t send_sbc; uint64_t last_present_msc; + uint64_t next_present_ust_lower_bound; uint32_t stamp; uint32_t sent_image_count; + struct x11_present_timing_entry present_timing_window[X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE]; + uint32_t present_timing_window_index; + atomic_int status; bool copy_is_suboptimal; struct wsi_queue present_queue; @@ -1168,14 +1332,121 @@ struct x11_swapchain { uint64_t present_id; VkResult present_progress_error; + struct wsi_image_timing_request timing_request; + bool msc_estimate_is_stable; + struct x11_image images[0]; }; VK_DEFINE_NONDISP_HANDLE_CASTS(x11_swapchain, base.base, VkSwapchainKHR, VK_OBJECT_TYPE_SWAPCHAIN_KHR) -static void x11_present_complete(struct x11_swapchain *swapchain, - struct x11_image *image, uint32_t index) +static 
bool x11_refresh_rate_estimate_is_stable(struct x11_swapchain *swapchain, uint64_t base_rate) { + /* Only accept a refresh rate estimate if it's *very* stable. + * Keith's old GOOGLE_display_timing MR suggests that using this estimate is better than blindly + * accepting the modeline in some cases. + * When running in VRR modes, the MSC will appear to be highly unstable, and we cannot accept those estimates. */ + + for (int i = 0; i < X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE; i++) { + const struct x11_present_timing_entry *a = + &swapchain->present_timing_window[i]; + const struct x11_present_timing_entry *b = + &swapchain->present_timing_window[(i + 1) % X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE]; + + if (!a->msc || !b->msc) + continue; + + uint64_t ust_delta = MAX2(a->ust, b->ust) - MIN2(a->ust, b->ust); + uint64_t msc_delta = MAX2(a->msc, b->msc) - MIN2(a->msc, b->msc); + + if (msc_delta == 0) + continue; + + uint64_t refresh_ns = 1000 * ust_delta / msc_delta; + + /* The true UST values are expected to be quite accurate. + * Anything more than 10us difference in rate is considered unstable. + * If the MSC is driven by GPU progress in VRR mode, + * it's extremely unlikely that they are paced *perfectly* for 16 frames in a row. 
*/ + if (llabs((int64_t)base_rate - (int64_t)refresh_ns) > 10000) + return false; + } + + return true; +} + +static void x11_present_update_refresh_cycle_estimate(struct x11_swapchain *swapchain, + uint64_t msc, uint64_t ust) +{ + struct wsi_x11_icd_surface *surface = wsi_x11_get_icd_surface( + (struct wsi_device*)swapchain->base.wsi, swapchain->conn, swapchain->window); + + mtx_lock(&surface->mtx); + uint64_t randr_refresh_ns = surface->current_refresh_ns; + mtx_unlock(&surface->mtx); + + swapchain->present_timing_window_index = + (swapchain->present_timing_window_index + 1) % X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE; + struct x11_present_timing_entry *entry = &swapchain->present_timing_window[swapchain->present_timing_window_index]; + + if (!swapchain->has_reliable_msc) { + /* If we don't have reliable MSC, we always trust the fallback RANDR query. + * We have no idea if we're FRR or VRR. */ + wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, randr_refresh_ns, 0, 0); + entry->msc = msc; + entry->ust = ust; + return; + } + + /* Try to get an initial estimate as quickly as possible, we will refine it over time. */ + if (entry->msc == 0) + entry = &swapchain->present_timing_window[1]; + + if (entry->msc != 0) { + uint64_t msc_delta = msc - entry->msc; + + /* Safeguard against any weird interactions with IMMEDIATE. */ + if (msc_delta != 0) { + uint64_t ust_delta = 1000 * (ust - entry->ust); + uint64_t refresh_ns = ust_delta / msc_delta; + + swapchain->msc_estimate_is_stable = x11_refresh_rate_estimate_is_stable(swapchain, refresh_ns); + + if (swapchain->msc_estimate_is_stable) { + /* If MSC is tightly locked in, we can safely make the assumption we're in FRR mode. + * It's possible we're technically doing VRR, but if we're rendering at above monitor refresh + * rate consistently, then there is no meaningful difference anyway. */ + + /* Our refresh rates are only estimates, so expect some deviation (+/- 1us). 
*/ + wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, refresh_ns, refresh_ns, 1000); + } else { + /* If we have enabled adaptive sync, and we're seeing highly irregular MSC values, we assume + * we're driving the display VRR. */ + uint64_t refresh_interval = swapchain->base.wsi->enable_adaptive_sync ? UINT64_MAX : 0; + wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, randr_refresh_ns, refresh_interval, 0); + } + } + } + + entry = &swapchain->present_timing_window[swapchain->present_timing_window_index]; + entry->msc = msc; + entry->ust = ust; +} + +static void x11_present_complete(struct x11_swapchain *swapchain, + struct x11_image *image, uint32_t index, + uint64_t msc, uint64_t ust) +{ + /* Update estimate for refresh rate. */ + if (swapchain->base.present_timing.active) + x11_present_update_refresh_cycle_estimate(swapchain, msc, ust); + + /* Make sure to signal present timings before signalling present wait, + * this way we get minimal latency for reports. */ + uint64_t timing_serial = image->pending_completions[index].timing_serial; + if (timing_serial) + wsi_swapchain_present_timing_notify_completion(&swapchain->base, timing_serial, ust * 1000); + uint64_t signal_present_id = image->pending_completions[index].signal_present_id; if (signal_present_id) { mtx_lock(&swapchain->present_progress_mutex); @@ -1327,6 +1598,16 @@ x11_handle_dri3_present_event(struct x11_swapchain *chain, return VK_SUBOPTIMAL_KHR; } + if (chain->base.present_timing.active) { + /* It's possible that we have multiple monitors and moving windows around change the effective rate. + * Lots of logic reused from platform_x11.c. */ + + /* TODO: Should we rate-limit this query? 
*/ + struct wsi_x11_icd_surface *surface = wsi_x11_get_icd_surface( + (struct wsi_device *)chain->base.wsi, chain->conn, chain->window); + x11_icd_surface_update_present_timing(surface, config->width, config->height); + } + break; } @@ -1348,13 +1629,14 @@ x11_handle_dri3_present_event(struct x11_swapchain *chain, case XCB_PRESENT_EVENT_COMPLETE_NOTIFY: { xcb_present_complete_notify_event_t *complete = (void *) event; + uint64_t ust = MAX2(complete->ust, chain->next_present_ust_lower_bound); if (complete->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) { unsigned i, j; for (i = 0; i < chain->base.image_count; i++) { struct x11_image *image = &chain->images[i]; for (j = 0; j < image->present_queued_count; j++) { if (image->pending_completions[j].serial == complete->serial) { - x11_present_complete(chain, image, j); + x11_present_complete(chain, image, j, complete->msc, ust); } } } @@ -1424,8 +1706,8 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index, int64_t divisor = 0; int64_t remainder = 0; - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection((struct wsi_device*)chain->base.wsi, chain->conn); + struct wsi_x11_icd_surface *wsi_conn = + wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window); if (!wsi_conn) return VK_ERROR_OUT_OF_HOST_MEMORY; @@ -1457,6 +1739,7 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index, (struct x11_image_pending_completion) { .signal_present_id = image->present_id, .serial = serial, + .timing_serial = image->timing_request.serial, }; xcb_void_cookie_t cookie; @@ -1654,7 +1937,7 @@ static VkResult x11_swapchain_read_status_atomic(struct x11_swapchain *chain) */ static bool x11_needs_wait_for_fences(const struct wsi_device *wsi_device, - struct wsi_x11_connection *wsi_conn, + struct wsi_x11_icd_surface *wsi_conn, VkPresentModeKHR present_mode) { if (wsi_conn->is_xwayland && !wsi_device->x11.xwaylandWaitReady) { @@ -1676,7 +1959,7 @@ 
x11_needs_wait_for_fences(const struct wsi_device *wsi_device, static bool x11_requires_mailbox_image_count(const struct wsi_device *device, - struct wsi_x11_connection *wsi_conn, + struct wsi_x11_icd_surface *wsi_conn, VkPresentModeKHR present_mode) { /* If we're resorting to wait for fences, we're assuming a MAILBOX-like model, @@ -1773,6 +2056,23 @@ x11_set_present_mode(struct wsi_swapchain *wsi_chain, chain->base.present_mode = mode; } +static void +x11_set_timing_request(struct wsi_swapchain *wsi_chain, + const struct wsi_image_timing_request *request) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)wsi_chain; + chain->timing_request = *request; +} + +static uint64_t +x11_poll_early_refresh(struct wsi_swapchain *wsi_chain) +{ + struct x11_swapchain *chain = (struct x11_swapchain *)wsi_chain; + struct wsi_x11_icd_surface *wsi_conn = + wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window); + return x11_icd_surface_update_present_timing(wsi_conn, chain->extent.width, chain->extent.height); +} + /** * Acquire a ready-to-use image from the swapchain. * @@ -1874,6 +2174,8 @@ x11_queue_present(struct wsi_swapchain *wsi_chain, chain->images[image_index].present_id = present_id; /* With KHR_swapchain_maintenance1, the present mode can change per present. 
*/ chain->images[image_index].present_mode = chain->base.present_mode; + chain->images[image_index].timing_request = chain->timing_request; + memset(&chain->timing_request, 0, sizeof(chain->timing_request)); wsi_queue_push(&chain->present_queue, image_index); return x11_swapchain_read_status_atomic(chain); @@ -1974,6 +2276,125 @@ x11_manage_event_queue(void *state) return 0; } +static uint64_t +x11_present_compute_target_msc(struct x11_swapchain *chain, + const struct wsi_image_timing_request *request, + uint64_t minimum_msc) +{ + const struct x11_present_timing_entry *entry = &chain->present_timing_window[chain->present_timing_window_index]; + bool relative = (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_RELATIVE_TIME_BIT_EXT) != 0; + + /* Just use the FIFO derived MSC. From spec on relative: + * "If the swapchain has never been used to present an image, the provided targetTime is ignored." */ + if (!request->serial || !request->time || (relative && !entry->ust)) + return minimum_msc; + + int64_t target_ns; + + mtx_lock(&chain->base.present_timing.lock); + + /* Present timing is only defined to work with FIFO modes, so we can rely on having + * reliable relative timings, since we block for COMPLETE to come through before we queue up more presents. */ + if (relative) { + /* If application is trying to drive us at refresh rate, FIFO will take care of it. + * Don't end up in a situation where we sleep and miss the deadline by mistake. 
*/ + if (!chain->has_reliable_msc) { + uint64_t relative_threshold; + if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) + relative_threshold = 3 * chain->base.present_timing.refresh_duration / 2; + else + relative_threshold = chain->base.present_timing.refresh_duration; + + if (request->time <= relative_threshold) { + mtx_unlock(&chain->base.present_timing.lock); + return minimum_msc; + } + } + target_ns = 1000 * (int64_t)entry->ust + (int64_t)request->time; + } else { + target_ns = (int64_t)request->time; + } + + /* Snap to nearest half refresh. This only makes sense for FRR, but it is the application's + * responsibility to not use this for VRR. If this flag is not used, this is strictly a "not before". */ + if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) + target_ns -= (int64_t)chain->base.present_timing.refresh_duration / 2; + + if (entry->msc && chain->base.present_timing.refresh_duration != 0 && + chain->msc_estimate_is_stable && chain->has_reliable_msc) { + /* If we can trust MSC to be a stable FRR heartbeat, we sync to that. */ + uint64_t delta_time_ns = MAX2(target_ns - 1000 * (int64_t)entry->ust, 0); + uint64_t periods = (delta_time_ns + chain->base.present_timing.refresh_duration - 1) / + chain->base.present_timing.refresh_duration; + mtx_unlock(&chain->base.present_timing.lock); + + /* Xwl cannot understand MSC that jumps by more than 1. It appears that if there are MSC jumps above 1, + * each MSC cycle is padded by 16.6ms or something like that. + * If we want to target specific time, we must sleep to achieve that until Xwl improves. + * Fortunately, we're on a submit thread, so that is mostly an acceptable solution. */ + minimum_msc = MAX2(minimum_msc, entry->msc + periods); + } else { + /* If we don't have a stable estimate (e.g. true VRR, or Xwl) we just sleep until deadline. + * This relies on timebase on os_time_nanosleep is MONOTONIC as well as UST being MONOTONIC. 
*/ + + if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) { + if (!chain->has_reliable_msc && chain->base.present_timing.refresh_duration) { + uint64_t delta_time_ns = MAX2(target_ns - 1000 * (int64_t)entry->ust, 0); + uint64_t periods = delta_time_ns / chain->base.present_timing.refresh_duration; + + target_ns = 1000ull * entry->ust + periods * chain->base.present_timing.refresh_duration; + + /* Set a minimum target that is very close to the real estimate. + * This way, we ensure that we don't regularly round estimates up in + * chain->next_present_ust_lower_bound. */ + target_ns += 63 * chain->base.present_timing.refresh_duration / 64; + } + } + + if (chain->has_reliable_msc) { + /* Very regular sleeping can trigger a strange feedback loop where MSC estimates becomes stable enough + * that we accept it as stable MSC. Perturb the rates enough to make it extremely unlikely + * we accept sleeping patterns as ground truth rate, introduce a 50 us error between each timestamp, + * which should avoid the 10 us check reliably. If sleep quantas are not as accurate, it's extremely unlikely + * we get a stable pace anyway. TODO: Is there a more reliable way? */ + + /* On Xwl we never accept MSC estimates as ground truth, so ignore this perturbation. */ + target_ns += 50000ll * (chain->present_timing_window_index & 1) - 25000; + target_ns = MAX2(target_ns, 0); + } + + /* If we're on Xwl or VRR X11 and trying to target a specific cycle by sleeping, pull back the sleep a bit. + * We will be racing against time once we wake up to send the request to Xwl -> Wayland -> frame callback -> COMPLETE. + * If target_ns syncs well to a refresh cycle, we speculate that COMPLETE will come through at about target_ns. */ + + /* To get proper pace on an actual VRR display, we will have to detect if we're presenting too early + * compared to what application actually expected. 
+ * In that case, we need to remove this compensation if we detect that presents come in too early. + * Effectively, we will need to adjust the report UST up if we somehow end up seeing a timestamp too early. + * The relative refresh will feed off this adjustment in a tight loop, so this should be pretty solid + * for both VRR and FRR. Present timing can only be used with FIFO modes, i.e. we will not overwrite this + * until the present is actually complete. */ + chain->next_present_ust_lower_bound = target_ns / 1000; + + /* We also need to pull back the sleep a bit to account for X.org roundtrip delays. + * Allow up to 4ms of error here. */ + int64_t eager_present_ns = MIN2((int64_t)chain->base.present_timing.refresh_duration / 4, 4 * 1000 * 1000); + target_ns -= eager_present_ns; + target_ns = MAX2(target_ns, 0); + + mtx_unlock(&chain->base.present_timing.lock); + mtx_unlock(&chain->thread_state_lock); + + os_time_nanosleep_until(target_ns); + + /* Reacquiring the lock won't change any invariants for us, so this is fine. + * We make sure to check chain->status after this function in case that got updated while we were sleeping. */ + mtx_lock(&chain->thread_state_lock); + } + + return minimum_msc; +} + /** * Presentation thread. 
* @@ -1991,8 +2412,8 @@ static int x11_manage_present_queue(void *state) { struct x11_swapchain *chain = state; - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection((struct wsi_device*)chain->base.wsi, chain->conn); + struct wsi_x11_icd_surface *wsi_conn = + wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window); VkResult result = VK_SUCCESS; u_thread_setname("WSI swapchain queue"); @@ -2040,6 +2461,8 @@ x11_manage_present_queue(void *state) u_cnd_monotonic_wait(&chain->thread_state_cond, &chain->thread_state_lock); } + target_msc = x11_present_compute_target_msc(chain, &chain->images[image_index].timing_request, target_msc); + if (chain->status < 0) { mtx_unlock(&chain->thread_state_lock); break; @@ -2315,7 +2738,7 @@ wsi_x11_recompute_dri3_modifier_hash(blake3_hash *hash, const struct wsi_drm_ima } static void -wsi_x11_get_dri3_modifiers(struct wsi_x11_connection *wsi_conn, +wsi_x11_get_dri3_modifiers(struct wsi_x11_icd_surface *wsi_conn, xcb_connection_t *conn, xcb_window_t window, uint8_t depth, uint8_t bpp, uint64_t **modifiers_in, uint32_t *num_modifiers_in, @@ -2402,8 +2825,8 @@ wsi_x11_swapchain_query_dri3_modifiers_changed(struct x11_swapchain *chain) uint64_t *modifiers[2] = {NULL, NULL}; uint32_t num_modifiers[2] = {0, 0}; - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection((struct wsi_device*)chain->base.wsi, chain->conn); + struct wsi_x11_icd_surface *wsi_conn = + wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window); xcb_get_geometry_reply_t *geometry = xcb_get_geometry_reply(chain->conn, xcb_get_geometry(chain->conn, chain->window), NULL); @@ -2551,7 +2974,7 @@ static VkResult x11_wait_for_present(struct wsi_swapchain *wsi_chain, static unsigned x11_get_min_image_count_for_present_mode(struct wsi_device *wsi_device, - struct wsi_x11_connection *wsi_conn, + struct wsi_x11_icd_surface *wsi_conn, VkPresentModeKHR present_mode) { uint32_t min_image_count = 
x11_get_min_image_count(wsi_device, wsi_conn->is_xwayland); @@ -2592,8 +3015,9 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, * representing it. */ xcb_connection_t *conn = x11_surface_get_connection(icd_surface); - struct wsi_x11_connection *wsi_conn = - wsi_x11_get_connection(wsi_device, conn); + xcb_window_t window = x11_surface_get_window(icd_surface); + struct wsi_x11_icd_surface *wsi_conn = + wsi_x11_get_icd_surface(wsi_device, conn, window); if (!wsi_conn) return VK_ERROR_OUT_OF_HOST_MEMORY; @@ -2613,7 +3037,6 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, } /* Check that we have a window up-front. It is an error to not have one. */ - xcb_window_t window = x11_surface_get_window(icd_surface); /* Get the geometry of that window. The bit depth of the swapchain will be fitted and the * chain's images extents should fit it for performance-optimizing flips. @@ -2736,8 +3159,14 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->base.wait_for_present2 = x11_wait_for_present; chain->base.release_images = x11_release_images; chain->base.set_present_mode = x11_set_present_mode; + chain->base.set_timing_request = x11_set_timing_request; + chain->base.poll_early_refresh = x11_poll_early_refresh; chain->base.present_mode = present_mode; chain->base.image_count = num_images; + + /* This is what Xserver is using. We cannot really query it, but we rely on it working. 
*/ + chain->base.present_timing.time_domain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR; + chain->conn = conn; chain->window = window; chain->depth = bit_depth; @@ -2749,6 +3178,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->has_dri3_modifiers = wsi_conn->has_dri3_modifiers; chain->has_mit_shm = wsi_conn->has_mit_shm; chain->has_async_may_tear = present_caps & XCB_PRESENT_CAPABILITY_ASYNC_MAY_TEAR; + chain->has_reliable_msc = !wsi_conn->is_xwayland; /* When images in the swapchain don't fit the window, X can still present them, but it won't * happen by flip, only by copy. So this is a suboptimal copy, because if the client would change @@ -2856,6 +3286,9 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, /* It is safe to set it here as only one swapchain can be associated with * the window, and swapchain creation does the association. At this point * we know the creation is going to succeed. */ + + /* If we have present timing, we need to make sure we get a usable estimate for refresh rate + * before we let the window run in full VRR. Once we have locked in the rate, we can enable VRR properly. 
*/ wsi_x11_set_adaptive_sync_property(conn, window, wsi_device->enable_adaptive_sync); @@ -2889,6 +3322,18 @@ fail_alloc: return result; } +static uint32_t x11_hash_icd_surface(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct x11_icd_surface_key)); +} + +static bool x11_icd_surface_equal(const void *a_, const void *b_) +{ + const struct x11_icd_surface_key *a = a_; + const struct x11_icd_surface_key *b = b_; + return a->conn == b->conn && a->window == b->window; +} + VkResult wsi_x11_init_wsi(struct wsi_device *wsi_device, const VkAllocationCallbacks *alloc, @@ -2916,8 +3361,7 @@ wsi_x11_init_wsi(struct wsi_device *wsi_device, goto fail_alloc; } - wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); + wsi->connections = _mesa_hash_table_create(NULL, x11_hash_icd_surface, x11_icd_surface_equal); if (!wsi->connections) { result = VK_ERROR_OUT_OF_HOST_MEMORY; goto fail_mutex; @@ -2981,7 +3425,7 @@ wsi_x11_finish_wsi(struct wsi_device *wsi_device, if (wsi) { hash_table_foreach(wsi->connections, entry) - wsi_x11_connection_destroy(wsi_device, entry->data); + wsi_x11_icd_surface_destroy(wsi_device, entry->data); _mesa_hash_table_destroy(wsi->connections, NULL); From 789c0bc19e9363dd0e874faa84b151c3d3f0b523 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 4 Dec 2025 13:52:02 +0100 Subject: [PATCH 05/17] wsi/wayland: Implement EXT_present_timing on Wayland. Only weakness right now is that we cannot implement VRR vs FRR query reliably. 
Signed-off-by: Hans-Kristian Arntzen --- src/vulkan/wsi/wsi_common_wayland.c | 198 +++++++++++++++++++++++++--- 1 file changed, 181 insertions(+), 17 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index 38b9ed61ba1..ef0d13d8d95 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -254,6 +254,8 @@ struct wsi_wl_swapchain { bool has_hdr_metadata; } color; + struct wsi_image_timing_request timing_request; + struct wsi_wl_image images[0]; }; VK_DEFINE_NONDISP_HANDLE_CASTS(wsi_wl_swapchain, base.base, VkSwapchainKHR, @@ -1789,7 +1791,8 @@ wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, static VkResult wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface, struct wsi_device *wsi_device, - bool *has_wp_presentation) + bool *has_wp_presentation, clockid_t *clock_id, + bool *has_commit_timing, bool *has_fifo) { VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; struct wsi_wayland *wsi = @@ -1800,7 +1803,17 @@ wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface, wsi_device->sw, "mesa check wp_presentation")) return VK_ERROR_SURFACE_LOST_KHR; - *has_wp_presentation = !!display.wp_presentation_notwrapped; + if (has_wp_presentation) + *has_wp_presentation = !!display.wp_presentation_notwrapped; + + if (clock_id) + *clock_id = display.presentation_clock_id; + + if (has_commit_timing) + *has_commit_timing = !!display.commit_timing_manager; + + if (has_fifo) + *has_fifo = !!display.fifo_manager; wsi_wl_display_finish(&display); @@ -1893,7 +1906,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface, bool has_feedback; result = wsi_wl_surface_check_presentation(surface, wsi_device, - &has_feedback); + &has_feedback, NULL, NULL, NULL); if (result != VK_SUCCESS) return result; @@ -1906,7 +1919,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface, bool has_feedback; result = wsi_wl_surface_check_presentation(surface, 
wsi_device, - &has_feedback); + &has_feedback, NULL, NULL, NULL); if (result != VK_SUCCESS) return result; @@ -1916,11 +1929,45 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface, case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: { VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext; + bool has_feedback, has_commit_timing, has_fifo; wait->presentStageQueries = 0; wait->presentTimingSupported = VK_FALSE; wait->presentAtAbsoluteTimeSupported = VK_FALSE; wait->presentAtRelativeTimeSupported = VK_FALSE; + + clockid_t clock_id; + + result = wsi_wl_surface_check_presentation(surface, wsi_device, + &has_feedback, &clock_id, + &has_commit_timing, &has_fifo); + + if (result != VK_SUCCESS) + return result; + + if (!has_feedback) + break; + + /* We could deal with esoteric clock domains by exposing VK_TIME_DOMAIN_SWAPCHAIN or PRESENT_STAGE_LOCAL, + * but that requires a lot more scaffolding, and there's no need to add extra complexity if we can + * get away with this. */ + if (clock_id != CLOCK_MONOTONIC && clock_id != CLOCK_MONOTONIC_RAW) + break; + + /* Presentation timing spec talks about the reported time targeting "pixel being visible". + * From presentation-time spec: "Note, that if the display path has a non-zero latency, + * the time instant specified by this counter may differ from the timestamp's." + * No compositor I know of reports where it takes display latency into account, + * so it's a little unclear if we should actually be reporting PIXEL_OUT or PIXEL_VISIBLE. + * Choose PIXEL_OUT for now since no known compositor out there actually implements + * PIXEL_VISIBLE as intended, and we don't want to promise something we cannot hold. */ + wait->presentTimingSupported = VK_TRUE; + wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT; + + /* We cannot reliably implement FIFO guarantee + absolute time without the FIFO barrier. 
+ * Presentation timing is only defined to work with FIFO (and its variants like RELAXED and LATEST_READY). */ + wait->presentAtAbsoluteTimeSupported = has_commit_timing && has_fifo; + break; } @@ -2414,6 +2461,7 @@ struct wsi_wl_present_id { * which uses frame callback to signal DRI3 COMPLETE. */ struct wl_callback *frame; uint64_t present_id; + uint64_t timing_serial; struct mesa_trace_flow flow; uint64_t submission_time; const VkAllocationCallbacks *alloc; @@ -2421,6 +2469,7 @@ struct wsi_wl_present_id { uint64_t target_time; uint64_t correction; struct wl_list link; + bool user_target_time; }; static struct wsi_image * @@ -2451,6 +2500,14 @@ wsi_wl_swapchain_set_present_mode(struct wsi_swapchain *wsi_chain, chain->base.present_mode = mode; } +static void +wsi_wl_swapchain_set_timing_request(struct wsi_swapchain *wsi_chain, + const struct wsi_image_timing_request *request) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain; + chain->timing_request = *request; +} + static VkResult dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_time) { @@ -2524,6 +2581,15 @@ dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_ return VK_SUCCESS; } +static void +wsi_wl_swapchain_poll_timing_request(struct wsi_swapchain *wsi_chain) +{ + /* Timing requests must complete in finite time, and if we're not calling present wait + * or queue present regularly, timing requests will never come back. */ + struct timespec instant = {0}; + dispatch_present_id_queue(wsi_chain, &instant); +} + static bool wsi_wl_swapchain_present_id_completes_in_finite_time_locked(struct wsi_wl_swapchain *chain, uint64_t present_id) @@ -2825,6 +2891,20 @@ presentation_handle_presented(void *data, struct wsi_wl_swapchain *chain = id->chain; uint64_t target_time = id->target_time; + /* In v1 of presentation time, we can know if we're likely running VRR, given refresh is 0. 
+ * However, we cannot know what the base refresh rate is without some kind of external information. + * We also cannot know if we're actually driving the display in a VRR fashion. + * In v2, we should always know the "base refresh" rate, but that means we cannot know if we're driving + * the display VRR or FRR. We could try to deduce it based on timestamps, but that is too brittle. + * There is a v3 proposal that adds this information more formally so we don't have to guess. + * Knowing VRR or FRR is not mission critical for most use cases, so just report "Unknown" for now. */ + wsi_swapchain_present_timing_update_refresh_rate(&chain->base, refresh, 0, 0); + + /* Notify this before present wait to reduce latency of presentation timing requests + * if the application is driving its queries based off present waits. */ + if (id->timing_serial) + wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, presentation_time); + mtx_lock(&chain->present_ids.lock); chain->present_ids.refresh_nsec = refresh; if (!chain->present_ids.valid_refresh_nsec) { @@ -2836,7 +2916,9 @@ presentation_handle_presented(void *data, if (presentation_time > chain->present_ids.displayed_time) chain->present_ids.displayed_time = presentation_time; - if (target_time && presentation_time > target_time) + /* If we have user-defined target time it can be arbitrarily early, and we don't + * want to start compensating for that error if application stops requesting specific time. 
*/ + if (!id->user_target_time && target_time && presentation_time > target_time) chain->present_ids.display_time_error = presentation_time - target_time; else chain->present_ids.display_time_error = 0; @@ -2851,6 +2933,15 @@ presentation_handle_discarded(void *data) struct wsi_wl_present_id *id = data; struct wsi_wl_swapchain *chain = id->chain; + /* From Vulkan spec: + * "Timing information for some present stages may have a time value of 0, + * indicating that results for that present stage are not available." + * Worst case we can simply take a timestamp of clock_id and pretend, but + * applications may start to latch onto that timestamp as ground truth, which + * is obviously not correct. */ + if (id->timing_serial) + wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, 0); + mtx_lock(&chain->present_ids.lock); if (!chain->present_ids.valid_refresh_nsec) { /* We've started occluded, so make up some safe values to throttle us */ @@ -2905,6 +2996,29 @@ static const struct wl_callback_listener frame_listener = { frame_handle_done, }; +static bool +set_application_driven_timestamp(struct wsi_wl_swapchain *chain, + uint64_t *timestamp, + uint64_t *correction) +{ + if (chain->timing_request.serial && chain->timing_request.time) { + /* Absolute time is requested before we have been able to report a reasonable refresh rate + * to application. This is valid, but we should not try to perform any rounding. + * NEAREST_REFRESH_CYCLE flag cannot be honored because it's impossible to know at this time. 
*/ + struct timespec target_ts; + timespec_from_nsec(&target_ts, chain->timing_request.time); + wp_commit_timer_v1_set_timestamp(chain->commit_timer, + (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec, + target_ts.tv_nsec); + *timestamp = chain->timing_request.time; + *correction = 0; + chain->present_ids.last_target_time = chain->timing_request.time; + return true; + } else { + return false; + } +} + /* The present_ids lock must be held */ static bool set_timestamp(struct wsi_wl_swapchain *chain, @@ -2918,7 +3032,7 @@ set_timestamp(struct wsi_wl_swapchain *chain, int32_t error = 0; if (!chain->present_ids.valid_refresh_nsec) - return false; + return set_application_driven_timestamp(chain, timestamp, correction); displayed_time = chain->present_ids.displayed_time; refresh = chain->present_ids.refresh_nsec; @@ -2928,7 +3042,7 @@ set_timestamp(struct wsi_wl_swapchain *chain, * timestamps at all, so bail out. */ if (!refresh) - return false; + return set_application_driven_timestamp(chain, timestamp, correction); /* We assume we're being fed at the display's refresh rate, but * if that doesn't happen our timestamps fall into the past. @@ -2946,6 +3060,10 @@ set_timestamp(struct wsi_wl_swapchain *chain, error = chain->present_ids.display_time_error - chain->present_ids.display_time_correction; + /* If we're driving timestamps from application, this is somewhat redundant + * but it will drain out any accumulated display_time_error over time. + * Accumulated errors are expected since application might not + * align the target time perfectly against a refresh cycle. 
*/ target = chain->present_ids.last_target_time; if (error > 0) { target += (error / refresh) * refresh; @@ -2955,19 +3073,41 @@ set_timestamp(struct wsi_wl_swapchain *chain, } chain->present_ids.display_time_correction += *correction; - target = next_phase_locked_time(displayed_time, - refresh, - target); - /* Take back 500 us as a safety margin, to ensure we don't miss our - * target due to round-off error. - */ - timespec_from_nsec(&target_ts, target - 500000); + + if (chain->timing_request.serial && chain->timing_request.time) { + target = chain->timing_request.time; + chain->present_ids.last_target_time = target; + *timestamp = target; + + if (chain->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) + target -= chain->present_ids.refresh_nsec / 2; + + /* Without the flag, the application is supposed to deal with any safety margins on its own. */ + timespec_from_nsec(&target_ts, target); + + /* If we're using commit timing path, we always have FIFO protocol, so we don't have to + * consider scenarios where application is passing a very low present time. + * I.e., there is no need to max() the application timestamp against our estimated next refresh cycle. + * If the surface is occluded, it's possible to render at a higher rate than display refresh rate, + * but that's okay. Those presents will be discarded anyway, and we won't report odd timestamps to application. */ + } else { + target = next_phase_locked_time(displayed_time, + refresh, + target); + + chain->present_ids.last_target_time = target; + *timestamp = target; + + /* Take back 500 us as a safety margin, to ensure we don't miss our + * target due to round-off error. 
+ */ + timespec_from_nsec(&target_ts, target - 500000); + } + wp_commit_timer_v1_set_timestamp(chain->commit_timer, (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec, target_ts.tv_nsec); - chain->present_ids.last_target_time = target; - *timestamp = target; return true; } @@ -3069,13 +3209,15 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, } if (present_id > 0 || (mode_fifo && chain->commit_timer) || - util_perfetto_is_tracing_enabled()) { + util_perfetto_is_tracing_enabled() || chain->timing_request.serial) { struct wsi_wl_present_id *id = vk_zalloc(chain->wsi_wl_surface->display->wsi_wl->alloc, sizeof(*id), sizeof(uintptr_t), VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); id->chain = chain; id->present_id = present_id; id->alloc = chain->wsi_wl_surface->display->wsi_wl->alloc; + id->timing_serial = chain->timing_request.serial; + id->user_target_time = chain->timing_request.time != 0; mtx_lock(&chain->present_ids.lock); @@ -3203,6 +3345,8 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, wsi_wl_surface->display->queue); } + memset(&chain->timing_request, 0, sizeof(chain->timing_request)); + return VK_SUCCESS; } @@ -3437,6 +3581,20 @@ wsi_wl_swapchain_destroy(struct wsi_swapchain *wsi_chain, return VK_SUCCESS; } +static VkTimeDomainKHR +clock_id_to_vk_time_domain(clockid_t id) +{ + switch (id) { + case CLOCK_MONOTONIC: + return VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR; + case CLOCK_MONOTONIC_RAW: + return VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR; + default: + /* Default fallback. Will not be used. 
*/ + return VK_TIME_DOMAIN_DEVICE_KHR; + } +} + static VkResult wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, VkDevice device, @@ -3615,6 +3773,12 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->base.queue_present = wsi_wl_swapchain_queue_present; chain->base.release_images = wsi_wl_swapchain_release_images; chain->base.set_present_mode = wsi_wl_swapchain_set_present_mode; + chain->base.set_timing_request = wsi_wl_swapchain_set_timing_request; + chain->base.poll_timing_request = wsi_wl_swapchain_poll_timing_request; + if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) { + chain->base.present_timing.time_domain = + clock_id_to_vk_time_domain(wsi_wl_surface->display->presentation_clock_id); + } chain->base.wait_for_present = wsi_wl_swapchain_wait_for_present; chain->base.wait_for_present2 = wsi_wl_swapchain_wait_for_present2; chain->base.present_mode = present_mode; From 98dbd18c3f169d6a0572b419204a08e31673cf58 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 1 Dec 2025 16:28:44 +0100 Subject: [PATCH 06/17] radv: Enable EXT_present_timing. Signed-off-by: Hans-Kristian Arntzen --- src/amd/vulkan/radv_physical_device.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index 91473b82817..0f6700ff6ef 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -791,6 +791,10 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device .EXT_pipeline_library_group_handles = radv_enable_rt(pdev), .EXT_pipeline_robustness = !pdev->use_llvm, .EXT_post_depth_coverage = pdev->info.gfx_level >= GFX10, +#ifdef RADV_USE_WSI_PLATFORM + /* KHR_calibrated_timestamps is a requirement to expose EXT_present_timing. 
*/ + .EXT_present_timing = radv_calibrated_timestamps_enabled(pdev), +#endif .EXT_primitive_topology_list_restart = true, .EXT_primitives_generated_query = true, .EXT_private_data = true, @@ -1481,6 +1485,14 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc /* VK_EXT_custom_resolve */ .customResolve = true, + +#ifdef RADV_USE_WSI_PLATFORM + /* VK_EXT_present_timing */ + /* The actual query is deferred to surface time. */ + .presentTiming = true, + .presentAtAbsoluteTime = true, + .presentAtRelativeTime = true, +#endif }; } From 41b7421320e0c051e4493ab267aa8558a5569d07 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 10 Dec 2025 17:54:35 +0100 Subject: [PATCH 07/17] turnip: Enable EXT_present_timing. Signed-off-by: Hans-Kristian Arntzen --- src/freedreno/vulkan/tu_device.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc index dceb5227116..af341c83a60 100644 --- a/src/freedreno/vulkan/tu_device.cc +++ b/src/freedreno/vulkan/tu_device.cc @@ -314,6 +314,9 @@ get_device_extensions(const struct tu_physical_device *device, .EXT_physical_device_drm = !is_kgsl(device->instance), .EXT_pipeline_creation_cache_control = true, .EXT_pipeline_creation_feedback = true, +#ifdef TU_USE_WSI_PLATFORM + .EXT_present_timing = device->info->props.has_persistent_counter, +#endif .EXT_primitive_topology_list_restart = true, .EXT_primitives_generated_query = true, .EXT_private_data = true, @@ -825,6 +828,13 @@ tu_get_features(struct tu_physical_device *pdevice, /* VK_EXT_custom_resolve */ features->customResolve = true; + +#ifdef TU_USE_WSI_PLATFORM + /* VK_EXT_present_timing */ + features->presentTiming = true; + features->presentAtRelativeTime = true; + features->presentAtAbsoluteTime = true; +#endif } static void From 92609ef4f98b251e072f1226d3f5cf69390de5b8 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 10 Dec 2025 17:54:48 +0100 Subject: 
[PATCH 08/17] anv: Enable VK_EXT_present_timing. Signed-off-by: Hans-Kristian Arntzen --- src/intel/vulkan/anv_physical_device.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/intel/vulkan/anv_physical_device.c b/src/intel/vulkan/anv_physical_device.c index 9c25067e95c..9a89d23d4bd 100644 --- a/src/intel/vulkan/anv_physical_device.c +++ b/src/intel/vulkan/anv_physical_device.c @@ -354,6 +354,9 @@ get_device_extensions(const struct anv_physical_device *device, .EXT_pipeline_protected_access = device->has_protected_contexts, .EXT_pipeline_robustness = true, .EXT_post_depth_coverage = true, +#ifdef ANV_USE_WSI_PLATFORM + .EXT_present_timing = device->has_reg_timestamp, +#endif .EXT_primitive_topology_list_restart = true, .EXT_primitives_generated_query = true, .EXT_private_data = true, @@ -1005,6 +1008,13 @@ get_features(const struct anv_physical_device *pdevice, /* VK_KHR_pipeline_binary */ .pipelineBinaries = true, + +#ifdef ANV_USE_WSI_PLATFORM + /* VK_EXT_present_timing */ + .presentTiming = true, + .presentAtRelativeTime = true, + .presentAtAbsoluteTime = true, +#endif }; /* The new DOOM and Wolfenstein games require depthBounds without From aed8621015aed5ff00a4d07b185fd314baacbfab Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 10 Dec 2025 17:55:02 +0100 Subject: [PATCH 09/17] nvk: Enable EXT_present_timing. 
Signed-off-by: Hans-Kristian Arntzen --- src/nouveau/vulkan/nvk_physical_device.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/nouveau/vulkan/nvk_physical_device.c b/src/nouveau/vulkan/nvk_physical_device.c index de6ee083ab3..80b82f4b9ff 100644 --- a/src/nouveau/vulkan/nvk_physical_device.c +++ b/src/nouveau/vulkan/nvk_physical_device.c @@ -262,6 +262,9 @@ nvk_get_device_extensions(const struct nvk_instance *instance, .EXT_pipeline_robustness = true, .EXT_physical_device_drm = true, .EXT_post_depth_coverage = info->cls_eng3d >= MAXWELL_B, +#ifdef NVK_USE_WSI_PLATFORM + .EXT_present_timing = true, +#endif .EXT_primitive_topology_list_restart = true, .EXT_private_data = true, .EXT_primitives_generated_query = true, @@ -753,6 +756,11 @@ nvk_get_device_features(const struct nv_device_info *info, /* VK_KHR_present_wait2 */ .presentWait2 = true, + + /* VK_EXT_present_timing */ + .presentTiming = true, + .presentAtRelativeTime = true, + .presentAtAbsoluteTime = true, #endif }; } From b28424949fd112baffd236270beabe0421961b34 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Wed, 10 Dec 2025 22:00:41 +0100 Subject: [PATCH 10/17] hk: enable VK_EXT_present_timing --- src/asahi/vulkan/hk_physical_device.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/asahi/vulkan/hk_physical_device.c b/src/asahi/vulkan/hk_physical_device.c index c49fac86e21..7fad0103906 100644 --- a/src/asahi/vulkan/hk_physical_device.c +++ b/src/asahi/vulkan/hk_physical_device.c @@ -181,6 +181,9 @@ hk_get_device_extensions(const struct hk_instance *instance, .EXT_pipeline_protected_access = true, .EXT_pipeline_robustness = true, .EXT_physical_device_drm = true, +#ifdef HK_USE_WSI_PLATFORM + .EXT_present_timing = true, +#endif .EXT_primitive_topology_list_restart = true, .EXT_private_data = true, .EXT_primitives_generated_query = false, @@ -623,6 +626,13 @@ hk_get_device_features( /* VK_KHR_shader_relaxed_extended_instruction */ .shaderRelaxedExtendedInstruction = true, + 
+#ifdef HK_USE_WSI_PLATFORM + /* VK_EXT_present_timing */ + .presentTiming = true, + .presentAtRelativeTime = true, + .presentAtAbsoluteTime = true, +#endif }; } From 44334b74ad432db81b181692c17dfb714dccacf5 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 8 Dec 2025 14:25:47 +0100 Subject: [PATCH 11/17] vulkan/wsi: Implement QUEUE_OPERATIONS_END present timing query. This is mostly provided for convenience, but it's not implementable by applications when we're using blit queues for PRIME, so it's quite useful to have. This is reworked from previous GOOGLE_display_timing MRs by Keith Packard and Emma Anholt. See MR 38472 for reference. Rather than exposing PRESENT_STAGE_LOCAL, we expose all timestamps in one unified domain to simplify the implementation. Signed-off-by: Hans-Kristian Arntzen --- src/vulkan/wsi/wsi_common.c | 326 ++++++++++++++++++++++++++-- src/vulkan/wsi/wsi_common.h | 11 + src/vulkan/wsi/wsi_common_private.h | 15 +- src/vulkan/wsi/wsi_common_wayland.c | 16 +- src/vulkan/wsi/wsi_common_x11.c | 13 +- 5 files changed, 356 insertions(+), 25 deletions(-) diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c index 8ef935edb44..ffd08b4d6a2 100644 --- a/src/vulkan/wsi/wsi_common.c +++ b/src/vulkan/wsi/wsi_common.c @@ -95,6 +95,7 @@ wsi_device_init(struct wsi_device *wsi, WSI_GET_CB(GetPhysicalDeviceProperties2); WSI_GET_CB(GetPhysicalDeviceMemoryProperties); WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties); + WSI_GET_CB(GetPhysicalDeviceProperties); #undef WSI_GET_CB wsi->drm_info.sType = @@ -121,10 +122,18 @@ wsi_device_init(struct wsi_device *wsi, VkQueueFamilyProperties queue_properties[64]; GetPhysicalDeviceQueueFamilyProperties(pdevice, &wsi->queue_family_count, queue_properties); + VkPhysicalDeviceProperties properties; + GetPhysicalDeviceProperties(pdevice, &properties); + wsi->timestamp_period = properties.limits.timestampPeriod; + for (unsigned i = 0; i < wsi->queue_family_count; i++) { VkFlags req_flags = 
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT; if (queue_properties[i].queueFlags & req_flags) wsi->queue_supports_blit |= BITFIELD64_BIT(i); + + /* Don't want to consider timestamp wrapping logic. */ + if (queue_properties[i].timestampValidBits == 64) + wsi->queue_supports_timestamps |= BITFIELD64_BIT(i); } for (VkExternalSemaphoreHandleTypeFlags handle_type = 1; @@ -180,15 +189,19 @@ wsi_device_init(struct wsi_device *wsi, WSI_GET_CB(CmdPipelineBarrier); WSI_GET_CB(CmdCopyImage); WSI_GET_CB(CmdCopyImageToBuffer); + WSI_GET_CB(CmdResetQueryPool); + WSI_GET_CB(CmdWriteTimestamp); WSI_GET_CB(CreateBuffer); WSI_GET_CB(CreateCommandPool); WSI_GET_CB(CreateFence); WSI_GET_CB(CreateImage); + WSI_GET_CB(CreateQueryPool); WSI_GET_CB(CreateSemaphore); WSI_GET_CB(DestroyBuffer); WSI_GET_CB(DestroyCommandPool); WSI_GET_CB(DestroyFence); WSI_GET_CB(DestroyImage); + WSI_GET_CB(DestroyQueryPool); WSI_GET_CB(DestroySemaphore); WSI_GET_CB(EndCommandBuffer); WSI_GET_CB(FreeMemory); @@ -200,9 +213,14 @@ wsi_device_init(struct wsi_device *wsi, WSI_GET_CB(GetImageSubresourceLayout); if (!wsi->sw) WSI_GET_CB(GetMemoryFdKHR); + WSI_GET_CB(GetPhysicalDeviceCalibrateableTimeDomainsKHR); + WSI_GET_CB(GetPhysicalDeviceProperties); WSI_GET_CB(GetPhysicalDeviceFormatProperties); WSI_GET_CB(GetPhysicalDeviceFormatProperties2); WSI_GET_CB(GetPhysicalDeviceImageFormatProperties2); + WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties); + WSI_GET_CB(GetCalibratedTimestampsKHR); + WSI_GET_CB(GetQueryPoolResults); WSI_GET_CB(GetSemaphoreFdKHR); WSI_GET_CB(ResetFences); WSI_GET_CB(QueueSubmit2); @@ -481,8 +499,10 @@ wsi_swapchain_init(const struct wsi_device *wsi, chain->blit.type = get_blit_type(wsi, image_params, _device); chain->blit.queue = NULL; - if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT) { - if (wsi->get_blit_queue) { + if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT || + (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT)) { + + if (chain->blit.type != 
WSI_SWAPCHAIN_NO_BLIT && wsi->get_blit_queue) { chain->blit.queue = wsi->get_blit_queue(_device); } @@ -503,10 +523,18 @@ wsi_swapchain_init(const struct wsi_device *wsi, if (chain->blit.queue != NULL) { queue_family_index = chain->blit.queue->queue_family_index; } else { + uint64_t effective_queues = wsi->queue_supports_blit; + if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) + effective_queues &= wsi->queue_supports_timestamps; + + /* Fallback. If this happens we don't advertise support for queue complete times. */ + if (!effective_queues) + effective_queues = wsi->queue_supports_blit; + /* Queues returned by get_blit_queue() might not be listed in * GetPhysicalDeviceQueueFamilyProperties, so this check is skipped for those queues. */ - if (!(wsi->queue_supports_blit & BITFIELD64_BIT(queue_family_index))) + if (!(effective_queues & BITFIELD64_BIT(queue_family_index))) continue; } @@ -616,7 +644,7 @@ wsi_swapchain_finish(struct wsi_swapchain *chain) chain->wsi->DestroySemaphore(chain->device, chain->present_id_timeline, &chain->alloc); - if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT) { + if (chain->cmd_pools) { int cmd_pools_count = chain->blit.queue != NULL ? 1 : chain->wsi->queue_family_count; for (uint32_t i = 0; i < cmd_pools_count; i++) { @@ -821,6 +849,88 @@ fail: return result; } +/** + * Creates the timestamp-query command buffers for the end of rendering, that + * will be used to report QUEUE_COMPLETE timestamp for EXT_present_timing. + * + * Unless the swapchain is blitting, we don't know what queue family a Present + * will happen on. So we make a timestamp command buffer for each so they're + * ready to go at present time. 
+ */ +VkResult +wsi_image_init_timestamp(const struct wsi_swapchain *chain, + struct wsi_image *image) +{ + const struct wsi_device *wsi = chain->wsi; + VkResult result; + /* Set up command buffer to get timestamp info */ + + result = wsi->CreateQueryPool( + chain->device, + &(const VkQueryPoolCreateInfo){ + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .queryType = VK_QUERY_TYPE_TIMESTAMP, + .queryCount = 1, + }, + NULL, + &image->query_pool); + + if (result != VK_SUCCESS) + goto fail; + + uint32_t family_count = chain->blit.queue ? 1 : wsi->queue_family_count; + + if (!image->timestamp_cmd_buffers) { + image->timestamp_cmd_buffers = + vk_zalloc(&chain->alloc, sizeof(VkCommandBuffer) * family_count, 8, + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); + if (!image->timestamp_cmd_buffers) + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + + for (uint32_t i = 0; i < family_count; i++) { + /* We can only use timestamps on a queue that reports timestamp bits != 0. + * Since we don't consider device timestamp wrapping in this implementation (unclear how that would ever work), + * only report queue done where timestamp bits == 64. 
*/ + if (!chain->cmd_pools[i]) + continue; + + result = wsi->AllocateCommandBuffers( + chain->device, + &(const VkCommandBufferAllocateInfo){ + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .pNext = NULL, + .commandPool = chain->cmd_pools[i], + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }, &image->timestamp_cmd_buffers[i]); + + if (result != VK_SUCCESS) + goto fail; + + wsi->BeginCommandBuffer( + image->timestamp_cmd_buffers[i], + &(VkCommandBufferBeginInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + }); + + wsi->CmdResetQueryPool(image->timestamp_cmd_buffers[i], + image->query_pool, + 0, 1); + + wsi->CmdWriteTimestamp(image->timestamp_cmd_buffers[i], + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + image->query_pool, + 0); + + wsi->EndCommandBuffer(image->timestamp_cmd_buffers[i]); + } + + return VK_SUCCESS; +fail: + return result; +} + void wsi_destroy_image(const struct wsi_swapchain *chain, struct wsi_image *image) @@ -856,6 +966,19 @@ wsi_destroy_image(const struct wsi_swapchain *chain, vk_free(&chain->alloc, image->blit.cmd_buffers); } + wsi->DestroyQueryPool(chain->device, image->query_pool, NULL); + + if (image->timestamp_cmd_buffers) { + uint32_t family_count = chain->blit.queue ? 
1 : wsi->queue_family_count; + for (uint32_t i = 0; i < family_count; i++) { + if (image->timestamp_cmd_buffers[i]) { + wsi->FreeCommandBuffers(chain->device, chain->cmd_pools[i], + 1, &image->timestamp_cmd_buffers[i]); + } + } + vk_free(&chain->alloc, image->timestamp_cmd_buffers); + } + wsi->FreeMemory(chain->device, image->memory, &chain->alloc); wsi->DestroyImage(chain->device, image->image, &chain->alloc); wsi->DestroyImage(chain->device, image->blit.image, &chain->alloc); @@ -918,8 +1041,43 @@ wsi_GetPhysicalDeviceSurfaceCapabilities2KHR( struct wsi_device *wsi_device = device->wsi_device; struct wsi_interface *iface = wsi_device->wsi[surface->platform]; - return iface->get_capabilities2(surface, wsi_device, pSurfaceInfo->pNext, - pSurfaceCapabilities); + VkResult vr = iface->get_capabilities2(surface, wsi_device, pSurfaceInfo->pNext, + pSurfaceCapabilities); + if (vr != VK_SUCCESS) + return vr; + + struct VkPresentTimingSurfaceCapabilitiesEXT *present_timing = + vk_find_struct(pSurfaceCapabilities, PRESENT_TIMING_SURFACE_CAPABILITIES_EXT); + + if (present_timing && present_timing->presentTimingSupported) { + if (wsi_device->queue_supports_blit & wsi_device->queue_supports_timestamps) { + /* Make sure the implementation is capable of calibrating timestamps. 
*/ + if (wsi_device->GetPhysicalDeviceCalibrateableTimeDomainsKHR && wsi_device->GetCalibratedTimestampsKHR) { + VkTimeDomainKHR domains[64]; + uint32_t count = ARRAY_SIZE(domains); + wsi_device->GetPhysicalDeviceCalibrateableTimeDomainsKHR(wsi_device->pdevice, &count, domains); + + bool supports_device = false, supports_monotonic = false, supports_monotonic_raw = false; + + for (uint32_t i = 0; i < count; i++) { + if (domains[i] == VK_TIME_DOMAIN_DEVICE_KHR) + supports_device = true; + else if (domains[i] == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR) + supports_monotonic = true; + else if (domains[i] == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR) + supports_monotonic_raw = true; + } + + /* Current present timing implementations do not use anything outside these. + * QPC might be relevant for Dozen at some point, but for now, we only consider Linux-centric + * platforms for present timing. */ + if (supports_device && supports_monotonic && supports_monotonic_raw) + present_timing->presentStageQueries |= VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT; + } + } + } + + return vr; } VKAPI_ATTR VkResult VKAPI_CALL @@ -1122,6 +1280,15 @@ wsi_CreateSwapchainKHR(VkDevice _device, swapchain->present_timing.active = true; mtx_init(&swapchain->present_timing.lock, 0); + for (uint32_t i = 0; i < swapchain->image_count; i++) { + struct wsi_image *image = swapchain->get_wsi_image(swapchain, i); + result = wsi_image_init_timestamp(swapchain, image); + if (result != VK_SUCCESS) { + swapchain->destroy(swapchain, alloc); + return result; + } + } + if (swapchain->poll_early_refresh) { /* If we can query the display directly, we should report something reasonable on first query * before we even present the first time. 
*/ @@ -1191,8 +1358,85 @@ wsi_ReleaseSwapchainImagesKHR(VkDevice _device, return VK_SUCCESS; } +static void +wsi_swapchain_present_timing_sample_query_pool(struct wsi_swapchain *chain, + struct wsi_presentation_timing *timing, + struct wsi_image *image, + uint64_t upper_bound) +{ + if (!(timing->requested_feedback & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT)) + return; + + /* The GPU really should be done by now, and we should be able to read the timestamp, + * but it's possible that the present was discarded and we have a 0 timestamp here for the present. + * In this case, we should not block to wait on the queue dispatch timestamp. */ + uint64_t queue_ts; + + if (chain->wsi->GetQueryPoolResults(chain->device, image->query_pool, 0, 1, sizeof(uint64_t), + &queue_ts, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT) != VK_SUCCESS) + return; + + /* There are two ways to deal with DEVICE timestamp domain. + * Either we can report PRESENT_STAGE_LOCAL domain and let application + * calibrate the timestamps on its own. However, this creates an annoying situation + * where application is able to QueuePresentKHR requesting we use QUEUE_OPERATIONS_END time domain as + * the reference (targetTimeDomainPresentStage). + * In that case, we are forced to re-calibrate the timestamp anyway. + * We will also need to implement per-driver plumbing to forward SWAPCHAIN_LOCAL and PRESENT_STAGE_LOCAL + * time domains to the swapchain and query the underlying time domain. + * Instead of dealing with this mess, just recalibrate the timestamp. The accuracy of queue_operations_end + * is not particularly important. */ + + /* We have already made sure that the implementation supports these. 
*/ + const VkCalibratedTimestampInfoKHR infos[2] = { + { + .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR, + .timeDomain = VK_TIME_DOMAIN_DEVICE_KHR, + }, + { + .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR, + .timeDomain = chain->present_timing.time_domain, + }, + }; + + uint64_t timestamps[2]; + uint64_t max_deviation; + if (chain->wsi->GetCalibratedTimestampsKHR(chain->device, 2, infos, timestamps, &max_deviation) == VK_SUCCESS) { + int64_t device_delta_ticks = (int64_t)queue_ts - (int64_t)timestamps[0]; + int64_t device_delta_ns = (int64_t)((double)chain->wsi->timestamp_period * (double)device_delta_ticks); + uint64_t queue_timestamp = timestamps[1] + device_delta_ns; + + /* Make sure we don't report GPU completing after we flip the request. + * Avoids any weird precision issues creeping through. */ + if (upper_bound) + queue_timestamp = MIN2(queue_timestamp, upper_bound); + + timing->queue_done_time = queue_timestamp; + } +} + +static void +wsi_swapchain_present_timing_notify_recycle_locked(struct wsi_swapchain *chain, + struct wsi_image *image) +{ + assert(chain->present_timing.active); + + for (size_t i = 0; i < chain->present_timing.timings_count; i++) { + if (chain->present_timing.timings[i].image == image) { + /* A different present takes ownership of the image's query pool index now. */ + chain->present_timing.timings[i].image = NULL; + chain->present_timing.timings[i].queue_done_time = 0; + + /* We waited on progress fence, so the timestamp query is guaranteed to be done. 
*/ + wsi_swapchain_present_timing_sample_query_pool(chain, &chain->present_timing.timings[i], image, 0); + break; + } + } +} + static VkResult wsi_common_allocate_timing_request( - struct wsi_swapchain *swapchain, const VkPresentTimingInfoEXT *timing, uint64_t present_id) + struct wsi_swapchain *swapchain, const VkPresentTimingInfoEXT *timing, + uint64_t present_id, struct wsi_image *image) { VkResult vr = VK_SUCCESS; mtx_lock(&swapchain->present_timing.lock); @@ -1202,6 +1446,8 @@ static VkResult wsi_common_allocate_timing_request( goto err; } + wsi_swapchain_present_timing_notify_recycle_locked(swapchain, image); + struct wsi_presentation_timing *wsi_timing = &swapchain->present_timing.timings[swapchain->present_timing.timings_count++]; @@ -1210,6 +1456,7 @@ static VkResult wsi_common_allocate_timing_request( wsi_timing->target_time = timing->targetTime; wsi_timing->present_id = present_id; wsi_timing->requested_feedback = timing->presentStageQueries; + wsi_timing->image = image; /* Ignore the time domain since we have a static domain. */ @@ -1221,7 +1468,8 @@ err: void wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain, uint64_t timing_serial, - uint64_t timestamp) + uint64_t timestamp, + struct wsi_image *image) { assert(chain->present_timing.active); mtx_lock(&chain->present_timing.lock); @@ -1230,6 +1478,17 @@ wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain, if (chain->present_timing.timings[i].serial == timing_serial) { chain->present_timing.timings[i].complete_time = timestamp; chain->present_timing.timings[i].complete = VK_TRUE; + + /* It's possible that QueuePresentKHR already handled the queue done timestamp for us, + * since the image was recycled before presentation could fully complete. + * In this case, we no longer own the timestamp query pool index, so just skip. */ + if (chain->present_timing.timings[i].image != image) + break; + + /* 0 means unknown. 
Application can probably fall back to its own timestamps if it wants to. */ + chain->present_timing.timings[i].queue_done_time = 0; + wsi_swapchain_present_timing_sample_query_pool(chain, &chain->present_timing.timings[i], image, timestamp); + chain->present_timing.timings[i].image = NULL; break; } } @@ -1802,6 +2061,8 @@ wsi_common_queue_present(const struct wsi_device *wsi, const VkPresentTimingsInfoEXT *present_timings_info = vk_find_struct_const(pPresentInfo->pNext, PRESENT_TIMINGS_INFO_EXT); + bool needs_timing_command_buffer = false; + if (present_timings_info) { /* If we fail a present due to full queue, it's a little unclear from * spec if we should treat it as OUT_OF_DATE or OUT_OF_HOST_MEMORY for @@ -1815,10 +2076,13 @@ wsi_common_queue_present(const struct wsi_device *wsi, assert(swapchain->present_timing.active); + uint32_t image_index = pPresentInfo->pImageIndices[i]; + /* EXT_present_timing is defined to only work with present_id2. * It's only used when reporting back timings. */ results[i] = wsi_common_allocate_timing_request( - swapchain, info, present_ids2 ? present_ids2->pPresentIds[i] : 0); + swapchain, info, present_ids2 ? present_ids2->pPresentIds[i] : 0, + swapchain->get_wsi_image(swapchain, image_index)); /* Application is responsible for allocating sufficient size here. * We fail with VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT if application is bugged. */ @@ -1828,6 +2092,13 @@ wsi_common_queue_present(const struct wsi_device *wsi, .time = info->targetTime, .flags = info->flags, }); + + if (info->presentStageQueries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) { + /* It's not a problem if we redundantly submit timing command buffers. + * VUID-12234 also says all swapchains in this present must have been + * created with present timing enabled. */ + needs_timing_command_buffer = true; + } } } } @@ -1906,15 +2177,15 @@ wsi_common_queue_present(const struct wsi_device *wsi, * the per-image semaphores and fences with the blit. 
*/ { - STACK_ARRAY(VkCommandBufferSubmitInfo, blit_command_buffer_infos, - pPresentInfo->swapchainCount); + STACK_ARRAY(VkCommandBufferSubmitInfo, command_buffer_infos, + pPresentInfo->swapchainCount * 2); STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphore_infos, pPresentInfo->swapchainCount * ARRAY_SIZE(image_signal_infos[0].semaphore_infos)); STACK_ARRAY(VkFence, fences, pPresentInfo->swapchainCount * ARRAY_SIZE(image_signal_infos[0].fences)); - uint32_t blit_count = 0, signal_semaphore_count = 0, fence_count = 0; + uint32_t command_buffer_count = 0, signal_semaphore_count = 0, fence_count = 0; for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) { VK_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]); @@ -1967,20 +2238,27 @@ wsi_common_queue_present(const struct wsi_device *wsi, } if (swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT) { - blit_command_buffer_infos[blit_count++] = (VkCommandBufferSubmitInfo) { + command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, .commandBuffer = image->blit.cmd_buffers[queue->queue_family_index], }; } + + if (needs_timing_command_buffer) { + command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = image->timestamp_cmd_buffers[queue->queue_family_index], + }; + } } const VkSubmitInfo2 submit_info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, .waitSemaphoreInfoCount = pPresentInfo->waitSemaphoreCount, .pWaitSemaphoreInfos = semaphore_wait_infos, - .commandBufferInfoCount = blit_count, - .pCommandBufferInfos = blit_command_buffer_infos, + .commandBufferInfoCount = command_buffer_count, + .pCommandBufferInfos = command_buffer_infos, .signalSemaphoreInfoCount = signal_semaphore_count, .pSignalSemaphoreInfos = signal_semaphore_infos, }; @@ -1996,7 +2274,7 @@ wsi_common_queue_present(const struct wsi_device *wsi, STACK_ARRAY_FINISH(fences); 
STACK_ARRAY_FINISH(signal_semaphore_infos); - STACK_ARRAY_FINISH(blit_command_buffer_infos); + STACK_ARRAY_FINISH(command_buffer_infos); } /* Now do blits on any blit queues */ @@ -2021,17 +2299,27 @@ wsi_common_queue_present(const struct wsi_device *wsi, .semaphore = swapchain->blit.semaphores[image_index], }; - const VkCommandBufferSubmitInfo blit_command_buffer_info = { + VkCommandBufferSubmitInfo command_buffer_infos[2]; + uint32_t command_buffer_count = 0; + + command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, .commandBuffer = image->blit.cmd_buffers[0], }; + if (needs_timing_command_buffer) { + command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = image->timestamp_cmd_buffers[0], + }; + } + const VkSubmitInfo2 submit_info = { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, .waitSemaphoreInfoCount = 1, .pWaitSemaphoreInfos = &blit_semaphore_info, - .commandBufferInfoCount = 1, - .pCommandBufferInfos = &blit_command_buffer_info, + .commandBufferInfoCount = command_buffer_count, + .pCommandBufferInfos = command_buffer_infos, .signalSemaphoreInfoCount = image_signal_infos[i].semaphore_count, .pSignalSemaphoreInfos = image_signal_infos[i].semaphore_infos, }; diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h index ae6b3f7b87c..4a86f35c1de 100644 --- a/src/vulkan/wsi/wsi_common.h +++ b/src/vulkan/wsi/wsi_common.h @@ -62,6 +62,8 @@ struct wsi_device { VkPhysicalDeviceMemoryProperties memory_props; uint32_t queue_family_count; uint64_t queue_supports_blit; + uint64_t queue_supports_timestamps; + float timestamp_period; VkPhysicalDeviceDrmPropertiesEXT drm_info; VkPhysicalDevicePCIBusInfoPropertiesEXT pci_bus_info; @@ -201,28 +203,37 @@ struct wsi_device { WSI_CB(CmdPipelineBarrier); WSI_CB(CmdCopyImage); WSI_CB(CmdCopyImageToBuffer); + WSI_CB(CmdResetQueryPool); + 
WSI_CB(CmdWriteTimestamp); WSI_CB(CreateBuffer); WSI_CB(CreateCommandPool); WSI_CB(CreateFence); WSI_CB(CreateImage); + WSI_CB(CreateQueryPool); WSI_CB(CreateSemaphore); WSI_CB(DestroyBuffer); WSI_CB(DestroyCommandPool); WSI_CB(DestroyFence); WSI_CB(DestroyImage); + WSI_CB(DestroyQueryPool); WSI_CB(DestroySemaphore); WSI_CB(EndCommandBuffer); WSI_CB(FreeMemory); WSI_CB(FreeCommandBuffers); WSI_CB(GetBufferMemoryRequirements); + WSI_CB(GetCalibratedTimestampsKHR); WSI_CB(GetFenceStatus); WSI_CB(GetImageDrmFormatModifierPropertiesEXT); WSI_CB(GetImageMemoryRequirements); WSI_CB(GetImageSubresourceLayout); WSI_CB(GetMemoryFdKHR); + WSI_CB(GetPhysicalDeviceCalibrateableTimeDomainsKHR); + WSI_CB(GetPhysicalDeviceProperties); WSI_CB(GetPhysicalDeviceFormatProperties); WSI_CB(GetPhysicalDeviceFormatProperties2); WSI_CB(GetPhysicalDeviceImageFormatProperties2); + WSI_CB(GetPhysicalDeviceQueueFamilyProperties); + WSI_CB(GetQueryPoolResults); WSI_CB(GetSemaphoreFdKHR); WSI_CB(ResetFences); WSI_CB(QueueSubmit2); diff --git a/src/vulkan/wsi/wsi_common_private.h b/src/vulkan/wsi/wsi_common_private.h index fb9bfd8304b..3e2e7e2de5f 100644 --- a/src/vulkan/wsi/wsi_common_private.h +++ b/src/vulkan/wsi/wsi_common_private.h @@ -188,6 +188,9 @@ struct wsi_image { int dma_buf_fd; #endif void *cpu_map; + + VkQueryPool query_pool; + VkCommandBuffer *timestamp_cmd_buffers; }; struct wsi_presentation_timing { @@ -196,6 +199,10 @@ struct wsi_presentation_timing { uint64_t serial; uint64_t queue_done_time; /* GPU timestamp based. */ uint64_t complete_time; /* Best effort timestamp we get from backend. */ + /* If we're rendering with IMMEDIATE, it's possible for images to IDLE long before they complete. + * In this case, we have to ensure that queue_done_time is sampled at QueuePresentKHR time + * before we recycle an image. 
*/ + struct wsi_image *image; VkPresentStageFlagsEXT requested_feedback; VkBool32 complete; }; @@ -274,6 +281,7 @@ struct wsi_swapchain { } present_timing; bool capture_key_pressed; + float timestamp_period; /* Command pools, one per queue family */ VkCommandPool *cmd_pools; @@ -409,6 +417,10 @@ wsi_create_image(const struct wsi_swapchain *chain, void wsi_image_init(struct wsi_image *image); +VkResult +wsi_image_init_timestamp(const struct wsi_swapchain *chain, + struct wsi_image *image); + void wsi_destroy_image(const struct wsi_swapchain *chain, struct wsi_image *image); @@ -419,7 +431,8 @@ wsi_swapchain_wait_for_present_semaphore(const struct wsi_swapchain *chain, void wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain, - uint64_t timing_serial, uint64_t timestamp); + uint64_t timing_serial, uint64_t timestamp, + struct wsi_image *image); void wsi_swapchain_present_timing_update_refresh_rate(struct wsi_swapchain *chain, diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index ef0d13d8d95..559058b04c5 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -1670,7 +1670,15 @@ wsi_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevi struct wsi_wayland *wsi = (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND]; - if (!(wsi_device->queue_supports_blit & BITFIELD64_BIT(queueFamilyIndex))) + /* These should overlap. */ + uint64_t effective_queues = wsi_device->queue_supports_blit & wsi_device->queue_supports_timestamps; + + /* If there are no queues that support both blits and timestamps, + * don't report support for queue timestamps. 
*/ + if (!effective_queues) + effective_queues = wsi_device->queue_supports_blit; + + if (!(effective_queues & BITFIELD64_BIT(queueFamilyIndex))) return false; struct wsi_wl_display display; @@ -2469,6 +2477,7 @@ struct wsi_wl_present_id { uint64_t target_time; uint64_t correction; struct wl_list link; + struct wsi_image *img; bool user_target_time; }; @@ -2903,7 +2912,7 @@ presentation_handle_presented(void *data, /* Notify this before present wait to reduce latency of presentation timing requests * if the application is driving its queries based off present waits. */ if (id->timing_serial) - wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, presentation_time); + wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, presentation_time, id->img); mtx_lock(&chain->present_ids.lock); chain->present_ids.refresh_nsec = refresh; @@ -2940,7 +2949,7 @@ presentation_handle_discarded(void *data) * applications may start to latch onto that timestamp as ground truth, which * is obviously not correct. 
*/ if (id->timing_serial) - wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, 0); + wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, 0, id->img); mtx_lock(&chain->present_ids.lock); if (!chain->present_ids.valid_refresh_nsec) { @@ -3217,6 +3226,7 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, id->present_id = present_id; id->alloc = chain->wsi_wl_surface->display->wsi_wl->alloc; id->timing_serial = chain->timing_request.serial; + id->img = &chain->images[image_index].base; id->user_target_time = chain->timing_request.time != 0; mtx_lock(&chain->present_ids.lock); diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c index e7f3d0ac2d8..49278786c93 100644 --- a/src/vulkan/wsi/wsi_common_x11.c +++ b/src/vulkan/wsi/wsi_common_x11.c @@ -695,7 +695,16 @@ wsi_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice, { VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice); struct wsi_device *wsi_device = pdevice->wsi_device; - if (!(wsi_device->queue_supports_blit & BITFIELD64_BIT(queueFamilyIndex))) + + /* These should overlap. */ + uint64_t effective_queues = wsi_device->queue_supports_blit & wsi_device->queue_supports_timestamps; + + /* If there are no queues that support both blits and timestamps, + * don't report support for queue timestamps. */ + if (!effective_queues) + effective_queues = wsi_device->queue_supports_blit; + + if (!(effective_queues & BITFIELD64_BIT(queueFamilyIndex))) return false; struct wsi_x11_icd_surface *wsi_conn = @@ -1445,7 +1454,7 @@ static void x11_present_complete(struct x11_swapchain *swapchain, * this way we get minimal latency for reports. 
*/ uint64_t timing_serial = image->pending_completions[index].timing_serial; if (timing_serial) - wsi_swapchain_present_timing_notify_completion(&swapchain->base, timing_serial, ust * 1000); + wsi_swapchain_present_timing_notify_completion(&swapchain->base, timing_serial, ust * 1000, &image->base); uint64_t signal_present_id = image->pending_completions[index].signal_present_id; if (signal_present_id) { From d301e081e6bd1f390b6e51ddabcdbe1c43c6990a Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 10 Dec 2025 12:06:52 +0100 Subject: [PATCH 12/17] wsi/wayland: Fix some locking quirks around present ID update. Don't drop the lock only to retake it right after. Signed-off-by: Hans-Kristian Arntzen --- src/vulkan/wsi/wsi_common_wayland.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index 559058b04c5..141ddf6fc45 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -2879,16 +2879,13 @@ wsi_wl_swapchain_acquire_next_image_implicit(struct wsi_swapchain *wsi_chain, } static void -wsi_wl_presentation_update_present_id(struct wsi_wl_present_id *id) +wsi_wl_presentation_update_present_id_locked(struct wsi_wl_present_id *id) { - mtx_lock(&id->chain->present_ids.lock); id->chain->present_ids.outstanding_count--; if (id->present_id > id->chain->present_ids.max_completed) id->chain->present_ids.max_completed = id->present_id; id->chain->present_ids.display_time_correction -= id->correction; - mtx_unlock(&id->chain->present_ids.lock); - vk_free(id->alloc, id); } static void @@ -2931,9 +2928,10 @@ presentation_handle_presented(void *data, chain->present_ids.display_time_error = presentation_time - target_time; else chain->present_ids.display_time_error = 0; - mtx_unlock(&chain->present_ids.lock); - wsi_wl_presentation_update_present_id(id); + wsi_wl_presentation_update_present_id_locked(id); + 
mtx_unlock(&chain->present_ids.lock); + vk_free(id->alloc, id); } static void @@ -2959,9 +2957,10 @@ presentation_handle_discarded(void *data) chain->present_ids.refresh_nsec = 16666666; chain->present_ids.valid_refresh_nsec = true; } - mtx_unlock(&chain->present_ids.lock); - wsi_wl_presentation_update_present_id(id); + wsi_wl_presentation_update_present_id_locked(id); + mtx_unlock(&chain->present_ids.lock); + vk_free(id->alloc, id); } static void @@ -2980,9 +2979,10 @@ presentation_frame_handle_done(void *data, struct wl_callback *callback, uint32_ mtx_lock(&chain->present_ids.lock); wl_list_remove(&id->link); - mtx_unlock(&chain->present_ids.lock); - wsi_wl_presentation_update_present_id(id); + wsi_wl_presentation_update_present_id_locked(id); + mtx_unlock(&chain->present_ids.lock); + vk_free(id->alloc, id); wl_callback_destroy(callback); } From bdc5e0d27030e188ec8c22ddadf9cfc729c51dc9 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Fri, 14 Nov 2025 10:10:16 -0800 Subject: [PATCH 13/17] vulkan/wsi: Add some comments about how the vblank/flip sequencing happens. --- src/vulkan/wsi/wsi_common_display.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c index 53464a7416b..a539a09de8b 100644 --- a/src/vulkan/wsi/wsi_common_display.c +++ b/src/vulkan/wsi/wsi_common_display.c @@ -1827,6 +1827,17 @@ wsi_display_surface_error(struct wsi_display_swapchain *swapchain, VkResult resu mtx_unlock(&swapchain->present_id_mutex); } +/** + * libdrm callback for when we get a DRM_EVENT_PAGE_FLIP in response to our + * atomic commit with DRM_MODE_PAGE_FLIP_EVENT. That event can happen at any + * point after vblank, when the old image is no longer being scanned out and + * that commit is set up to be scanned out next. 
+ * + * This means that we can queue up a new atomic commit, if there were presents + * that we hadn't submitted yet (the event queue is driven by + * wsi_display_wait_thread(), so that's what ends up submitting atomic commits + * most of the time). + **/ static void wsi_display_page_flip_handler2(int fd, unsigned int frame, @@ -1871,6 +1882,11 @@ static void wsi_display_vblank_handler(int fd, unsigned int frame, wsi_display_fence_event_handler(fence); } +/** + * libdrm callback for when we get a DRM_EVENT_CRTC_SEQUENCE in response to a + * drmCrtcQueueSequence(), indicating that the first pixel of a new frame is + * being scanned out. + **/ static void wsi_display_sequence_handler(int fd, uint64_t frame, uint64_t nsec, uint64_t user_data) { From 185439a33a85646789ba7f36b057356604bbeff0 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Thu, 13 Nov 2025 15:06:45 -0800 Subject: [PATCH 14/17] wsi/display: Delete dead vblank_handler path. We use sequence_handler for our vblank events. --- src/vulkan/wsi/wsi_common_display.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c index a539a09de8b..40f55da6db8 100644 --- a/src/vulkan/wsi/wsi_common_display.c +++ b/src/vulkan/wsi/wsi_common_display.c @@ -1873,15 +1873,6 @@ static void wsi_display_page_flip_handler(int fd, wsi_display_page_flip_handler2(fd, frame, sec, usec, 0, data); } -static void wsi_display_vblank_handler(int fd, unsigned int frame, - unsigned int sec, unsigned int usec, - void *data) -{ - struct wsi_display_fence *fence = data; - - wsi_display_fence_event_handler(fence); -} - /** * libdrm callback for when we get a DRM_EVENT_CRTC_SEQUENCE in response to a * drmCrtcQueueSequence(), indicating that the first pixel of a new frame is @@ -1902,7 +1893,7 @@ static drmEventContext event_context = { #if DRM_EVENT_CONTEXT_VERSION >= 3 .page_flip_handler2 = wsi_display_page_flip_handler2, #endif - .vblank_handler = 
wsi_display_vblank_handler, + .vblank_handler = NULL, .sequence_handler = wsi_display_sequence_handler, }; From 2946f93193804ab4530a3a90b1122279f3b83bba Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 10 Dec 2025 14:46:59 +0100 Subject: [PATCH 15/17] wsi/display: Implement present timing on KHR_display. Deal with VRR vs FRR as well. Loosely based on earlier work by Keith Packard and Emma Anholt (MR 38472 for reference). Signed-off-by: Hans-Kristian Arntzen --- src/vulkan/wsi/wsi_common.c | 6 +- src/vulkan/wsi/wsi_common_display.c | 267 +++++++++++++++++++++++++--- src/vulkan/wsi/wsi_common_private.h | 2 +- src/vulkan/wsi/wsi_common_x11.c | 5 +- 4 files changed, 246 insertions(+), 34 deletions(-) diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c index ffd08b4d6a2..0188e6fda64 100644 --- a/src/vulkan/wsi/wsi_common.c +++ b/src/vulkan/wsi/wsi_common.c @@ -1292,11 +1292,11 @@ wsi_CreateSwapchainKHR(VkDevice _device, if (swapchain->poll_early_refresh) { /* If we can query the display directly, we should report something reasonable on first query * before we even present the first time. */ - uint64_t refresh_ns = swapchain->poll_early_refresh(swapchain); + uint64_t interval; + uint64_t refresh_ns = swapchain->poll_early_refresh(swapchain, &interval); if (refresh_ns) { swapchain->present_timing.refresh_duration = refresh_ns; - /* None of the APIs can know a-priori if we're driving the display VRR or not. 
*/ - swapchain->present_timing.refresh_interval = 0; + swapchain->present_timing.refresh_interval = interval; swapchain->present_timing.refresh_counter++; } } diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c index 40f55da6db8..23db07359fe 100644 --- a/src/vulkan/wsi/wsi_common_display.c +++ b/src/vulkan/wsi/wsi_common_display.c @@ -156,6 +156,12 @@ enum colorspace_enum { COLORSPACE_ENUM_MAX, }; +enum vrr_tristate { + VRR_TRISTATE_UNKNOWN, + VRR_TRISTATE_DISABLED, + VRR_TRISTATE_ENABLED, +}; + typedef struct wsi_display_connector_metadata { VkHdrMetadataEXT hdr_metadata; bool supports_st2084; @@ -185,6 +191,10 @@ typedef struct wsi_display_connector { struct wsi_display_connector_metadata metadata; uint32_t count_formats; uint32_t *formats; + enum vrr_tristate vrr_capable; + enum vrr_tristate vrr_enabled; + uint64_t last_frame; + uint64_t last_nsec; } wsi_display_connector; struct wsi_display { @@ -370,6 +380,11 @@ find_properties(struct wsi_display_connector *connector, uint32_t count_props, u } } + if (!strcmp(prop->name, "vrr_capable")) + connector->vrr_capable = prop_values[p] != 0 ? VRR_TRISTATE_ENABLED : VRR_TRISTATE_DISABLED; + if (!strcmp(prop->name, "VRR_ENABLED")) + connector->vrr_enabled = prop_values[p] != 0 ? 
VRR_TRISTATE_ENABLED : VRR_TRISTATE_DISABLED; + drmModeFreeProperty(prop); } @@ -431,38 +446,45 @@ find_connector_properties(struct wsi_display_connector *connector, drmModeConnec enum wsi_image_state { WSI_IMAGE_IDLE, WSI_IMAGE_DRAWING, + WSI_IMAGE_WAITING, + WSI_IMAGE_QUEUED_AFTER_WAIT, WSI_IMAGE_QUEUED, WSI_IMAGE_FLIPPING, WSI_IMAGE_DISPLAYING }; struct wsi_display_image { - struct wsi_image base; - struct wsi_display_swapchain *chain; - enum wsi_image_state state; - uint32_t fb_id; - uint32_t buffer[4]; - uint64_t flip_sequence; - uint64_t present_id; + struct wsi_image base; + struct wsi_display_swapchain *chain; + enum wsi_image_state state; + uint32_t fb_id; + uint32_t buffer[4]; + uint64_t flip_sequence; + uint64_t present_id; + struct wsi_image_timing_request timing_request; + struct wsi_display_fence *fence; + uint64_t minimum_ns; }; struct wsi_display_swapchain { - struct wsi_swapchain base; - struct wsi_display *wsi; - VkIcdSurfaceDisplay *surface; - uint64_t flip_sequence; - VkResult status; + struct wsi_swapchain base; + struct wsi_display *wsi; + VkIcdSurfaceDisplay *surface; + uint64_t flip_sequence; + VkResult status; - mtx_t present_id_mutex; - struct u_cnd_monotonic present_id_cond; - uint64_t present_id; - VkResult present_id_error; + mtx_t present_id_mutex; + struct u_cnd_monotonic present_id_cond; + uint64_t present_id; + VkResult present_id_error; /* A unique ID for the color outcome of the swapchain. A serial of 0 means unset/default. 
*/ - uint64_t color_outcome_serial; - VkHdrMetadataEXT hdr_metadata; + uint64_t color_outcome_serial; + VkHdrMetadataEXT hdr_metadata; - struct wsi_display_image images[0]; + struct wsi_image_timing_request timing_request; + + struct wsi_display_image images[0]; }; struct wsi_display_fence { @@ -473,6 +495,9 @@ struct wsi_display_fence { uint32_t syncobj; /* syncobj to signal on event */ uint64_t sequence; bool device_event; /* fence is used for device events */ + struct wsi_display_connector *connector; + /* Image to be flipped, if this fence is for an image in the WSI_IMAGE_WAITING state that will need to move to QUEUED. */ + struct wsi_display_image *image; }; struct wsi_display_sync { @@ -1322,10 +1347,10 @@ wsi_display_surface_get_capabilities2(VkIcdSurfaceBase *icd_surface, case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: { VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext; - wait->presentStageQueries = 0; - wait->presentTimingSupported = VK_FALSE; - wait->presentAtAbsoluteTimeSupported = VK_FALSE; - wait->presentAtRelativeTimeSupported = VK_FALSE; + wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT; + wait->presentTimingSupported = VK_TRUE; + wait->presentAtAbsoluteTimeSupported = VK_TRUE; + wait->presentAtRelativeTimeSupported = VK_TRUE; break; } @@ -1688,6 +1713,8 @@ wsi_display_image_init(struct wsi_swapchain *drv_chain, image->chain = chain; image->state = WSI_IMAGE_IDLE; + image->fence = NULL; + image->minimum_ns = 0; image->fb_id = 0; uint64_t *fb_modifiers = NULL; @@ -1799,6 +1826,12 @@ wsi_display_idle_old_displaying(struct wsi_display_image *active_image) static VkResult _wsi_display_queue_next(struct wsi_swapchain *drv_chain); +static uint64_t +widen_32_to_64(uint32_t narrow, uint64_t near) +{ + return near + (int32_t)(narrow - near); +} + /** * Wakes up any vkWaitForPresentKHR() waiters on the last present to this * image. 
@@ -1849,6 +1882,28 @@ wsi_display_page_flip_handler2(int fd, struct wsi_display_image *image = data; struct wsi_display_swapchain *chain = image->chain; + VkIcdSurfaceDisplay *surface = chain->surface; + wsi_display_mode *display_mode = + wsi_display_mode_from_handle(surface->displayMode); + wsi_display_connector *connector = display_mode->connector; + + uint64_t nsec = 1000000000ull * sec + 1000ull * usec; + /* If we're on VRR timing path, ensure we get a stable pace. */ + nsec = MAX2(nsec, image->minimum_ns); + + uint64_t frame64 = widen_32_to_64(frame, connector->last_frame); + connector->last_frame = frame64; + connector->last_nsec = nsec; + + /* Never update the refresh rate estimate. It's static based on the mode. + * Update this before we signal present wait so that applications + * get lowest possible latency for present time. */ + if (image->timing_request.serial) { + wsi_swapchain_present_timing_notify_completion( + &chain->base, image->timing_request.serial, + nsec, &image->base); + } + wsi_display_debug("image %ld displayed at %d\n", image - &(image->chain->images[0]), frame); image->state = WSI_IMAGE_DISPLAYING; @@ -1862,7 +1917,9 @@ wsi_display_page_flip_handler2(int fd, chain->status = result; } -static void wsi_display_fence_event_handler(struct wsi_display_fence *fence); +static void wsi_display_fence_event_handler(struct wsi_display_fence *fence, + uint64_t nsec, + uint64_t frame); static void wsi_display_page_flip_handler(int fd, unsigned int frame, @@ -1884,7 +1941,7 @@ static void wsi_display_sequence_handler(int fd, uint64_t frame, struct wsi_display_fence *fence = (struct wsi_display_fence *) (uintptr_t) user_data; - wsi_display_fence_event_handler(fence); + wsi_display_fence_event_handler(fence, nsec, frame); } static drmEventContext event_context = { @@ -2400,13 +2457,30 @@ wsi_display_fence_check_free(struct wsi_display_fence *fence) vk_free(fence->wsi->alloc, fence); } -static void wsi_display_fence_event_handler(struct wsi_display_fence 
*fence) +static void wsi_display_fence_event_handler(struct wsi_display_fence *fence, + uint64_t nsec, uint64_t frame) { + struct wsi_display_connector *connector = fence->connector; + struct wsi_display_image *image = fence->image; + if (fence->syncobj) { (void) drmSyncobjSignal(fence->wsi->syncobj_fd, &fence->syncobj, 1); (void) drmSyncobjDestroy(fence->wsi->syncobj_fd, fence->syncobj); } + if (connector) { + connector->last_nsec = nsec; + connector->last_frame = frame; + } + + if (image && image->state == WSI_IMAGE_WAITING) { + /* We may need to do the final sleep on CPU to resolve VRR timings. */ + image->state = WSI_IMAGE_QUEUED_AFTER_WAIT; + VkResult result = _wsi_display_queue_next(&image->chain->base); + if (result != VK_SUCCESS) + image->chain->status = result; + } + fence->event_received = true; wsi_display_fence_check_free(fence); } @@ -2839,9 +2913,11 @@ _wsi_display_queue_next(struct wsi_swapchain *drv_chain) switch (tmp_image->state) { case WSI_IMAGE_FLIPPING: - /* already flipping, don't send another to the kernel yet */ + case WSI_IMAGE_WAITING: + /* already flipping or waiting for a flip, don't send another to the kernel yet */ return VK_SUCCESS; case WSI_IMAGE_QUEUED: + case WSI_IMAGE_QUEUED_AFTER_WAIT: /* find the oldest queued */ if (!image || tmp_image->flip_sequence < image->flip_sequence) image = tmp_image; @@ -2854,6 +2930,95 @@ _wsi_display_queue_next(struct wsi_swapchain *drv_chain) if (!image) return VK_SUCCESS; + if (image->fence) { + image->fence->image = NULL; + wsi_display_fence_destroy(image->fence); + image->fence = NULL; + } + + unsigned num_cycles_to_skip = 0; + int64_t target_relative_ns = 0; + bool skip_timing = false; + bool nearest_cycle = + (image->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) != 0; + + if (image->timing_request.time != 0) { + /* Ensure we have some kind of timebase to work from. 
*/ + if (!connector->last_frame) + drmCrtcGetSequence(wsi->fd, connector->crtc_id, &connector->last_frame, &connector->last_nsec); + + if (!connector->last_frame || chain->base.present_timing.refresh_duration == 0) { + /* Something has gone very wrong. Just ignore present timing for safety. */ + skip_timing = true; + wsi_display_debug("Cannot get a stable timebase, last frame = %"PRIu64", refresh_duration = %"PRIu64".\n", + connector->last_frame, chain->base.present_timing.refresh_duration); + } + } + + if (!skip_timing && image->state == WSI_IMAGE_QUEUED && image->timing_request.time != 0) { + target_relative_ns = (int64_t)image->timing_request.time; + + /* We need to estimate number of refresh cycles to wait for. */ + if (!(image->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_RELATIVE_TIME_BIT_EXT)) { + target_relative_ns -= (int64_t)connector->last_nsec; + } + + if (nearest_cycle) { + /* No need to lock, we never update refresh_duration dynamically. */ + target_relative_ns -= (int64_t)chain->base.present_timing.refresh_duration / 2; + } else { + /* If application is computing an exact value that lands exactly on the refresh cycle, + * pull back the estimate a little bit since DRM precision is 1us. */ + target_relative_ns -= 1000; + } + } + + target_relative_ns = MAX2(target_relative_ns, 0); + if (target_relative_ns && chain->base.present_timing.refresh_duration) + num_cycles_to_skip = target_relative_ns / chain->base.present_timing.refresh_duration; + + /* CRTC cycles is not reliable on VRR. We cannot use that as a time base. */ + bool is_vrr = connector->vrr_enabled == VRR_TRISTATE_ENABLED && + connector->vrr_capable == VRR_TRISTATE_ENABLED; + + if (num_cycles_to_skip) { + if (!is_vrr) { + /* On FRR, we can rely on vblank events to guide time progression. 
*/ + VkDisplayKHR display = wsi_display_connector_to_handle(connector); + image->fence = wsi_display_fence_alloc(wsi, -1); + + if (image->fence) { + image->fence->connector = connector; + image->fence->image = image; + + uint64_t frame_queued; + uint64_t target_frame = connector->last_frame + num_cycles_to_skip; + VkResult result = wsi_register_vblank_event(image->fence, chain->base.wsi, display, + 0, target_frame, &frame_queued); + + if (result == VK_SUCCESS && frame_queued <= target_frame) { + /* Wait until the vblank fence signals and the event handler will attempt to requeue us. */ + image->state = WSI_IMAGE_WAITING; + return VK_SUCCESS; + } + } + } else { + /* On a VRR display, applications can request frame times which are fractional, + * and there is no good way to target absolute time with atomic commits it seems ... */ + int64_t target_ns = target_relative_ns + (int64_t)connector->last_nsec; + image->minimum_ns = target_ns; + + /* Account for some minimum delay in submitting a page flip until it's processed and sleep jitter. + * We will compensate for the difference if there is any, so that we don't report completion + * times in the past. 
*/ + target_ns -= 1 * 1000 * 1000; + + os_time_nanosleep_until(target_ns); + } + } + + image->state = WSI_IMAGE_QUEUED; + int ret = drm_atomic_commit(connector, image); if (ret == 0) { image->state = WSI_IMAGE_FLIPPING; @@ -2876,6 +3041,44 @@ _wsi_display_queue_next(struct wsi_swapchain *drv_chain) } } +static void +wsi_display_set_timing_request(struct wsi_swapchain *drv_chain, + const struct wsi_image_timing_request *request) +{ + struct wsi_display_swapchain *chain = + (struct wsi_display_swapchain *) drv_chain; + chain->timing_request = *request; +} + +static uint64_t +wsi_display_poll_refresh_duration(struct wsi_swapchain *drv_chain, uint64_t *interval) +{ + struct wsi_display_swapchain *chain = + (struct wsi_display_swapchain *)drv_chain; + VkIcdSurfaceDisplay *surface = chain->surface; + wsi_display_mode *display_mode = + wsi_display_mode_from_handle(surface->displayMode); + double refresh = wsi_display_mode_refresh(display_mode); + wsi_display_connector *connector = display_mode->connector; + + uint64_t refresh_ns = (uint64_t)(floor(1.0 / refresh * 1e9 + 0.5)); + + /* Assume FRR by default. */ + *interval = refresh_ns; + + /* If VRR is not enabled on the target CRTC, we should honor that. + * There is no mechanism to clearly request that VRR is desired, + * so we must assume that user might force us into FRR mode. */ + if (connector->vrr_capable == VRR_TRISTATE_ENABLED) { + if (connector->vrr_enabled == VRR_TRISTATE_UNKNOWN) + *interval = 0; /* Somehow we don't know if the connector is VRR or FRR. Report unknown. 
*/ + else if (connector->vrr_enabled == VRR_TRISTATE_ENABLED) + *interval = UINT64_MAX; + } + + return refresh_ns; +} + static VkResult wsi_display_queue_present(struct wsi_swapchain *drv_chain, uint32_t image_index, @@ -2893,16 +3096,19 @@ wsi_display_queue_present(struct wsi_swapchain *drv_chain, return chain->status; image->present_id = present_id; + image->timing_request = chain->timing_request; assert(image->state == WSI_IMAGE_DRAWING); wsi_display_debug("present %d\n", image_index); mtx_lock(&wsi->wait_mutex); - /* Make sure that the page flip handler is processed in finite time if using present wait. */ - if (present_id) + /* Make sure that the page flip handler is processed in finite time if using present wait + * or presentation time. */ + if (present_id || chain->timing_request.serial) wsi_display_start_wait_thread(wsi); + memset(&chain->timing_request, 0, sizeof(chain->timing_request)); image->flip_sequence = ++chain->flip_sequence; image->state = WSI_IMAGE_QUEUED; @@ -3062,6 +3268,9 @@ wsi_display_surface_create_swapchain( chain->base.acquire_next_image = wsi_display_acquire_next_image; chain->base.release_images = wsi_display_release_images; chain->base.queue_present = wsi_display_queue_present; + chain->base.set_timing_request = wsi_display_set_timing_request; + chain->base.poll_early_refresh = wsi_display_poll_refresh_duration; + chain->base.present_timing.time_domain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR; chain->base.wait_for_present = wsi_display_wait_for_present; chain->base.wait_for_present2 = wsi_display_wait_for_present; chain->base.set_hdr_metadata = wsi_display_set_hdr_metadata; diff --git a/src/vulkan/wsi/wsi_common_private.h b/src/vulkan/wsi/wsi_common_private.h index 3e2e7e2de5f..000c6d952c8 100644 --- a/src/vulkan/wsi/wsi_common_private.h +++ b/src/vulkan/wsi/wsi_common_private.h @@ -313,7 +313,7 @@ struct wsi_swapchain { void (*set_timing_request)(struct wsi_swapchain *swap_chain, const struct wsi_image_timing_request *request); void 
(*poll_timing_request)(struct wsi_swapchain *swap_chain); - uint64_t (*poll_early_refresh)(struct wsi_swapchain *swap_chain); + uint64_t (*poll_early_refresh)(struct wsi_swapchain *swap_chain, uint64_t *interval); }; bool diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c index 49278786c93..329e745184c 100644 --- a/src/vulkan/wsi/wsi_common_x11.c +++ b/src/vulkan/wsi/wsi_common_x11.c @@ -2074,11 +2074,14 @@ x11_set_timing_request(struct wsi_swapchain *wsi_chain, } static uint64_t -x11_poll_early_refresh(struct wsi_swapchain *wsi_chain) +x11_poll_early_refresh(struct wsi_swapchain *wsi_chain, uint64_t *interval) { struct x11_swapchain *chain = (struct x11_swapchain *)wsi_chain; struct wsi_x11_icd_surface *wsi_conn = wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window); + + /* We don't know yet. */ + *interval = 0; return x11_icd_surface_update_present_timing(wsi_conn, chain->extent.width, chain->extent.height); } From dc0df4586caa04b44f83d6b2a0c3c3b00dbf423d Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Fri, 14 Nov 2025 09:53:57 -0800 Subject: [PATCH 16/17] vulkan/wsi: Delete ancient libdrm support for the page flip handler. Looks like the "new" page flip handler was in 2.4.78 in 2017. Mesa requires at least 2.4.109, so we can retire this. 
--- src/vulkan/wsi/wsi_common_display.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_display.c b/src/vulkan/wsi/wsi_common_display.c index 23db07359fe..dc00551afb0 100644 --- a/src/vulkan/wsi/wsi_common_display.c +++ b/src/vulkan/wsi/wsi_common_display.c @@ -1921,15 +1921,6 @@ static void wsi_display_fence_event_handler(struct wsi_display_fence *fence, uint64_t nsec, uint64_t frame); -static void wsi_display_page_flip_handler(int fd, - unsigned int frame, - unsigned int sec, - unsigned int usec, - void *data) -{ - wsi_display_page_flip_handler2(fd, frame, sec, usec, 0, data); -} - /** * libdrm callback for when we get a DRM_EVENT_CRTC_SEQUENCE in response to a * drmCrtcQueueSequence(), indicating that the first pixel of a new frame is @@ -1946,10 +1937,8 @@ static void wsi_display_sequence_handler(int fd, uint64_t frame, static drmEventContext event_context = { .version = DRM_EVENT_CONTEXT_VERSION, - .page_flip_handler = wsi_display_page_flip_handler, -#if DRM_EVENT_CONTEXT_VERSION >= 3 + .page_flip_handler = NULL, .page_flip_handler2 = wsi_display_page_flip_handler2, -#endif .vblank_handler = NULL, .sequence_handler = wsi_display_sequence_handler, }; From 0b4c40858b9e4d5d56432736bd39e10e7a91402f Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 10 Dec 2025 17:56:51 +0100 Subject: [PATCH 17/17] docs: Add VK_EXT_present_timing to new features. 
Signed-off-by: Hans-Kristian Arntzen --- docs/features.txt | 1 + docs/relnotes/new_features.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/features.txt b/docs/features.txt index 5e907be8ff9..e9d52ffb567 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -658,6 +658,7 @@ Khronos extensions that are not part of any Vulkan version: VK_EXT_physical_device_drm DONE (anv, hasvk, hk, nvk, panvk, pvr, radv, tu, v3dv, vn) VK_EXT_pipeline_library_group_handles DONE (anv, lvp, radv, vn) VK_EXT_post_depth_coverage DONE (anv/gfx11+, lvp, nvk, radv/gfx10+, tu, vn) + VK_EXT_present_timing DONE (anv, hk, nvk, radv, tu) VK_EXT_primitive_topology_list_restart DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn, nvk) VK_EXT_primitives_generated_query DONE (anv, hasvk, lvp, nvk, radv, tu, vn) VK_EXT_provoking_vertex DONE (anv, hasvk, hk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn) diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt index efff05ca884..0adf258b751 100644 --- a/docs/relnotes/new_features.txt +++ b/docs/relnotes/new_features.txt @@ -20,3 +20,4 @@ VK_KHR_surface_maintenance1 promotion everywhere EXT is exposed VK_KHR_swapchain_maintenance1 promotion everywhere EXT is exposed VK_KHR_dynamic_rendering on PowerVR VK_EXT_multisampled_render_to_single_sampled on panvk +VK_EXT_present_timing on RADV, NVK, Turnip, ANV, Honeykrisp