From 3fdc14b089262ed98aa5a8ccbc77191ac126d20b Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 4 Dec 2025 13:52:02 +0100 Subject: [PATCH] wsi/wayland: Implement EXT_present_timing on Wayland. Only weakness right now is that we cannot implement VRR vs FRR query reliably. Signed-off-by: Hans-Kristian Arntzen Reviewed-by: Derek Foreman Part-of: --- src/vulkan/wsi/wsi_common_wayland.c | 198 +++++++++++++++++++++++++--- 1 file changed, 181 insertions(+), 17 deletions(-) diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index 7aab7bec38a..29149fbf4a1 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -254,6 +254,8 @@ struct wsi_wl_swapchain { bool has_hdr_metadata; } color; + struct wsi_image_timing_request timing_request; + struct wsi_wl_image images[0]; }; VK_DEFINE_NONDISP_HANDLE_CASTS(wsi_wl_swapchain, base.base, VkSwapchainKHR, @@ -1789,7 +1791,8 @@ wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *icd_surface, static VkResult wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface, struct wsi_device *wsi_device, - bool *has_wp_presentation) + bool *has_wp_presentation, clockid_t *clock_id, + bool *has_commit_timing, bool *has_fifo) { VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface; struct wsi_wayland *wsi = @@ -1800,7 +1803,17 @@ wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface, wsi_device->sw, "mesa check wp_presentation")) return VK_ERROR_SURFACE_LOST_KHR; - *has_wp_presentation = !!display.wp_presentation_notwrapped; + if (has_wp_presentation) + *has_wp_presentation = !!display.wp_presentation_notwrapped; + + if (clock_id) + *clock_id = display.presentation_clock_id; + + if (has_commit_timing) + *has_commit_timing = !!display.commit_timing_manager; + + if (has_fifo) + *has_fifo = !!display.fifo_manager; wsi_wl_display_finish(&display); @@ -1893,7 +1906,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface, bool has_feedback; result = wsi_wl_surface_check_presentation(surface, wsi_device, - &has_feedback); + &has_feedback, NULL, NULL, NULL); if (result != VK_SUCCESS) return result; @@ -1906,7 +1919,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface, bool has_feedback; result = wsi_wl_surface_check_presentation(surface, wsi_device, - &has_feedback); + &has_feedback, NULL, NULL, NULL); if (result != VK_SUCCESS) return result; @@ -1916,11 +1929,45 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface, case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: { VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext; + bool has_feedback, has_commit_timing, has_fifo; wait->presentStageQueries = 0; wait->presentTimingSupported = VK_FALSE; wait->presentAtAbsoluteTimeSupported = VK_FALSE; wait->presentAtRelativeTimeSupported = VK_FALSE; + + clockid_t clock_id; + + result = wsi_wl_surface_check_presentation(surface, wsi_device, + &has_feedback, &clock_id, + &has_commit_timing, &has_fifo); + + if (result != VK_SUCCESS) + return result; + + if (!has_feedback) + break; + + /* We could deal with esoteric clock domains by exposing VK_TIME_DOMAIN_SWAPCHAIN or PRESENT_STAGE_LOCAL, + * but that requires a lot more scaffolding, and there's no need to add extra complexity if we can + * get away with this. */ + if (clock_id != CLOCK_MONOTONIC && clock_id != CLOCK_MONOTONIC_RAW) + break; + + /* Presentation timing spec talks about the reported time targeting "pixel being visible". + * From presentation-time spec: "Note, that if the display path has a non-zero latency, + * the time instant specified by this counter may differ from the timestamp's." + * No compositor I know of reports where it takes display latency into account, + * so it's a little unclear if we should actually be reporting PIXEL_OUT or PIXEL_VISIBLE. + * Choose PIXEL_OUT for now since no known compositor out there actually implements + * PIXEL_VISIBLE as intended, and we don't want to promise something we cannot hold. */ + wait->presentTimingSupported = VK_TRUE; + wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT; + + /* We cannot reliably implement FIFO guarantee + absolute time without the FIFO barrier. + * Presentation timing is only defined to work with FIFO (and its variants like RELAXED and LATEST_READY). */ + wait->presentAtAbsoluteTimeSupported = has_commit_timing && has_fifo; + break; } @@ -2414,6 +2461,7 @@ struct wsi_wl_present_id { * which uses frame callback to signal DRI3 COMPLETE. */ struct wl_callback *frame; uint64_t present_id; + uint64_t timing_serial; struct mesa_trace_flow flow; uint64_t submission_time; const VkAllocationCallbacks *alloc; @@ -2421,6 +2469,7 @@ struct wsi_wl_present_id { uint64_t target_time; uint64_t correction; struct wl_list link; + bool user_target_time; }; static struct wsi_image * @@ -2451,6 +2500,14 @@ wsi_wl_swapchain_set_present_mode(struct wsi_swapchain *wsi_chain, chain->base.present_mode = mode; } +static void +wsi_wl_swapchain_set_timing_request(struct wsi_swapchain *wsi_chain, + const struct wsi_image_timing_request *request) +{ + struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain; + chain->timing_request = *request; +} + static VkResult dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_time) { @@ -2524,6 +2581,15 @@ dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_ return VK_SUCCESS; } +static void +wsi_wl_swapchain_poll_timing_request(struct wsi_swapchain *wsi_chain) +{ + /* Timing requests must complete in finite time, and if we're not calling present wait + * or queue present regularly, timing requests will never come back. */ + struct timespec instant = {0}; + dispatch_present_id_queue(wsi_chain, &instant); +} + static bool wsi_wl_swapchain_present_id_completes_in_finite_time_locked(struct wsi_wl_swapchain *chain, uint64_t present_id) @@ -2825,6 +2891,20 @@ presentation_handle_presented(void *data, struct wsi_wl_swapchain *chain = id->chain; uint64_t target_time = id->target_time; + /* In v1 of presentation time, we can know if we're likely running VRR, given refresh is 0. + * However, we cannot know what the base refresh rate is without some kind of external information. + * We also cannot know if we're actually driving the display in a VRR fashion. + * In v2, we should always know the "base refresh" rate, but that means we cannot know if we're driving + * the display VRR or FRR. We could try to deduce it based on timestamps, but that is too brittle. + * There is a v3 proposal that adds this information more formally so we don't have to guess. + * Knowing VRR or FRR is not mission critical for most use cases, so just report "Unknown" for now. */ + wsi_swapchain_present_timing_update_refresh_rate(&chain->base, refresh, 0, 0); + + /* Notify this before present wait to reduce latency of presentation timing requests + * if the application is driving its queries based off present waits. */ + if (id->timing_serial) + wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, presentation_time); + mtx_lock(&chain->present_ids.lock); chain->present_ids.refresh_nsec = refresh; if (!chain->present_ids.valid_refresh_nsec) { @@ -2836,7 +2916,9 @@ presentation_handle_presented(void *data, if (presentation_time > chain->present_ids.displayed_time) chain->present_ids.displayed_time = presentation_time; - if (target_time && presentation_time > target_time) + /* If we have user-defined target time it can be arbitrarily early, and we don't + * want to start compensating for that error if application stops requesting specific time. */ + if (!id->user_target_time && target_time && presentation_time > target_time) chain->present_ids.display_time_error = presentation_time - target_time; else chain->present_ids.display_time_error = 0; @@ -2851,6 +2933,15 @@ presentation_handle_discarded(void *data) struct wsi_wl_present_id *id = data; struct wsi_wl_swapchain *chain = id->chain; + /* From Vulkan spec: + * "Timing information for some present stages may have a time value of 0, + * indicating that results for that present stage are not available." + * Worst case we can simply take a timestamp of clock_id and pretend, but + * applications may start to latch onto that timestamp as ground truth, which + * is obviously not correct. */ + if (id->timing_serial) + wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, 0); + mtx_lock(&chain->present_ids.lock); if (!chain->present_ids.valid_refresh_nsec) { /* We've started occluded, so make up some safe values to throttle us */ @@ -2905,6 +2996,29 @@ static const struct wl_callback_listener frame_listener = { frame_handle_done, }; +static bool +set_application_driven_timestamp(struct wsi_wl_swapchain *chain, + uint64_t *timestamp, + uint64_t *correction) +{ + if (chain->timing_request.serial && chain->timing_request.time) { + /* Absolute time is requested before we have been able to report a reasonable refresh rate + * to application. This is valid, but we should not try to perform any rounding. + * NEAREST_REFRESH_CYCLE flag cannot be honored because it's impossible to know at this time. */ + struct timespec target_ts; + timespec_from_nsec(&target_ts, chain->timing_request.time); + wp_commit_timer_v1_set_timestamp(chain->commit_timer, + (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec, + target_ts.tv_nsec); + *timestamp = chain->timing_request.time; + *correction = 0; + chain->present_ids.last_target_time = chain->timing_request.time; + return true; + } else { + return false; + } +} + /* The present_ids lock must be held */ static bool set_timestamp(struct wsi_wl_swapchain *chain, @@ -2918,7 +3032,7 @@ set_timestamp(struct wsi_wl_swapchain *chain, int32_t error = 0; if (!chain->present_ids.valid_refresh_nsec) - return false; + return set_application_driven_timestamp(chain, timestamp, correction); displayed_time = chain->present_ids.displayed_time; refresh = chain->present_ids.refresh_nsec; @@ -2928,7 +3042,7 @@ set_timestamp(struct wsi_wl_swapchain *chain, * timestamps at all, so bail out. */ if (!refresh) - return false; + return set_application_driven_timestamp(chain, timestamp, correction); /* We assume we're being fed at the display's refresh rate, but * if that doesn't happen our timestamps fall into the past. @@ -2946,6 +3060,10 @@ set_timestamp(struct wsi_wl_swapchain *chain, error = chain->present_ids.display_time_error - chain->present_ids.display_time_correction; + /* If we're driving timestamps from application, this is somewhat redundant + * but it will drain out any accumulated display_time_error over time. + * Accumulated errors are expected since application might not + * align the target time perfectly against a refresh cycle. */ target = chain->present_ids.last_target_time; if (error > 0) { target += (error / refresh) * refresh; @@ -2955,19 +3073,41 @@ set_timestamp(struct wsi_wl_swapchain *chain, } chain->present_ids.display_time_correction += *correction; - target = next_phase_locked_time(displayed_time, - refresh, - target); - /* Take back 500 us as a safety margin, to ensure we don't miss our - * target due to round-off error. - */ - timespec_from_nsec(&target_ts, target - 500000); + + if (chain->timing_request.serial && chain->timing_request.time) { + target = chain->timing_request.time; + chain->present_ids.last_target_time = target; + *timestamp = target; + + if (chain->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) + target -= chain->present_ids.refresh_nsec / 2; + + /* Without the flag, the application is supposed to deal with any safety margins on its own. */ + timespec_from_nsec(&target_ts, target); + + /* If we're using commit timing path, we always have FIFO protocol, so we don't have to + * consider scenarios where application is passing a very low present time. + * I.e., there is no need to max() the application timestamp against our estimated next refresh cycle. + * If the surface is occluded, it's possible to render at a higher rate than display refresh rate, + * but that's okay. Those presents will be discarded anyway, and we won't report odd timestamps to application. */ + } else { + target = next_phase_locked_time(displayed_time, + refresh, + target); + + chain->present_ids.last_target_time = target; + *timestamp = target; + + /* Take back 500 us as a safety margin, to ensure we don't miss our + * target due to round-off error. + */ + timespec_from_nsec(&target_ts, target - 500000); + } + wp_commit_timer_v1_set_timestamp(chain->commit_timer, (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec, target_ts.tv_nsec); - chain->present_ids.last_target_time = target; - *timestamp = target; return true; } @@ -3069,13 +3209,15 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, } if (present_id > 0 || (mode_fifo && chain->commit_timer) || - util_perfetto_is_tracing_enabled()) { + util_perfetto_is_tracing_enabled() || chain->timing_request.serial) { struct wsi_wl_present_id *id = vk_zalloc(chain->wsi_wl_surface->display->wsi_wl->alloc, sizeof(*id), sizeof(uintptr_t), VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); id->chain = chain; id->present_id = present_id; id->alloc = chain->wsi_wl_surface->display->wsi_wl->alloc; + id->timing_serial = chain->timing_request.serial; + id->user_target_time = chain->timing_request.time != 0; mtx_lock(&chain->present_ids.lock); @@ -3203,6 +3345,8 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, wsi_wl_surface->display->queue); } + memset(&chain->timing_request, 0, sizeof(chain->timing_request)); + return VK_SUCCESS; } @@ -3437,6 +3581,20 @@ wsi_wl_swapchain_destroy(struct wsi_swapchain *wsi_chain, return VK_SUCCESS; } +static VkTimeDomainKHR +clock_id_to_vk_time_domain(clockid_t id) +{ + switch (id) { + case CLOCK_MONOTONIC: + return VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR; + case CLOCK_MONOTONIC_RAW: + return VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR; + default: + /* Default fallback. Will not be used. */ + return VK_TIME_DOMAIN_DEVICE_KHR; + } +} + static VkResult wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, VkDevice device, @@ -3615,6 +3773,12 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->base.queue_present = wsi_wl_swapchain_queue_present; chain->base.release_images = wsi_wl_swapchain_release_images; chain->base.set_present_mode = wsi_wl_swapchain_set_present_mode; + chain->base.set_timing_request = wsi_wl_swapchain_set_timing_request; + chain->base.poll_timing_request = wsi_wl_swapchain_poll_timing_request; + if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) { + chain->base.present_timing.time_domain = + clock_id_to_vk_time_domain(wsi_wl_surface->display->presentation_clock_id); + } chain->base.wait_for_present = wsi_wl_swapchain_wait_for_present; chain->base.wait_for_present2 = wsi_wl_swapchain_wait_for_present2; chain->base.present_mode = present_mode;