From c26ab1aee1dd9a06e17744de7ab6a54530b47ca5 Mon Sep 17 00:00:00 2001 From: Derek Foreman Date: Mon, 4 Dec 2023 12:47:06 -0600 Subject: [PATCH] vulkan/wsi/wayland: Pace frames with commit-timing-v1 Instead of using frame callbacks - which may stop firing if our surface is occluded - use the new commit-timing-v1 protocol in combination with the presentation feedback protocol. If the required protocols are unavailable, or the environment variable MESA_VK_WSI_DEBUG contains "nowlts", we fall back to frame callback based pacing behaviour. Signed-off-by: Derek Foreman Part-of: --- src/egl/wayland/wayland-drm/meson.build | 1 + src/vulkan/wsi/meson.build | 1 + src/vulkan/wsi/wsi_common.c | 2 + src/vulkan/wsi/wsi_common.h | 5 + src/vulkan/wsi/wsi_common_private.h | 1 + src/vulkan/wsi/wsi_common_wayland.c | 250 +++++++++++++++++++++++- 6 files changed, 250 insertions(+), 10 deletions(-) diff --git a/src/egl/wayland/wayland-drm/meson.build b/src/egl/wayland/wayland-drm/meson.build index 04b8917a81d..59ce3442499 100644 --- a/src/egl/wayland/wayland-drm/meson.build +++ b/src/egl/wayland/wayland-drm/meson.build @@ -43,6 +43,7 @@ libwayland_drm = static_library( wp_dir = dep_wl_protocols.get_variable(pkgconfig : 'pkgdatadir', internal : 'pkgdatadir') wp_protos = { 'fifo-v1': 'staging/fifo/fifo-v1.xml', + 'commit-timing-v1': 'staging/commit-timing/commit-timing-v1.xml', 'linux-dmabuf-unstable-v1': 'unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml', 'presentation-time': 'stable/presentation-time/presentation-time.xml', 'tearing-control-v1': 'staging/tearing-control/tearing-control-v1.xml', diff --git a/src/vulkan/wsi/meson.build b/src/vulkan/wsi/meson.build index 11e5926d9af..6933eb780c4 100644 --- a/src/vulkan/wsi/meson.build +++ b/src/vulkan/wsi/meson.build @@ -16,6 +16,7 @@ endif if with_platform_wayland files_vulkan_wsi += files('wsi_common_wayland.c') files_vulkan_wsi += wp_files['fifo-v1'] + files_vulkan_wsi += wp_files['commit-timing-v1'] files_vulkan_wsi += 
wp_files['linux-dmabuf-unstable-v1'] files_vulkan_wsi += wp_files['presentation-time'] files_vulkan_wsi += wp_files['tearing-control-v1'] diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c index cd63a008c05..548bbc8c930 100644 --- a/src/vulkan/wsi/wsi_common.c +++ b/src/vulkan/wsi/wsi_common.c @@ -55,6 +55,7 @@ static const struct debug_control debug_control[] = { { "noshm", WSI_DEBUG_NOSHM }, { "linear", WSI_DEBUG_LINEAR }, { "dxgi", WSI_DEBUG_DXGI }, + { "nowlts", WSI_DEBUG_NOWLTS }, { NULL, }, }; @@ -86,6 +87,7 @@ wsi_device_init(struct wsi_device *wsi, wsi->sw = device_options->sw_device || (WSI_DEBUG & WSI_DEBUG_SW); wsi->wants_linear = (WSI_DEBUG & WSI_DEBUG_LINEAR) != 0; wsi->x11.extra_xwayland_image = device_options->extra_xwayland_image; + wsi->wayland.disable_timestamps = (WSI_DEBUG & WSI_DEBUG_NOWLTS) != 0; #define WSI_GET_CB(func) \ PFN_vk##func func = (PFN_vk##func)proc_addr(pdevice, "vk" #func) WSI_GET_CB(GetPhysicalDeviceExternalSemaphoreProperties); diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h index eebf519e6bc..fee70909b70 100644 --- a/src/vulkan/wsi/wsi_common.h +++ b/src/vulkan/wsi/wsi_common.h @@ -203,6 +203,11 @@ struct wsi_device { * This requires VK_KHR_timeline_semaphore. 
*/ bool khr_present_wait; + struct { + /* Don't use the commit-timing protocol for pacing */ + bool disable_timestamps; + } wayland; + /* * This sets the ownership for a WSI memory object: * diff --git a/src/vulkan/wsi/wsi_common_private.h b/src/vulkan/wsi/wsi_common_private.h index cdf5be65ae2..8d4c2072003 100644 --- a/src/vulkan/wsi/wsi_common_private.h +++ b/src/vulkan/wsi/wsi_common_private.h @@ -40,6 +40,7 @@ struct wsi_swapchain; #define WSI_DEBUG_NOSHM (1ull << 2) #define WSI_DEBUG_LINEAR (1ull << 3) #define WSI_DEBUG_DXGI (1ull << 4) +#define WSI_DEBUG_NOWLTS (1ull << 5) extern uint64_t WSI_DEBUG; diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c index 23809ee3475..97633f46da1 100644 --- a/src/vulkan/wsi/wsi_common_wayland.c +++ b/src/vulkan/wsi/wsi_common_wayland.c @@ -42,6 +42,7 @@ #include "wsi_common_entrypoints.h" #include "wsi_common_private.h" #include "fifo-v1-client-protocol.h" +#include "commit-timing-v1-client-protocol.h" #include "linux-dmabuf-unstable-v1-client-protocol.h" #include "presentation-time-client-protocol.h" #include "linux-drm-syncobj-v1-client-protocol.h" @@ -116,6 +117,8 @@ struct wsi_wl_display { uint32_t wp_presentation_version; struct wp_fifo_manager_v1 *fifo_manager; + struct wp_commit_timing_manager_v1 *commit_timing_manager; + bool no_timestamps; struct wsi_wayland *wsi_wl; @@ -173,6 +176,12 @@ struct wsi_wl_surface { uint64_t presentation_track_id; } analytics; + uint64_t last_target_time; + uint64_t displayed_time; + bool valid_refresh_nsec; + unsigned int refresh_nsec; + uint64_t display_time_error; + uint64_t display_time_correction; struct zwp_linux_dmabuf_feedback_v1 *wl_dmabuf_feedback; struct dmabuf_feedback dmabuf_feedback, pending_dmabuf_feedback; @@ -185,6 +194,7 @@ struct wsi_wl_swapchain { struct wsi_wl_surface *wsi_wl_surface; struct wp_tearing_control_v1 *tearing_control; struct wp_fifo_v1 *fifo; + struct wp_commit_timer_v1 *commit_timer; struct wl_callback *frame; @@ 
/* Compute a target time that is phase locked to a refresh cycle.
 *
 * base   - a known point on the refresh cycle, or 0 if none is known yet
 * period - length of one refresh cycle; 0 selects a fallback of 16666666
 *          (presumably nanoseconds, i.e. roughly 60 Hz - the callers
 *          visible in this file pass refresh_nsec values)
 * from   - earliest acceptable time; must be non-zero
 *
 * Returns 'from' unchanged when there is no base to lock to.  Otherwise
 * returns 'base' plus a whole number of periods: exactly one period when
 * 'base' is not earlier than 'from', else the smallest whole number of
 * periods that reaches 'from' (which yields 'from' itself when it already
 * sits on the cycle).
 */
static uint64_t
next_phase_locked_time(uint64_t base, uint64_t period, uint64_t from)
{
   assert(from != 0);

   /* Without a time base there is nothing to phase lock against. */
   if (!base)
      return from;

   const uint64_t step = period ? period : 16666666;

   /* A time base at or beyond 'from' can happen when it comes from
    * presentation feedback events; aim for the refresh right after it.
    */
   if (from <= base)
      return base + step;

   /* Divide rounding up, so that feedback imprecision cannot make a time
    * just after one refresh and a time just before the following refresh
    * collapse onto the same cycle count.
    */
   const uint64_t elapsed = from - base;
   const uint64_t whole_periods = (elapsed + step - 1) / step;

   return base + whole_periods * step;
}
-941,6 +991,8 @@ wsi_wl_display_init(struct wsi_wayland *wsi_wl, goto fail; } + display->no_timestamps = wsi_wl->wsi->wayland.disable_timestamps; + wl_proxy_set_queue((struct wl_proxy *) display->wl_display_wrapper, display->queue); @@ -1724,6 +1776,9 @@ static VkResult wsi_wl_surface_init(struct wsi_wl_surface *wsi_wl_surface, } wsi_wl_surface_analytics_init(wsi_wl_surface, pAllocator); + + wsi_wl_surface->valid_refresh_nsec = false; + wsi_wl_surface->refresh_nsec = 0; return VK_SUCCESS; fail: @@ -1777,6 +1832,8 @@ struct wsi_wl_present_id { const VkAllocationCallbacks *alloc; struct wsi_wl_swapchain *chain; int buffer_id; + uint64_t target_time; + uint64_t correction; struct wl_list link; }; @@ -1943,6 +2000,37 @@ wsi_wl_swapchain_wait_for_present(struct wsi_swapchain *wsi_chain, } } +static int +wsi_wl_swapchain_ensure_dispatch(struct wsi_wl_swapchain *chain) +{ + struct wsi_wl_surface *wsi_wl_surface = chain->wsi_wl_surface; + struct wl_display *display = wsi_wl_surface->display->wl_display; + struct timespec timeout = {0, 0}; + int ret = 0; + + mtx_lock(&chain->present_ids.lock); + if (chain->present_ids.dispatch_in_progress) + goto already_dispatching; + + chain->present_ids.dispatch_in_progress = true; + mtx_unlock(&chain->present_ids.lock); + + /* Use a dispatch with an instant timeout because dispatch_pending + * won't read any events in the pipe. 
+ */ + ret = wl_display_dispatch_queue_timeout(display, + chain->present_ids.queue, + &timeout); + + mtx_lock(&chain->present_ids.lock); + u_cnd_monotonic_broadcast(&chain->present_ids.list_advanced); + chain->present_ids.dispatch_in_progress = false; + +already_dispatching: + mtx_unlock(&chain->present_ids.lock); + return ret; +} + static VkResult wsi_wl_swapchain_acquire_next_image_explicit(struct wsi_swapchain *wsi_chain, const VkAcquireNextImageInfoKHR *info, @@ -2005,6 +2093,15 @@ wsi_wl_swapchain_acquire_next_image_implicit(struct wsi_swapchain *wsi_chain, timespec_add(&end_time, &rel_timeout, &start_time); while (1) { + /* If we can use timestamps, we want to make sure the queue feedback + * events are in is dispatched so we eventually get a refresh rate + * and a vsync time to phase lock to. We don't need to wait for it + * now. + */ + if (chain->commit_timer) { + if (wsi_wl_swapchain_ensure_dispatch(chain) == -1) + return VK_ERROR_OUT_OF_DATE_KHR; + } /* Try to find a free image. 
*/ for (uint32_t i = 0; i < chain->base.image_count; i++) { if (!chain->images[i].busy) { @@ -2043,6 +2140,7 @@ wsi_wl_presentation_update_present_id(struct wsi_wl_present_id *id) if (id->present_id > id->chain->present_ids.max_completed) id->chain->present_ids.max_completed = id->present_id; + id->chain->wsi_wl_surface->display_time_correction -= id->correction; wl_list_remove(&id->link); mtx_unlock(&id->chain->present_ids.lock); vk_free(id->alloc, id); @@ -2094,11 +2192,31 @@ presentation_handle_presented(void *data, MESA_TRACE_FUNC_FLOW(&id->flow_id); + struct wsi_wl_swapchain *chain = id->chain; + struct wsi_wl_surface *surface = chain->wsi_wl_surface; + uint64_t target_time = id->target_time; + + surface->refresh_nsec = refresh; + presentation_ts.tv_sec = ((uint64_t)tv_sec_hi << 32) + tv_sec_lo; presentation_ts.tv_nsec = tv_nsec; presentation_time = timespec_to_nsec(&presentation_ts); trace_present(id, presentation_time); + if (!surface->valid_refresh_nsec) { + surface->valid_refresh_nsec = true; + surface->last_target_time = presentation_time; + target_time = presentation_time; + } + + if (presentation_time > surface->displayed_time) + surface->displayed_time = presentation_time; + + if (target_time && presentation_time > target_time) + surface->display_time_error = presentation_time - target_time; + else + surface->display_time_error = 0; + wsi_wl_presentation_update_present_id(id); wp_presentation_feedback_destroy(feedback); } @@ -2110,6 +2228,16 @@ presentation_handle_discarded(void *data, struct wsi_wl_present_id *id = data; MESA_TRACE_FUNC_FLOW(&id->flow_id); + struct wsi_wl_swapchain *chain = id->chain; + struct wsi_wl_surface *surface = chain->wsi_wl_surface; + + if (!surface->valid_refresh_nsec) { + /* We've started occluded, so make up some safe values to throttle us */ + surface->displayed_time = os_time_get_nano(); + surface->last_target_time = surface->displayed_time; + surface->refresh_nsec = 16666666; + surface->valid_refresh_nsec = true; + } 
wsi_wl_presentation_update_present_id(id); wp_presentation_feedback_destroy(feedback); @@ -2149,6 +2277,71 @@ static const struct wl_callback_listener frame_listener = { frame_handle_done, }; +static bool +set_timestamp(struct wsi_wl_swapchain *chain, + uint64_t *timestamp, + uint64_t *correction) +{ + struct wsi_wl_surface *surface = chain->wsi_wl_surface; + uint64_t target; + struct timespec target_ts; + uint64_t refresh; + uint64_t displayed_time; + int32_t error = 0; + + if (!surface->valid_refresh_nsec) + return false; + + displayed_time = surface->displayed_time; + refresh = surface->refresh_nsec; + + /* If refresh is 0, presentation feedback has informed us we have no + * fixed refresh cycle. In that case we can't generate sensible + * timestamps at all, so bail out. + */ + if (!refresh) + return false; + + /* We assume we're being fed at the display's refresh rate, but + * if that doesn't happen our timestamps fall into the past. + * + * This would result in an offscreen surface being unthrottled until + * it "catches up" on missed frames. Instead, correct for missed + * frame opportunities by jumping forward if our display time + * didn't match our target time. + * + * Since we might have a few frames in flight, we need to keep a + * running tally of how much correction we're applying and remove + * it as corrected frames are retired. + */ + if (surface->display_time_error > surface->display_time_correction) + error = surface->display_time_error - surface->display_time_correction; + + target = surface->last_target_time; + if (error > 0) { + target += (error / refresh) * refresh; + *correction = (error / refresh) * refresh; + } else { + *correction = 0; + } + + surface->display_time_correction += *correction; + target = next_phase_locked_time(displayed_time, + refresh, + target); + /* Take back 500 us as a safety margin, to ensure we don't miss our + * target due to round-off error. 
+ */ + timespec_from_nsec(&target_ts, target - 500000); + wp_commit_timer_v1_set_timestamp(chain->commit_timer, + (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec, + target_ts.tv_nsec); + + surface->last_target_time = target; + *timestamp = target; + return true; +} + static VkResult wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, uint32_t image_index, @@ -2156,7 +2349,9 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, const VkPresentRegionKHR *damage) { struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain; + bool timestamped = false; bool queue_dispatched = false; + bool need_legacy_throttling = true; uint64_t flow_id = chain->images[image_index].flow_id; MESA_TRACE_FUNC_FLOW(&flow_id); @@ -2232,16 +2427,8 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, wl_surface_damage(wsi_wl_surface->surface, 0, 0, INT32_MAX, INT32_MAX); } - if (mode_fifo && !chain->fifo) { - chain->frame = wl_surface_frame(wsi_wl_surface->surface); - wl_callback_add_listener(chain->frame, &frame_listener, chain); - chain->legacy_fifo_ready = false; - } else { - /* If we present MAILBOX, any subsequent presentation in FIFO can replace this image. 
*/ - chain->legacy_fifo_ready = true; - } - - if (present_id > 0 || util_perfetto_is_tracing_enabled()) { + if (present_id > 0 || (mode_fifo && chain->commit_timer) || + util_perfetto_is_tracing_enabled()) { struct wsi_wl_present_id *id = vk_zalloc(chain->wsi_wl_surface->display->wsi_wl->alloc, sizeof(*id), sizeof(uintptr_t), VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); @@ -2254,6 +2441,12 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, id->submission_time = os_time_get_nano(); + if (mode_fifo && chain->fifo && chain->commit_timer) { + timestamped = set_timestamp(chain, &id->target_time, &id->correction); + if (timestamped || !wsi_wl_surface->valid_refresh_nsec) + need_legacy_throttling = false; + } + mtx_lock(&chain->present_ids.lock); if (chain->present_ids.wp_presentation) { @@ -2273,8 +2466,34 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain, chain->images[image_index].busy = true; + if (mode_fifo && need_legacy_throttling) { + chain->frame = wl_surface_frame(wsi_wl_surface->surface); + wl_callback_add_listener(chain->frame, &frame_listener, chain); + chain->legacy_fifo_ready = false; + } else { + /* If we present MAILBOX, any subsequent presentation in FIFO can replace this image. */ + chain->legacy_fifo_ready = true; + } + if (mode_fifo && chain->fifo) { wp_fifo_v1_set_barrier(chain->fifo); + + /* If our surface is occluded and we're using vkWaitForPresentKHR, + * we can end up waiting forever. The FIFO condition and the time + * constraint are met, but the image hasn't been presented because + * we're occluded - but the image isn't discarded because there + * are no further content updates for the compositor to process. + * + * This extra commit gives us the second content update to move + * things along. If we're occluded the FIFO constraint is + * satisfied immediately after the time constraint is, pushing + * out a discard. 
If we're visible, the timed content update + * receives presented feedback and the FIFO one blocks further + * updates until the next refresh. + */ + if (timestamped) + wl_surface_commit(wsi_wl_surface->surface); + wp_fifo_v1_wait_barrier(chain->fifo); } wl_surface_commit(wsi_wl_surface->surface); @@ -2495,6 +2714,9 @@ wsi_wl_swapchain_chain_free(struct wsi_wl_swapchain *chain, if (chain->fifo) wp_fifo_v1_destroy(chain->fifo); + if (chain->commit_timer) + wp_commit_timer_v1_destroy(chain->commit_timer); + wsi_swapchain_finish(&chain->base); } @@ -2566,6 +2788,10 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, wp_fifo_v1_destroy(old_chain->fifo); old_chain->fifo = NULL; } + if (old_chain->commit_timer) { + wp_commit_timer_v1_destroy(old_chain->commit_timer); + old_chain->commit_timer = NULL; + } } /* Take ownership of the wsi_wl_surface */ @@ -2720,6 +2946,10 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface, chain->fifo = wp_fifo_manager_v1_get_fifo(dpy->fifo_manager, chain->wsi_wl_surface->surface); } + if (dpy->commit_timing_manager) { + chain->commit_timer = wp_commit_timing_manager_v1_get_timer(dpy->commit_timing_manager, + chain->wsi_wl_surface->surface); + } for (uint32_t i = 0; i < chain->base.image_count; i++) { result = wsi_wl_image_init(chain, &chain->images[i],