mirror of https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 05:10:11 +01:00

commit 32a5663d35

Merge branch 'present-timing' into 'main'

vulkan/wsi: Implement EXT_present_timing. See merge request mesa/mesa!38770

21 changed files with 1845 additions and 201 deletions
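For orientation before the per-file hunks: the application-facing flow this merge request implements looks roughly like the sketch below. It is a minimal sketch assembled only from names that appear in this diff (VkPresentTimingInfoEXT, VkPresentTimingsInfoEXT, VkPastPresentationTimingPropertiesEXT, vkGetPastPresentationTimingEXT); the sType constants are inferred from the usual Vulkan naming convention, the extension is provisional, and a real caller would also chain VkPresentId2KHR, which the implementation notes EXT_present_timing is defined to work with.

/* Sketch: present one image with timing feedback requested, then poll the
 * accumulated feedback with the usual two-call idiom. Error handling omitted. */
static void
present_with_timing(VkDevice dev, VkQueue queue, VkSwapchainKHR swapchain,
                    uint32_t image_index, VkSemaphore render_done,
                    uint64_t target_time_ns)
{
   const VkPresentTimingInfoEXT timing = {
      .targetTime = target_time_ns,   /* absolute target in the swapchain time domain */
      .presentStageQueries = VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT,
   };
   const VkPresentTimingsInfoEXT timings = {
      .sType = VK_STRUCTURE_TYPE_PRESENT_TIMINGS_INFO_EXT,   /* inferred name */
      .swapchainCount = 1,
      .pTimingInfos = &timing,
   };
   const VkPresentInfoKHR present = {
      .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
      .pNext = &timings,
      .waitSemaphoreCount = 1,
      .pWaitSemaphores = &render_done,
      .swapchainCount = 1,
      .pSwapchains = &swapchain,
      .pImageIndices = &image_index,
   };
   vkQueuePresentKHR(queue, &present);

   /* Later: with pPresentationTimings == NULL this only reports how many
    * completed entries are ready; a second call would fetch them. */
   VkPastPresentationTimingInfoEXT past_info = {
      .sType = VK_STRUCTURE_TYPE_PAST_PRESENTATION_TIMING_INFO_EXT,   /* inferred name */
      .swapchain = swapchain,
   };
   VkPastPresentationTimingPropertiesEXT props = {
      .sType = VK_STRUCTURE_TYPE_PAST_PRESENTATION_TIMING_PROPERTIES_EXT,   /* inferred name */
   };
   vkGetPastPresentationTimingEXT(dev, &past_info, &props);
}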
@@ -658,6 +658,7 @@ Khronos extensions that are not part of any Vulkan version:
  VK_EXT_physical_device_drm                 DONE (anv, hasvk, hk, nvk, panvk, pvr, radv, tu, v3dv, vn)
  VK_EXT_pipeline_library_group_handles      DONE (anv, lvp, radv, vn)
  VK_EXT_post_depth_coverage                 DONE (anv/gfx11+, lvp, nvk, radv/gfx10+, tu, vn)
  VK_EXT_present_timing                      DONE (anv, hk, nvk, radv, tu)
  VK_EXT_primitive_topology_list_restart     DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)
  VK_EXT_primitives_generated_query          DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
  VK_EXT_provoking_vertex                    DONE (anv, hasvk, hk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)
@@ -20,3 +20,4 @@ VK_KHR_surface_maintenance1 promotion everywhere EXT is exposed
VK_KHR_swapchain_maintenance1 promotion everywhere EXT is exposed
VK_KHR_dynamic_rendering on PowerVR
VK_EXT_multisampled_render_to_single_sampled on panvk
VK_EXT_present_timing on RADV, NVK, Turnip, ANV, Honeykrisp
@@ -791,6 +791,10 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
      .EXT_pipeline_library_group_handles = radv_enable_rt(pdev),
      .EXT_pipeline_robustness = !pdev->use_llvm,
      .EXT_post_depth_coverage = pdev->info.gfx_level >= GFX10,
#ifdef RADV_USE_WSI_PLATFORM
      /* KHR_calibrated_timestamps is a requirement to expose EXT_present_timing. */
      .EXT_present_timing = radv_calibrated_timestamps_enabled(pdev),
#endif
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,

@@ -1481,6 +1485,14 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc

      /* VK_EXT_custom_resolve */
      .customResolve = true,

#ifdef RADV_USE_WSI_PLATFORM
      /* VK_EXT_present_timing */
      /* The actual query is deferred to surface time. */
      .presentTiming = true,
      .presentAtAbsoluteTime = true,
      .presentAtRelativeTime = true,
#endif
   };
}
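The feature bits advertised above (and in the hk, tu, anv and nvk hunks below) are what an application tests before enabling the extension; a sketch, assuming the feature-struct name VkPhysicalDevicePresentTimingFeaturesEXT, which follows the extension naming convention but does not itself appear in this diff:

/* Sketch: check the EXT_present_timing feature bits. Only the member names
 * (presentTiming, presentAtAbsoluteTime, presentAtRelativeTime) appear in
 * this diff; the struct and sType names are assumptions. */
VkPhysicalDevicePresentTimingFeaturesEXT timing_features = {
   .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_TIMING_FEATURES_EXT,
};
VkPhysicalDeviceFeatures2 features2 = {
   .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
   .pNext = &timing_features,
};
vkGetPhysicalDeviceFeatures2(physical_device, &features2);

bool can_target_absolute_time = timing_features.presentTiming &&
                                timing_features.presentAtAbsoluteTime;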
@@ -181,6 +181,9 @@ hk_get_device_extensions(const struct hk_instance *instance,
      .EXT_pipeline_protected_access = true,
      .EXT_pipeline_robustness = true,
      .EXT_physical_device_drm = true,
#ifdef HK_USE_WSI_PLATFORM
      .EXT_present_timing = true,
#endif
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_primitives_generated_query = false,

@@ -623,6 +626,13 @@ hk_get_device_features(

      /* VK_KHR_shader_relaxed_extended_instruction */
      .shaderRelaxedExtendedInstruction = true,

#ifdef HK_USE_WSI_PLATFORM
      /* VK_EXT_present_timing */
      .presentTiming = true,
      .presentAtRelativeTime = true,
      .presentAtAbsoluteTime = true,
#endif
   };
}
@@ -792,17 +792,6 @@ dri2_fourcc_for_depth(struct dri2_egl_display *dri2_dpy, uint32_t depth)
   }
}

static int
box_intersection_area(int16_t a_x, int16_t a_y, int16_t a_width,
                      int16_t a_height, int16_t b_x, int16_t b_y,
                      int16_t b_width, int16_t b_height)
{
   int w = MIN2(a_x + a_width, b_x + b_width) - MAX2(a_x, b_x);
   int h = MIN2(a_y + a_height, b_y + b_height) - MAX2(a_y, b_y);

   return (w < 0 || h < 0) ? 0 : w * h;
}

EGLBoolean
dri2_x11_get_msc_rate(_EGLDisplay *display, _EGLSurface *surface,
                      EGLint *numerator, EGLint *denominator)
@@ -314,6 +314,9 @@ get_device_extensions(const struct tu_physical_device *device,
      .EXT_physical_device_drm = !is_kgsl(device->instance),
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
#ifdef TU_USE_WSI_PLATFORM
      .EXT_present_timing = device->info->props.has_persistent_counter,
#endif
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,

@@ -825,6 +828,13 @@ tu_get_features(struct tu_physical_device *pdevice,

   /* VK_EXT_custom_resolve */
   features->customResolve = true;

#ifdef TU_USE_WSI_PLATFORM
   /* VK_EXT_present_timing */
   features->presentTiming = true;
   features->presentAtRelativeTime = true;
   features->presentAtAbsoluteTime = true;
#endif
}

static void
@@ -354,6 +354,9 @@ get_device_extensions(const struct anv_physical_device *device,
      .EXT_pipeline_protected_access = device->has_protected_contexts,
      .EXT_pipeline_robustness = true,
      .EXT_post_depth_coverage = true,
#ifdef ANV_USE_WSI_PLATFORM
      .EXT_present_timing = device->has_reg_timestamp,
#endif
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,

@@ -1005,6 +1008,13 @@ get_features(const struct anv_physical_device *pdevice,

      /* VK_KHR_pipeline_binary */
      .pipelineBinaries = true,

#ifdef ANV_USE_WSI_PLATFORM
      /* VK_EXT_present_timing */
      .presentTiming = true,
      .presentAtRelativeTime = true,
      .presentAtAbsoluteTime = true,
#endif
   };

   /* The new DOOM and Wolfenstein games require depthBounds without
@@ -29,36 +29,7 @@
#include <c11/threads.h>
#include "util/format/u_formats.h"

#ifdef HAVE_X11_PLATFORM
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>

struct loader_crtc_info {
   xcb_randr_crtc_t id;
   xcb_timestamp_t timestamp;

   int16_t x, y;
   uint16_t width, height;

   unsigned refresh_numerator;
   unsigned refresh_denominator;
};

struct loader_screen_resources {
   mtx_t mtx;

   xcb_connection_t *conn;
   xcb_screen_t *screen;

   xcb_timestamp_t config_timestamp;

   /* Number of CRTCs with an active mode set */
   unsigned num_crtcs;
   struct loader_crtc_info *crtcs;
};
#endif

#include "loader_dri_helper_screen.h"

/**
 * These formats are endian independent; they result in the same layout

@@ -110,16 +81,4 @@ loader_pipe_format_to_fourcc(enum pipe_format pipe);
enum pipe_format
loader_fourcc_to_pipe_format(uint32_t fourcc);

#ifdef HAVE_X11_PLATFORM
void
loader_init_screen_resources(struct loader_screen_resources *res,
                             xcb_connection_t *conn,
                             xcb_screen_t *screen);
bool
loader_update_screen_resources(struct loader_screen_resources *res);

void
loader_destroy_screen_resources(struct loader_screen_resources *res);
#endif

#endif /* LOADER_DRI_HELPER_H */
src/loader/loader_dri_helper_screen.h (new file, 76 lines)

@@ -0,0 +1,76 @@
/*
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that copyright
 * notice and this permission notice appear in supporting documentation, and
 * that the name of the copyright holders not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  The copyright holders make no representations
 * about the suitability of this software for any purpose.  It is provided "as
 * is" without express or implied warranty.
 *
 * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
 * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
 * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
 * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THIS SOFTWARE.
 */

#ifndef LOADER_DRI_HELPER_SCREEN_H
#define LOADER_DRI_HELPER_SCREEN_H

#ifdef HAVE_X11_PLATFORM
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>

struct loader_crtc_info {
   xcb_randr_crtc_t id;
   xcb_timestamp_t timestamp;

   int16_t x, y;
   uint16_t width, height;

   unsigned refresh_numerator;
   unsigned refresh_denominator;
};

struct loader_screen_resources {
   mtx_t mtx;

   xcb_connection_t *conn;
   xcb_screen_t *screen;

   xcb_timestamp_t config_timestamp;

   /* Number of CRTCs with an active mode set */
   unsigned num_crtcs;
   struct loader_crtc_info *crtcs;
};

void
loader_init_screen_resources(struct loader_screen_resources *res,
                             xcb_connection_t *conn,
                             xcb_screen_t *screen);
bool
loader_update_screen_resources(struct loader_screen_resources *res);

void
loader_destroy_screen_resources(struct loader_screen_resources *res);

#endif

static inline int
box_intersection_area(int16_t a_x, int16_t a_y, int16_t a_width,
                      int16_t a_height, int16_t b_x, int16_t b_y,
                      int16_t b_width, int16_t b_height)
{
   int w = MIN2(a_x + a_width, b_x + b_width) - MAX2(a_x, b_x);
   int h = MIN2(a_y + a_height, b_y + b_height) - MAX2(a_y, b_y);

   return (w < 0 || h < 0) ? 0 : w * h;
}

#endif
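box_intersection_area(), moved here from the EGL X11 code, is the standard axis-aligned overlap computation: the intersection spans from the larger of the two left/top edges to the smaller of the two right/bottom edges, and a negative extent means the boxes are disjoint. A quick numeric check (illustration only):

/* Two 100x100 boxes at (0,0) and (50,50):
 *   w = MIN2(0 + 100, 50 + 100) - MAX2(0, 50) = 100 - 50 = 50
 *   h = 50 likewise, so the overlap is 50 * 50 = 2500 pixels.
 * Disjoint boxes, e.g. (0,0,10,10) vs (20,20,5,5), give w = -10 -> area 0. */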
@@ -49,7 +49,7 @@ endif
if with_platform_x11
  subdir('x11')
endif
if with_gallium_or_lvp or with_gbm or with_platform_wayland
if with_gallium_or_lvp or with_gbm or with_platform_wayland or with_platform_x11 or with_platform_xcb
  subdir('loader')
endif
subdir('compiler')
@@ -262,6 +262,9 @@ nvk_get_device_extensions(const struct nvk_instance *instance,
      .EXT_pipeline_robustness = true,
      .EXT_physical_device_drm = true,
      .EXT_post_depth_coverage = info->cls_eng3d >= MAXWELL_B,
#ifdef NVK_USE_WSI_PLATFORM
      .EXT_present_timing = true,
#endif
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_primitives_generated_query = true,

@@ -753,6 +756,11 @@ nvk_get_device_features(const struct nv_device_info *info,

      /* VK_KHR_present_wait2 */
      .presentWait2 = true,

      /* VK_EXT_present_timing */
      .presentTiming = true,
      .presentAtRelativeTime = true,
      .presentAtAbsoluteTime = true,
#endif
   };
}
@@ -26,6 +26,10 @@ if with_platform_wayland
  files_vulkan_wsi += wp_files['color-management-v1']
endif

if with_platform_x11 or with_platform_xcb
  links_vulkan_wsi += libloader
endif

if with_platform_windows
  files_vulkan_wsi += files('wsi_common_win32.cpp')
  platform_deps += dep_dxheaders
@@ -95,6 +95,7 @@ wsi_device_init(struct wsi_device *wsi,
   WSI_GET_CB(GetPhysicalDeviceProperties2);
   WSI_GET_CB(GetPhysicalDeviceMemoryProperties);
   WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties);
   WSI_GET_CB(GetPhysicalDeviceProperties);
#undef WSI_GET_CB

   wsi->drm_info.sType =

@@ -121,10 +122,18 @@ wsi_device_init(struct wsi_device *wsi,
   VkQueueFamilyProperties queue_properties[64];
   GetPhysicalDeviceQueueFamilyProperties(pdevice, &wsi->queue_family_count, queue_properties);

   VkPhysicalDeviceProperties properties;
   GetPhysicalDeviceProperties(pdevice, &properties);
   wsi->timestamp_period = properties.limits.timestampPeriod;

   for (unsigned i = 0; i < wsi->queue_family_count; i++) {
      VkFlags req_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
      if (queue_properties[i].queueFlags & req_flags)
         wsi->queue_supports_blit |= BITFIELD64_BIT(i);

      /* Don't want to consider timestamp wrapping logic. */
      if (queue_properties[i].timestampValidBits == 64)
         wsi->queue_supports_timestamps |= BITFIELD64_BIT(i);
   }

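   /* Illustration of the masks built above (invented device layout): if family 0
    * is GRAPHICS|COMPUTE|TRANSFER with timestampValidBits == 64 and family 1 is a
    * video-only queue with timestampValidBits == 32, the loop leaves
    *   wsi->queue_supports_blit       == BITFIELD64_BIT(0)
    *   wsi->queue_supports_timestamps == BITFIELD64_BIT(0)
    * so only family 0 can both blit and report QUEUE_OPERATIONS_END times. */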
   for (VkExternalSemaphoreHandleTypeFlags handle_type = 1;
@@ -180,15 +189,19 @@ wsi_device_init(struct wsi_device *wsi,
   WSI_GET_CB(CmdPipelineBarrier);
   WSI_GET_CB(CmdCopyImage);
   WSI_GET_CB(CmdCopyImageToBuffer);
   WSI_GET_CB(CmdResetQueryPool);
   WSI_GET_CB(CmdWriteTimestamp);
   WSI_GET_CB(CreateBuffer);
   WSI_GET_CB(CreateCommandPool);
   WSI_GET_CB(CreateFence);
   WSI_GET_CB(CreateImage);
   WSI_GET_CB(CreateQueryPool);
   WSI_GET_CB(CreateSemaphore);
   WSI_GET_CB(DestroyBuffer);
   WSI_GET_CB(DestroyCommandPool);
   WSI_GET_CB(DestroyFence);
   WSI_GET_CB(DestroyImage);
   WSI_GET_CB(DestroyQueryPool);
   WSI_GET_CB(DestroySemaphore);
   WSI_GET_CB(EndCommandBuffer);
   WSI_GET_CB(FreeMemory);

@@ -200,9 +213,14 @@ wsi_device_init(struct wsi_device *wsi,
   WSI_GET_CB(GetImageSubresourceLayout);
   if (!wsi->sw)
      WSI_GET_CB(GetMemoryFdKHR);
   WSI_GET_CB(GetPhysicalDeviceCalibrateableTimeDomainsKHR);
   WSI_GET_CB(GetPhysicalDeviceProperties);
   WSI_GET_CB(GetPhysicalDeviceFormatProperties);
   WSI_GET_CB(GetPhysicalDeviceFormatProperties2);
   WSI_GET_CB(GetPhysicalDeviceImageFormatProperties2);
   WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties);
   WSI_GET_CB(GetCalibratedTimestampsKHR);
   WSI_GET_CB(GetQueryPoolResults);
   WSI_GET_CB(GetSemaphoreFdKHR);
   WSI_GET_CB(ResetFences);
   WSI_GET_CB(QueueSubmit2);
@@ -481,8 +499,10 @@ wsi_swapchain_init(const struct wsi_device *wsi,
   chain->blit.type = get_blit_type(wsi, image_params, _device);

   chain->blit.queue = NULL;
   if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT) {
      if (wsi->get_blit_queue) {
   if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT ||
       (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT)) {

      if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT && wsi->get_blit_queue) {
         chain->blit.queue = wsi->get_blit_queue(_device);
      }

@@ -503,10 +523,18 @@ wsi_swapchain_init(const struct wsi_device *wsi,
      if (chain->blit.queue != NULL) {
         queue_family_index = chain->blit.queue->queue_family_index;
      } else {
         uint64_t effective_queues = wsi->queue_supports_blit;
         if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT)
            effective_queues &= wsi->queue_supports_timestamps;

         /* Fallback. If this happens we don't advertise support for queue complete times. */
         if (!effective_queues)
            effective_queues = wsi->queue_supports_blit;

         /* Queues returned by get_blit_queue() might not be listed in
          * GetPhysicalDeviceQueueFamilyProperties, so this check is skipped for those queues.
          */
         if (!(wsi->queue_supports_blit & BITFIELD64_BIT(queue_family_index)))
         if (!(effective_queues & BITFIELD64_BIT(queue_family_index)))
            continue;
      }

@@ -616,7 +644,7 @@ wsi_swapchain_finish(struct wsi_swapchain *chain)
   chain->wsi->DestroySemaphore(chain->device, chain->present_id_timeline,
                                &chain->alloc);

   if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT) {
   if (chain->cmd_pools) {
      int cmd_pools_count = chain->blit.queue != NULL ?
         1 : chain->wsi->queue_family_count;
      for (uint32_t i = 0; i < cmd_pools_count; i++) {

@@ -628,6 +656,12 @@ wsi_swapchain_finish(struct wsi_swapchain *chain)
      vk_free(&chain->alloc, chain->cmd_pools);
   }

   if (chain->present_timing.active) {
      mtx_destroy(&chain->present_timing.lock);
      if (chain->present_timing.timings)
         vk_free(&chain->alloc, chain->present_timing.timings);
   }

   vk_object_base_finish(&chain->base);
}

@@ -815,6 +849,88 @@ fail:
   return result;
}

/**
 * Creates the timestamp-query command buffers for the end of rendering, which
 * will be used to report the QUEUE_OPERATIONS_END timestamp for
 * EXT_present_timing.
 *
 * Unless the swapchain is blitting, we don't know what queue family a Present
 * will happen on. So we make a timestamp command buffer for each so they're
 * ready to go at present time.
 */
VkResult
wsi_image_init_timestamp(const struct wsi_swapchain *chain,
                         struct wsi_image *image)
{
   const struct wsi_device *wsi = chain->wsi;
   VkResult result;

   /* Set up a command buffer to get timestamp info. */
   result = wsi->CreateQueryPool(
      chain->device,
      &(const VkQueryPoolCreateInfo){
         .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
         .queryType = VK_QUERY_TYPE_TIMESTAMP,
         .queryCount = 1,
      },
      NULL,
      &image->query_pool);

   if (result != VK_SUCCESS)
      goto fail;

   uint32_t family_count = chain->blit.queue ? 1 : wsi->queue_family_count;

   if (!image->timestamp_cmd_buffers) {
      image->timestamp_cmd_buffers =
         vk_zalloc(&chain->alloc, sizeof(VkCommandBuffer) * family_count, 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!image->timestamp_cmd_buffers)
         return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   for (uint32_t i = 0; i < family_count; i++) {
      /* We can only use timestamps on a queue that reports timestamp bits != 0.
       * Since we don't consider device timestamp wrapping in this implementation
       * (unclear how that would ever work), only report queue done where
       * timestamp bits == 64. */
      if (!chain->cmd_pools[i])
         continue;

      result = wsi->AllocateCommandBuffers(
         chain->device,
         &(const VkCommandBufferAllocateInfo){
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
            .pNext = NULL,
            .commandPool = chain->cmd_pools[i],
            .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
            .commandBufferCount = 1,
         }, &image->timestamp_cmd_buffers[i]);

      if (result != VK_SUCCESS)
         goto fail;

      wsi->BeginCommandBuffer(
         image->timestamp_cmd_buffers[i],
         &(VkCommandBufferBeginInfo) {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
         });

      wsi->CmdResetQueryPool(image->timestamp_cmd_buffers[i],
                             image->query_pool,
                             0, 1);

      wsi->CmdWriteTimestamp(image->timestamp_cmd_buffers[i],
                             VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                             image->query_pool,
                             0);

      wsi->EndCommandBuffer(image->timestamp_cmd_buffers[i]);
   }

   return VK_SUCCESS;
fail:
   return result;
}

void
wsi_destroy_image(const struct wsi_swapchain *chain,
                  struct wsi_image *image)
@@ -850,6 +966,19 @@ wsi_destroy_image(const struct wsi_swapchain *chain,
      vk_free(&chain->alloc, image->blit.cmd_buffers);
   }

   wsi->DestroyQueryPool(chain->device, image->query_pool, NULL);

   if (image->timestamp_cmd_buffers) {
      uint32_t family_count = chain->blit.queue ? 1 : wsi->queue_family_count;
      for (uint32_t i = 0; i < family_count; i++) {
         if (image->timestamp_cmd_buffers[i]) {
            wsi->FreeCommandBuffers(chain->device, chain->cmd_pools[i],
                                    1, &image->timestamp_cmd_buffers[i]);
         }
      }
      vk_free(&chain->alloc, image->timestamp_cmd_buffers);
   }

   wsi->FreeMemory(chain->device, image->memory, &chain->alloc);
   wsi->DestroyImage(chain->device, image->image, &chain->alloc);
   wsi->DestroyImage(chain->device, image->blit.image, &chain->alloc);
@@ -912,8 +1041,43 @@ wsi_GetPhysicalDeviceSurfaceCapabilities2KHR(
   struct wsi_device *wsi_device = device->wsi_device;
   struct wsi_interface *iface = wsi_device->wsi[surface->platform];

   return iface->get_capabilities2(surface, wsi_device, pSurfaceInfo->pNext,
                                   pSurfaceCapabilities);
   VkResult vr = iface->get_capabilities2(surface, wsi_device, pSurfaceInfo->pNext,
                                          pSurfaceCapabilities);
   if (vr != VK_SUCCESS)
      return vr;

   struct VkPresentTimingSurfaceCapabilitiesEXT *present_timing =
      vk_find_struct(pSurfaceCapabilities, PRESENT_TIMING_SURFACE_CAPABILITIES_EXT);

   if (present_timing && present_timing->presentTimingSupported) {
      if (wsi_device->queue_supports_blit & wsi_device->queue_supports_timestamps) {
         /* Make sure the implementation is capable of calibrating timestamps. */
         if (wsi_device->GetPhysicalDeviceCalibrateableTimeDomainsKHR && wsi_device->GetCalibratedTimestampsKHR) {
            VkTimeDomainKHR domains[64];
            uint32_t count = ARRAY_SIZE(domains);
            wsi_device->GetPhysicalDeviceCalibrateableTimeDomainsKHR(wsi_device->pdevice, &count, domains);

            bool supports_device = false, supports_monotonic = false, supports_monotonic_raw = false;

            for (uint32_t i = 0; i < count; i++) {
               if (domains[i] == VK_TIME_DOMAIN_DEVICE_KHR)
                  supports_device = true;
               else if (domains[i] == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR)
                  supports_monotonic = true;
               else if (domains[i] == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR)
                  supports_monotonic_raw = true;
            }

            /* Current present timing implementations do not use anything outside these.
             * QPC might be relevant for Dozen at some point, but for now, we only consider
             * Linux-centric platforms for present timing. */
            if (supports_device && supports_monotonic && supports_monotonic_raw)
               present_timing->presentStageQueries |= VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT;
         }
      }
   }

   return vr;
}

VKAPI_ATTR VkResult VKAPI_CALL
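On the application side, the capability patched in above surfaces through vkGetPhysicalDeviceSurfaceCapabilities2KHR; a minimal sketch using the structure names from this diff (error handling omitted):

/* Sketch: ask a surface whether present timing is supported and which
 * present stages can be queried. */
VkPresentTimingSurfaceCapabilitiesEXT timing_caps = {
   .sType = VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT,
};
VkSurfaceCapabilities2KHR caps = {
   .sType = VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR,
   .pNext = &timing_caps,
};
const VkPhysicalDeviceSurfaceInfo2KHR surface_info = {
   .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR,
   .surface = surface,
};
vkGetPhysicalDeviceSurfaceCapabilities2KHR(physical_device, &surface_info, &caps);

if (timing_caps.presentTimingSupported &&
    (timing_caps.presentStageQueries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT)) {
   /* GPU queue-completion feedback is available for swapchains on this surface. */
}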
@@ -1112,6 +1276,32 @@ wsi_CreateSwapchainKHR(VkDevice _device,

   *pSwapchain = wsi_swapchain_to_handle(swapchain);

   if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) {
      swapchain->present_timing.active = true;
      mtx_init(&swapchain->present_timing.lock, mtx_plain);

      for (uint32_t i = 0; i < swapchain->image_count; i++) {
         struct wsi_image *image = swapchain->get_wsi_image(swapchain, i);
         result = wsi_image_init_timestamp(swapchain, image);
         if (result != VK_SUCCESS) {
            swapchain->destroy(swapchain, alloc);
            return result;
         }
      }

      if (swapchain->poll_early_refresh) {
         /* If we can query the display directly, we should report something reasonable
          * on the first query, before we even present the first time. */
         uint64_t interval;
         uint64_t refresh_ns = swapchain->poll_early_refresh(swapchain, &interval);
         if (refresh_ns) {
            swapchain->present_timing.refresh_duration = refresh_ns;
            swapchain->present_timing.refresh_interval = interval;
            swapchain->present_timing.refresh_counter++;
         }
      }
   }

   return VK_SUCCESS;
}

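The branch above only runs for swapchains created with the timing flag; on the application side that is a single extra bit at creation time (sketch; all other fields are the usual swapchain parameters):

/* Sketch: opt a swapchain into present timing at creation. */
VkSwapchainCreateInfoKHR create_info = {
   .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR,
   .flags = VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT,
   .surface = surface,
   /* format, extent, usage, present mode, etc. as usual */
};
vkCreateSwapchainKHR(device, &create_info, NULL, &swapchain);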
@@ -1168,6 +1358,353 @@ wsi_ReleaseSwapchainImagesKHR(VkDevice _device,
   return VK_SUCCESS;
}

static void
wsi_swapchain_present_timing_sample_query_pool(struct wsi_swapchain *chain,
                                               struct wsi_presentation_timing *timing,
                                               struct wsi_image *image,
                                               uint64_t upper_bound)
{
   if (!(timing->requested_feedback & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT))
      return;

   /* The GPU really should be done by now, and we should be able to read the timestamp,
    * but it's possible that the present was discarded and we have a 0 timestamp here for
    * the present. In this case, we should not block to wait on the queue dispatch
    * timestamp. */
   uint64_t queue_ts;

   if (chain->wsi->GetQueryPoolResults(chain->device, image->query_pool, 0, 1, sizeof(uint64_t),
                                       &queue_ts, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT) != VK_SUCCESS)
      return;

   /* There are two ways to deal with the DEVICE timestamp domain.
    * Either we can report the PRESENT_STAGE_LOCAL domain and let the application
    * calibrate the timestamps on its own; however, this creates an annoying situation
    * where the application is able to call QueuePresentKHR requesting that we use the
    * QUEUE_OPERATIONS_END time domain as the reference (targetTimeDomainPresentStage).
    * In that case, we are forced to re-calibrate the timestamp anyway.
    * We would also need to implement per-driver plumbing to forward the SWAPCHAIN_LOCAL
    * and PRESENT_STAGE_LOCAL time domains to the swapchain and query the underlying
    * time domain. Instead of dealing with this mess, just recalibrate the timestamp.
    * The accuracy of queue_operations_end is not particularly important. */

   /* We have already made sure that the implementation supports these. */
   const VkCalibratedTimestampInfoKHR infos[2] = {
      {
         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
         .timeDomain = VK_TIME_DOMAIN_DEVICE_KHR,
      },
      {
         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
         .timeDomain = chain->present_timing.time_domain,
      },
   };

   uint64_t timestamps[2];
   uint64_t max_deviation;
   if (chain->wsi->GetCalibratedTimestampsKHR(chain->device, 2, infos, timestamps, &max_deviation) == VK_SUCCESS) {
      int64_t device_delta_ticks = (int64_t)queue_ts - (int64_t)timestamps[0];
      int64_t device_delta_ns = (int64_t)((double)chain->wsi->timestamp_period * (double)device_delta_ticks);
      uint64_t queue_timestamp = timestamps[1] + device_delta_ns;

      /* Make sure we don't report the GPU completing after we flip the request.
       * Avoids any weird precision issues creeping through. */
      if (upper_bound)
         queue_timestamp = MIN2(queue_timestamp, upper_bound);

      timing->queue_done_time = queue_timestamp;
   }
}

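/* Worked example of the rebasing above (invented numbers, illustration only):
 * timestamp_period = 80 ns/tick; the query pool returns queue_ts = 1000100 ticks;
 * the calibrated pair reads DEVICE = 1000000 ticks <-> host clock = 5000000000 ns.
 * Then device_delta_ticks = 100, device_delta_ns = 80 * 100 = 8000, and the
 * reported queue_done_time = 5000000000 + 8000 = 5000008000 ns (host domain),
 * clamped to upper_bound if the flip was somehow reported earlier. */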
static void
wsi_swapchain_present_timing_notify_recycle_locked(struct wsi_swapchain *chain,
                                                   struct wsi_image *image)
{
   assert(chain->present_timing.active);

   for (size_t i = 0; i < chain->present_timing.timings_count; i++) {
      if (chain->present_timing.timings[i].image == image) {
         /* A different present takes ownership of the image's query pool index now. */
         chain->present_timing.timings[i].image = NULL;
         chain->present_timing.timings[i].queue_done_time = 0;

         /* We waited on the progress fence, so the timestamp query is guaranteed to be done. */
         wsi_swapchain_present_timing_sample_query_pool(chain, &chain->present_timing.timings[i], image, 0);
         break;
      }
   }
}

static VkResult wsi_common_allocate_timing_request(
   struct wsi_swapchain *swapchain, const VkPresentTimingInfoEXT *timing,
   uint64_t present_id, struct wsi_image *image)
{
   VkResult vr = VK_SUCCESS;
   mtx_lock(&swapchain->present_timing.lock);

   if (swapchain->present_timing.timings_count >= swapchain->present_timing.timings_capacity) {
      vr = VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT;
      goto err;
   }

   wsi_swapchain_present_timing_notify_recycle_locked(swapchain, image);

   struct wsi_presentation_timing *wsi_timing =
      &swapchain->present_timing.timings[swapchain->present_timing.timings_count++];

   memset(wsi_timing, 0, sizeof(*wsi_timing));
   wsi_timing->serial = ++swapchain->present_timing.serial;
   wsi_timing->target_time = timing->targetTime;
   wsi_timing->present_id = present_id;
   wsi_timing->requested_feedback = timing->presentStageQueries;
   wsi_timing->image = image;

   /* Ignore the time domain since we have a static domain. */

err:
   mtx_unlock(&swapchain->present_timing.lock);
   return vr;
}

void
wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain,
                                               uint64_t timing_serial,
                                               uint64_t timestamp,
                                               struct wsi_image *image)
{
   assert(chain->present_timing.active);
   mtx_lock(&chain->present_timing.lock);

   for (size_t i = 0; i < chain->present_timing.timings_count; i++) {
      if (chain->present_timing.timings[i].serial == timing_serial) {
         chain->present_timing.timings[i].complete_time = timestamp;
         chain->present_timing.timings[i].complete = VK_TRUE;

         /* It's possible that QueuePresentKHR already handled the queue done timestamp
          * for us, since the image was recycled before presentation could fully complete.
          * In this case, we no longer own the timestamp query pool index, so just skip. */
         if (chain->present_timing.timings[i].image != image)
            break;

         /* 0 means unknown. The application can probably fall back to its own timestamps
          * if it wants to. */
         chain->present_timing.timings[i].queue_done_time = 0;
         wsi_swapchain_present_timing_sample_query_pool(chain, &chain->present_timing.timings[i], image, timestamp);
         chain->present_timing.timings[i].image = NULL;
         break;
      }
   }

   mtx_unlock(&chain->present_timing.lock);
}

void
wsi_swapchain_present_timing_update_refresh_rate(struct wsi_swapchain *chain,
                                                 uint64_t refresh_duration,
                                                 uint64_t refresh_interval,
                                                 int minimum_delta_for_update)
{
   mtx_lock(&chain->present_timing.lock);

   int64_t duration_delta = llabs((int64_t)refresh_duration - (int64_t)chain->present_timing.refresh_duration);
   int64_t interval_delta = llabs((int64_t)refresh_interval - (int64_t)chain->present_timing.refresh_interval);

   /* When the refresh rate is an estimate, the value may fluctuate slightly from frame
    * to frame; don't spam refresh counter updates unless there is a meaningful delta.
    * Applications that use absolute timings are expected to recalibrate based on feedback. */
   if (duration_delta > minimum_delta_for_update || interval_delta > minimum_delta_for_update ||
       chain->present_timing.refresh_counter == 0) {
      /* We'll report this updated refresh counter in feedback,
       * so that the application knows to requery the refresh rate. */
      chain->present_timing.refresh_counter++;
      chain->present_timing.refresh_duration = refresh_duration;
      chain->present_timing.refresh_interval = refresh_interval;
   }

   mtx_unlock(&chain->present_timing.lock);
}

VKAPI_ATTR VkResult VKAPI_CALL
wsi_GetPastPresentationTimingEXT(
   VkDevice device,
   const VkPastPresentationTimingInfoEXT* pPastPresentationTimingInfo,
   VkPastPresentationTimingPropertiesEXT* pPastPresentationTimingProperties)
{
   VK_FROM_HANDLE(wsi_swapchain, swapchain, pPastPresentationTimingInfo->swapchain);
   VkResult vr = VK_SUCCESS;
   bool out_of_order = (pPastPresentationTimingInfo->flags &
                        VK_PAST_PRESENTATION_TIMING_ALLOW_OUT_OF_ORDER_RESULTS_BIT_EXT) != 0;

   if (swapchain->poll_timing_request)
      swapchain->poll_timing_request(swapchain);

   mtx_lock(&swapchain->present_timing.lock);

   pPastPresentationTimingProperties->timingPropertiesCounter = swapchain->present_timing.refresh_counter;
   pPastPresentationTimingProperties->timeDomainsCounter = 1;

   /* This implementation always returns results in-order, so we can ignore the
    * out-of-order flag. TODO: Honor the partial results flag. */

   uint32_t done_count = 0;
   for (uint32_t i = 0; i < swapchain->present_timing.timings_count; i++) {
      /* If different presents request different kinds of state, we may get completion
       * out of order. If the flag is not set, we cannot report frame N until we have
       * completed all frames M < N. */
      if (swapchain->present_timing.timings[i].complete)
         done_count++;
      else if (!out_of_order)
         break;
   }

   /* We don't remove timing info from the queue until it is consumed. */
   if (!pPastPresentationTimingProperties->pPresentationTimings) {
      pPastPresentationTimingProperties->presentationTimingCount = done_count;
      mtx_unlock(&swapchain->present_timing.lock);
      return VK_SUCCESS;
   }

   VK_OUTARRAY_MAKE_TYPED(VkPastPresentationTimingEXT, timings,
                          pPastPresentationTimingProperties->pPresentationTimings,
                          &pPastPresentationTimingProperties->presentationTimingCount);

   uint32_t new_timings_count = 0;
   bool stop_timing_removal = false;

   for (uint32_t i = 0; i < swapchain->present_timing.timings_count; i++) {
      const struct wsi_presentation_timing *in_timing = &swapchain->present_timing.timings[i];

      if (!swapchain->present_timing.timings[i].complete || stop_timing_removal) {
         /* Keep the output ordered to be compliant without having to re-sort every time.
          * The queue depth for timestamps is expected to be small. */
         swapchain->present_timing.timings[new_timings_count++] = swapchain->present_timing.timings[i];
         if (!out_of_order)
            stop_timing_removal = true;
         continue;
      }

      vk_outarray_append_typed(VkPastPresentationTimingEXT, &timings, timing) {
         timing->targetTime = swapchain->present_timing.timings[i].target_time;
         timing->presentId = in_timing->present_id;
         timing->timeDomain = swapchain->present_timing.time_domain;
         timing->timeDomainId = 0;
         timing->reportComplete = in_timing->complete;

         /* No INCOMPLETE is reported here. Failures are silent.
          * However, the application already knows the upper bound for the stage count
          * based on the query, so this should never fail. */
         VK_OUTARRAY_MAKE_TYPED(VkPresentStageTimeEXT, stages, timing->pPresentStages, &timing->presentStageCount);

         if (in_timing->requested_feedback & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) {
            vk_outarray_append_typed(VkPresentStageTimeEXT, &stages, stage) {
               stage->stage = VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT;
               stage->time = in_timing->queue_done_time;
            }
         }

         if (in_timing->requested_feedback & ~VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) {
            vk_outarray_append_typed(VkPresentStageTimeEXT, &stages, stage) {
               stage->stage = in_timing->requested_feedback & ~VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT;
               /* It is expected that the implementation will only expose one timing value. */
               assert(util_bitcount(stage->stage) == 1);
               stage->time = in_timing->complete_time;
            }
         }
      }
   }

   swapchain->present_timing.timings_count = new_timings_count;
   vr = vk_outarray_status(&timings);

   /* This function must be fully atomic within the implementation, so it has to be thread safe. */
   mtx_unlock(&swapchain->present_timing.lock);
   return vr;
}

VKAPI_ATTR VkResult VKAPI_CALL
wsi_GetSwapchainTimeDomainPropertiesEXT(
   VkDevice device,
   VkSwapchainKHR swapchain_,
   VkSwapchainTimeDomainPropertiesEXT* pSwapchainTimeDomainProperties,
   uint64_t* pTimeDomainsCounter)
{
   VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_);

   /* We don't change time domains. Everything is static. */
   if (pTimeDomainsCounter)
      *pTimeDomainsCounter = 1;

   /* This style is a bit goofy and doesn't map cleanly to anything. */
   if (!pSwapchainTimeDomainProperties->pTimeDomainIds && !pSwapchainTimeDomainProperties->pTimeDomains) {
      pSwapchainTimeDomainProperties->timeDomainCount = 1;
      return VK_SUCCESS;
   } else if (pSwapchainTimeDomainProperties->timeDomainCount == 0) {
      return VK_INCOMPLETE;
   }

   pSwapchainTimeDomainProperties->timeDomainCount = 1;
   if (pSwapchainTimeDomainProperties->pTimeDomains)
      *pSwapchainTimeDomainProperties->pTimeDomains = swapchain->present_timing.time_domain;
   if (pSwapchainTimeDomainProperties->pTimeDomainIds)
      *pSwapchainTimeDomainProperties->pTimeDomainIds = 0;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
wsi_GetSwapchainTimingPropertiesEXT(
   VkDevice device,
   VkSwapchainKHR swapchain_,
   VkSwapchainTimingPropertiesEXT* pSwapchainTimingProperties,
   uint64_t* pSwapchainTimingPropertiesCounter)
{
   VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_);
   VkResult vr;

   mtx_lock(&swapchain->present_timing.lock);
   /* If we don't have data yet, must return VK_NOT_READY. */
   vr = swapchain->present_timing.refresh_counter ? VK_SUCCESS : VK_NOT_READY;
   pSwapchainTimingProperties->refreshInterval = swapchain->present_timing.refresh_interval;
   pSwapchainTimingProperties->refreshDuration = swapchain->present_timing.refresh_duration;
   if (pSwapchainTimingPropertiesCounter)
      *pSwapchainTimingPropertiesCounter = swapchain->present_timing.refresh_counter;
   mtx_unlock(&swapchain->present_timing.lock);
   return vr;
}

VKAPI_ATTR VkResult VKAPI_CALL
wsi_SetSwapchainPresentTimingQueueSizeEXT(
   VkDevice device,
   VkSwapchainKHR swapchain_,
   uint32_t size)
{
   VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_);
   assert(swapchain->present_timing.active);
   VkResult vr = VK_SUCCESS;

   mtx_lock(&swapchain->present_timing.lock);

   if (size < swapchain->present_timing.timings_count) {
      vr = VK_NOT_READY;
      goto error;
   }

   if (size > swapchain->present_timing.timings_capacity) {
      void *new_ptr = vk_realloc(&swapchain->alloc, swapchain->present_timing.timings,
                                 sizeof(*swapchain->present_timing.timings) * size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (new_ptr) {
         swapchain->present_timing.timings = new_ptr;
         swapchain->present_timing.timings_capacity = size;
      } else {
         vr = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto error;
      }
   } else {
      swapchain->present_timing.timings_capacity = size;
   }

error:
   mtx_unlock(&swapchain->present_timing.lock);
   return vr;
}

VkDeviceMemory
wsi_common_get_memory(VkSwapchainKHR _swapchain, uint32_t index)
{
@@ -1521,6 +2058,50 @@ wsi_common_queue_present(const struct wsi_device *wsi,
      vk_find_struct_const(pPresentInfo->pNext, PRESENT_ID_2_KHR);
   const VkSwapchainPresentFenceInfoKHR *present_fence_info =
      vk_find_struct_const(pPresentInfo->pNext, SWAPCHAIN_PRESENT_FENCE_INFO_KHR);
   const VkPresentTimingsInfoEXT *present_timings_info =
      vk_find_struct_const(pPresentInfo->pNext, PRESENT_TIMINGS_INFO_EXT);

   bool needs_timing_command_buffer = false;

   if (present_timings_info) {
      /* If we fail a present due to a full queue, it's a little unclear from the
       * spec if we should treat it as OUT_OF_DATE or OUT_OF_HOST_MEMORY for
       * purposes of signaling. Validation layers and at least one other implementation
       * in the wild seem to treat it as OUT_OF_DATE, so do that. */
      for (uint32_t i = 0; i < present_timings_info->swapchainCount; i++) {
         const VkPresentTimingInfoEXT *info = &present_timings_info->pTimingInfos[i];
         VK_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
         if (results[i] != VK_SUCCESS || !swapchain->set_timing_request)
            continue;

         assert(swapchain->present_timing.active);

         uint32_t image_index = pPresentInfo->pImageIndices[i];

         /* EXT_present_timing is defined to only work with present_id2.
          * It's only used when reporting back timings. */
         results[i] = wsi_common_allocate_timing_request(
            swapchain, info, present_ids2 ? present_ids2->pPresentIds[i] : 0,
            swapchain->get_wsi_image(swapchain, image_index));

         /* The application is responsible for allocating sufficient size here.
          * We fail with VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT if the application is bugged. */
         if (results[i] == VK_SUCCESS) {
            swapchain->set_timing_request(swapchain, &(struct wsi_image_timing_request) {
               .serial = swapchain->present_timing.serial,
               .time = info->targetTime,
               .flags = info->flags,
            });

            if (info->presentStageQueries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) {
               /* It's not a problem if we redundantly submit timing command buffers.
                * VUID-12234 also says all swapchains in this present must have been
                * created with present timing enabled. */
               needs_timing_command_buffer = true;
            }
         }
      }
   }

   /* Gather up all the semaphores and fences we need to signal per-image */
   STACK_ARRAY(struct wsi_image_signal_info, image_signal_infos,
@@ -1596,15 +2177,15 @@ wsi_common_queue_present(const struct wsi_device *wsi,
    * the per-image semaphores and fences with the blit.
    */
   {
      STACK_ARRAY(VkCommandBufferSubmitInfo, blit_command_buffer_infos,
                  pPresentInfo->swapchainCount);
      STACK_ARRAY(VkCommandBufferSubmitInfo, command_buffer_infos,
                  pPresentInfo->swapchainCount * 2);
      STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphore_infos,
                  pPresentInfo->swapchainCount *
                  ARRAY_SIZE(image_signal_infos[0].semaphore_infos));
      STACK_ARRAY(VkFence, fences,
                  pPresentInfo->swapchainCount *
                  ARRAY_SIZE(image_signal_infos[0].fences));
      uint32_t blit_count = 0, signal_semaphore_count = 0, fence_count = 0;
      uint32_t command_buffer_count = 0, signal_semaphore_count = 0, fence_count = 0;

      for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
         VK_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);

@@ -1612,14 +2193,27 @@ wsi_common_queue_present(const struct wsi_device *wsi,
         struct wsi_image *image =
            swapchain->get_wsi_image(swapchain, image_index);

         bool separate_queue_blit = swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT &&
                                    swapchain->blit.queue != NULL;

         /* For TIMING_QUEUE_FULL_EXT, ensure sync objects are signaled,
          * but don't do any real work. */
         if (results[i] == VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT || !separate_queue_blit) {
            for (uint32_t j = 0; j < image_signal_infos[i].semaphore_count; j++) {
               signal_semaphore_infos[signal_semaphore_count++] =
                  image_signal_infos[i].semaphore_infos[j];
            }
            for (uint32_t j = 0; j < image_signal_infos[i].fence_count; j++)
               fences[fence_count++] = image_signal_infos[i].fences[j];
         }

         if (results[i] != VK_SUCCESS)
            continue;

         /* If we're blitting on another swapchain, just signal the blit
          * semaphore for now.
          */
         if (swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT &&
             swapchain->blit.queue != NULL) {
         if (separate_queue_blit) {
            /* Create the blit semaphore if needed */
            if (swapchain->blit.semaphores[image_index] == VK_NULL_HANDLE) {
               const VkSemaphoreCreateInfo sem_info = {
@@ -1644,27 +2238,27 @@ wsi_common_queue_present(const struct wsi_device *wsi,
         }

         if (swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT) {
            blit_command_buffer_infos[blit_count++] = (VkCommandBufferSubmitInfo) {
            command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) {
               .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
               .commandBuffer =
                  image->blit.cmd_buffers[queue->queue_family_index],
            };
         }

         for (uint32_t j = 0; j < image_signal_infos[i].semaphore_count; j++) {
            signal_semaphore_infos[signal_semaphore_count++] =
               image_signal_infos[i].semaphore_infos[j];
         if (needs_timing_command_buffer) {
            command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) {
               .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
               .commandBuffer = image->timestamp_cmd_buffers[queue->queue_family_index],
            };
         }
         for (uint32_t j = 0; j < image_signal_infos[i].fence_count; j++)
            fences[fence_count++] = image_signal_infos[i].fences[j];
      }

      const VkSubmitInfo2 submit_info = {
         .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
         .waitSemaphoreInfoCount = pPresentInfo->waitSemaphoreCount,
         .pWaitSemaphoreInfos = semaphore_wait_infos,
         .commandBufferInfoCount = blit_count,
         .pCommandBufferInfos = blit_command_buffer_infos,
         .commandBufferInfoCount = command_buffer_count,
         .pCommandBufferInfos = command_buffer_infos,
         .signalSemaphoreInfoCount = signal_semaphore_count,
         .pSignalSemaphoreInfos = signal_semaphore_infos,
      };

@@ -1680,7 +2274,7 @@ wsi_common_queue_present(const struct wsi_device *wsi,

      STACK_ARRAY_FINISH(fences);
      STACK_ARRAY_FINISH(signal_semaphore_infos);
      STACK_ARRAY_FINISH(blit_command_buffer_infos);
      STACK_ARRAY_FINISH(command_buffer_infos);
   }

   /* Now do blits on any blit queues */

@@ -1693,8 +2287,10 @@ wsi_common_queue_present(const struct wsi_device *wsi,
      if (results[i] != VK_SUCCESS)
         continue;

      if (swapchain->blit.type == WSI_SWAPCHAIN_NO_BLIT ||
          swapchain->blit.queue == NULL)
      bool separate_queue_blit = swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT &&
                                 swapchain->blit.queue != NULL;

      if (!separate_queue_blit)
         continue;

      const VkSemaphoreSubmitInfo blit_semaphore_info = {

@@ -1703,17 +2299,27 @@ wsi_common_queue_present(const struct wsi_device *wsi,
         .semaphore = swapchain->blit.semaphores[image_index],
      };

      const VkCommandBufferSubmitInfo blit_command_buffer_info = {
      VkCommandBufferSubmitInfo command_buffer_infos[2];
      uint32_t command_buffer_count = 0;

      command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) {
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
         .commandBuffer = image->blit.cmd_buffers[0],
      };

      if (needs_timing_command_buffer) {
         command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
            .commandBuffer = image->timestamp_cmd_buffers[0],
         };
      }

      const VkSubmitInfo2 submit_info = {
         .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
         .waitSemaphoreInfoCount = 1,
         .pWaitSemaphoreInfos = &blit_semaphore_info,
         .commandBufferInfoCount = 1,
         .pCommandBufferInfos = &blit_command_buffer_info,
         .commandBufferInfoCount = command_buffer_count,
         .pCommandBufferInfos = command_buffer_infos,
         .signalSemaphoreInfoCount = image_signal_infos[i].semaphore_count,
         .pSignalSemaphoreInfos = image_signal_infos[i].semaphore_infos,
      };

@@ -62,6 +62,8 @@ struct wsi_device {
   VkPhysicalDeviceMemoryProperties memory_props;
   uint32_t queue_family_count;
   uint64_t queue_supports_blit;
   uint64_t queue_supports_timestamps;
   float timestamp_period;

   VkPhysicalDeviceDrmPropertiesEXT drm_info;
   VkPhysicalDevicePCIBusInfoPropertiesEXT pci_bus_info;

@@ -201,28 +203,37 @@ struct wsi_device {
   WSI_CB(CmdPipelineBarrier);
   WSI_CB(CmdCopyImage);
   WSI_CB(CmdCopyImageToBuffer);
   WSI_CB(CmdResetQueryPool);
   WSI_CB(CmdWriteTimestamp);
   WSI_CB(CreateBuffer);
   WSI_CB(CreateCommandPool);
   WSI_CB(CreateFence);
   WSI_CB(CreateImage);
   WSI_CB(CreateQueryPool);
   WSI_CB(CreateSemaphore);
   WSI_CB(DestroyBuffer);
   WSI_CB(DestroyCommandPool);
   WSI_CB(DestroyFence);
   WSI_CB(DestroyImage);
   WSI_CB(DestroyQueryPool);
   WSI_CB(DestroySemaphore);
   WSI_CB(EndCommandBuffer);
   WSI_CB(FreeMemory);
   WSI_CB(FreeCommandBuffers);
   WSI_CB(GetBufferMemoryRequirements);
   WSI_CB(GetCalibratedTimestampsKHR);
   WSI_CB(GetFenceStatus);
   WSI_CB(GetImageDrmFormatModifierPropertiesEXT);
   WSI_CB(GetImageMemoryRequirements);
   WSI_CB(GetImageSubresourceLayout);
   WSI_CB(GetMemoryFdKHR);
   WSI_CB(GetPhysicalDeviceCalibrateableTimeDomainsKHR);
   WSI_CB(GetPhysicalDeviceProperties);
   WSI_CB(GetPhysicalDeviceFormatProperties);
   WSI_CB(GetPhysicalDeviceFormatProperties2);
   WSI_CB(GetPhysicalDeviceImageFormatProperties2);
   WSI_CB(GetPhysicalDeviceQueueFamilyProperties);
   WSI_CB(GetQueryPoolResults);
   WSI_CB(GetSemaphoreFdKHR);
   WSI_CB(ResetFences);
   WSI_CB(QueueSubmit2);
@@ -156,6 +156,12 @@ enum colorspace_enum {
   COLORSPACE_ENUM_MAX,
};

enum vrr_tristate {
   VRR_TRISTATE_UNKNOWN,
   VRR_TRISTATE_DISABLED,
   VRR_TRISTATE_ENABLED,
};

typedef struct wsi_display_connector_metadata {
   VkHdrMetadataEXT hdr_metadata;
   bool supports_st2084;

@@ -185,6 +191,10 @@ typedef struct wsi_display_connector {
   struct wsi_display_connector_metadata metadata;
   uint32_t count_formats;
   uint32_t *formats;
   enum vrr_tristate vrr_capable;
   enum vrr_tristate vrr_enabled;
   uint64_t last_frame;
   uint64_t last_nsec;
} wsi_display_connector;

struct wsi_display {

@@ -370,6 +380,11 @@ find_properties(struct wsi_display_connector *connector, uint32_t count_props, u
      }
   }

   if (!strcmp(prop->name, "vrr_capable"))
      connector->vrr_capable = prop_values[p] != 0 ? VRR_TRISTATE_ENABLED : VRR_TRISTATE_DISABLED;
   if (!strcmp(prop->name, "VRR_ENABLED"))
      connector->vrr_enabled = prop_values[p] != 0 ? VRR_TRISTATE_ENABLED : VRR_TRISTATE_DISABLED;

   drmModeFreeProperty(prop);
}

@@ -431,38 +446,45 @@ find_connector_properties(struct wsi_display_connector *connector, drmModeConnec
enum wsi_image_state {
   WSI_IMAGE_IDLE,
   WSI_IMAGE_DRAWING,
   WSI_IMAGE_WAITING,
   WSI_IMAGE_QUEUED_AFTER_WAIT,
   WSI_IMAGE_QUEUED,
   WSI_IMAGE_FLIPPING,
   WSI_IMAGE_DISPLAYING
};

struct wsi_display_image {
   struct wsi_image base;
   struct wsi_display_swapchain *chain;
   enum wsi_image_state state;
   uint32_t fb_id;
   uint32_t buffer[4];
   uint64_t flip_sequence;
   uint64_t present_id;
   struct wsi_image_timing_request timing_request;
   struct wsi_display_fence *fence;
   uint64_t minimum_ns;
};

struct wsi_display_swapchain {
   struct wsi_swapchain base;
   struct wsi_display *wsi;
   VkIcdSurfaceDisplay *surface;
   uint64_t flip_sequence;
   VkResult status;

   mtx_t present_id_mutex;
   struct u_cnd_monotonic present_id_cond;
   uint64_t present_id;
   VkResult present_id_error;

   /* A unique ID for the color outcome of the swapchain. A serial of 0 means unset/default. */
   uint64_t color_outcome_serial;
   VkHdrMetadataEXT hdr_metadata;

   struct wsi_image_timing_request timing_request;

   struct wsi_display_image images[0];
};

struct wsi_display_fence {

@@ -473,6 +495,9 @@ struct wsi_display_fence {
   uint32_t syncobj; /* syncobj to signal on event */
   uint64_t sequence;
   bool device_event; /* fence is used for device events */
   struct wsi_display_connector *connector;
   /* Image to be flipped, if this fence is for an image in the WSI_IMAGE_WAITING
    * state that will need to move to QUEUED. */
   struct wsi_display_image *image;
};

struct wsi_display_sync {

@@ -1319,6 +1344,16 @@ wsi_display_surface_get_capabilities2(VkIcdSurfaceBase *icd_surface,
         break;
      }

      case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
         VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;

         wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT;
         wait->presentTimingSupported = VK_TRUE;
         wait->presentAtAbsoluteTimeSupported = VK_TRUE;
         wait->presentAtRelativeTimeSupported = VK_TRUE;
         break;
      }

      default:
         /* Ignored */
         break;
@@ -1678,6 +1713,8 @@ wsi_display_image_init(struct wsi_swapchain *drv_chain,

   image->chain = chain;
   image->state = WSI_IMAGE_IDLE;
   image->fence = NULL;
   image->minimum_ns = 0;
   image->fb_id = 0;

   uint64_t *fb_modifiers = NULL;

@@ -1789,6 +1826,12 @@ wsi_display_idle_old_displaying(struct wsi_display_image *active_image)
static VkResult
_wsi_display_queue_next(struct wsi_swapchain *drv_chain);

static uint64_t
widen_32_to_64(uint32_t narrow, uint64_t near)
{
   return near + (int32_t)(narrow - near);
}

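/* Wraparound behaviour of widen_32_to_64() (illustration only):
 *   widen_32_to_64(2, 0xffffffff)           == 0x100000002  (counter wrapped forward)
 *   widen_32_to_64(0xfffffffe, 0x100000001) == 0xfffffffe   (slightly stale event)
 * The (int32_t) cast turns the 32-bit difference into a signed delta, so the
 * result is the 64-bit value nearest to `near` that matches `narrow` mod 2^32. */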
/**
 * Wakes up any vkWaitForPresentKHR() waiters on the last present to this
 * image.

@@ -1817,6 +1860,17 @@ wsi_display_surface_error(struct wsi_display_swapchain *swapchain, VkResult resu
   mtx_unlock(&swapchain->present_id_mutex);
}

/**
 * libdrm callback for when we get a DRM_EVENT_PAGE_FLIP in response to our
 * atomic commit with DRM_MODE_PAGE_FLIP_EVENT. That event can happen at any
 * point after vblank, when the old image is no longer being scanned out and
 * that commit is set up to be scanned out next.
 *
 * This means that we can queue up a new atomic commit, if there were presents
 * that we hadn't submitted yet (the event queue is driven by
 * wsi_display_wait_thread(), so that's what ends up submitting atomic commits
 * most of the time).
 **/
static void
wsi_display_page_flip_handler2(int fd,
                               unsigned int frame,
@@ -1828,6 +1882,28 @@ wsi_display_page_flip_handler2(int fd,
   struct wsi_display_image *image = data;
   struct wsi_display_swapchain *chain = image->chain;

   VkIcdSurfaceDisplay *surface = chain->surface;
   wsi_display_mode *display_mode =
      wsi_display_mode_from_handle(surface->displayMode);
   wsi_display_connector *connector = display_mode->connector;

   uint64_t nsec = 1000000000ull * sec + 1000ull * usec;
   /* If we're on the VRR timing path, ensure we get a stable pace. */
   nsec = MAX2(nsec, image->minimum_ns);

   uint64_t frame64 = widen_32_to_64(frame, connector->last_frame);
   connector->last_frame = frame64;
   connector->last_nsec = nsec;

   /* No need to update the refresh rate estimate here; it's static, based on the
    * mode. Notify completion before we signal present wait so that applications
    * get the lowest possible latency for present time. */
   if (image->timing_request.serial) {
      wsi_swapchain_present_timing_notify_completion(
         &chain->base, image->timing_request.serial,
         nsec, &image->base);
   }

   wsi_display_debug("image %ld displayed at %d\n",
                     image - &(image->chain->images[0]), frame);
   image->state = WSI_IMAGE_DISPLAYING;
@@ -1841,42 +1917,29 @@ wsi_display_page_flip_handler2(int fd,
   chain->status = result;
}

static void wsi_display_fence_event_handler(struct wsi_display_fence *fence,
                                            uint64_t nsec,
                                            uint64_t frame);

/**
 * libdrm callback for when we get a DRM_EVENT_CRTC_SEQUENCE in response to a
 * drmCrtcQueueSequence(), indicating that the first pixel of a new frame is
 * being scanned out.
 **/
static void wsi_display_sequence_handler(int fd, uint64_t frame,
                                         uint64_t nsec, uint64_t user_data)
{
   struct wsi_display_fence *fence =
      (struct wsi_display_fence *) (uintptr_t) user_data;

   wsi_display_fence_event_handler(fence, nsec, frame);
}

static drmEventContext event_context = {
   .version = DRM_EVENT_CONTEXT_VERSION,
#if DRM_EVENT_CONTEXT_VERSION >= 3
   .page_flip_handler = NULL,
   .page_flip_handler2 = wsi_display_page_flip_handler2,
#endif
   .vblank_handler = NULL,
   .sequence_handler = wsi_display_sequence_handler,
};
@@ -2383,13 +2446,30 @@ wsi_display_fence_check_free(struct wsi_display_fence *fence)
   vk_free(fence->wsi->alloc, fence);
}

static void wsi_display_fence_event_handler(struct wsi_display_fence *fence,
                                            uint64_t nsec, uint64_t frame)
{
   struct wsi_display_connector *connector = fence->connector;
   struct wsi_display_image *image = fence->image;

   if (fence->syncobj) {
      (void) drmSyncobjSignal(fence->wsi->syncobj_fd, &fence->syncobj, 1);
      (void) drmSyncobjDestroy(fence->wsi->syncobj_fd, fence->syncobj);
   }

   if (connector) {
      connector->last_nsec = nsec;
      connector->last_frame = frame;
   }

   if (image && image->state == WSI_IMAGE_WAITING) {
      /* We may need to do the final sleep on the CPU to resolve VRR timings. */
      image->state = WSI_IMAGE_QUEUED_AFTER_WAIT;
      VkResult result = _wsi_display_queue_next(&image->chain->base);
      if (result != VK_SUCCESS)
         image->chain->status = result;
   }

   fence->event_received = true;
   wsi_display_fence_check_free(fence);
}
@@ -2822,9 +2902,11 @@ _wsi_display_queue_next(struct wsi_swapchain *drv_chain)

   switch (tmp_image->state) {
   case WSI_IMAGE_FLIPPING:
   case WSI_IMAGE_WAITING:
      /* already flipping or waiting for a flip, don't send another to the kernel yet */
      return VK_SUCCESS;
   case WSI_IMAGE_QUEUED:
   case WSI_IMAGE_QUEUED_AFTER_WAIT:
      /* find the oldest queued */
      if (!image || tmp_image->flip_sequence < image->flip_sequence)
         image = tmp_image;
@@ -2837,6 +2919,95 @@ _wsi_display_queue_next(struct wsi_swapchain *drv_chain)
   if (!image)
      return VK_SUCCESS;

   if (image->fence) {
      image->fence->image = NULL;
      wsi_display_fence_destroy(image->fence);
      image->fence = NULL;
   }

   unsigned num_cycles_to_skip = 0;
   int64_t target_relative_ns = 0;
   bool skip_timing = false;
   bool nearest_cycle =
      (image->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) != 0;

   if (image->timing_request.time != 0) {
      /* Ensure we have some kind of timebase to work from. */
      if (!connector->last_frame)
         drmCrtcGetSequence(wsi->fd, connector->crtc_id, &connector->last_frame, &connector->last_nsec);

      if (!connector->last_frame || chain->base.present_timing.refresh_duration == 0) {
         /* Something has gone very wrong. Just ignore present timing for safety. */
         skip_timing = true;
         wsi_display_debug("Cannot get a stable timebase, last frame = %"PRIu64", refresh_duration = %"PRIu64".\n",
                           connector->last_frame, chain->base.present_timing.refresh_duration);
      }
   }

   if (!skip_timing && image->state == WSI_IMAGE_QUEUED && image->timing_request.time != 0) {
      target_relative_ns = (int64_t)image->timing_request.time;

      /* We need to estimate the number of refresh cycles to wait for. */
      if (!(image->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_RELATIVE_TIME_BIT_EXT)) {
         target_relative_ns -= (int64_t)connector->last_nsec;
      }

      if (nearest_cycle) {
         /* No need to lock, we never update refresh_duration dynamically. */
         target_relative_ns -= (int64_t)chain->base.present_timing.refresh_duration / 2;
      } else {
         /* If the application computes an exact value that lands exactly on the refresh cycle,
          * pull the estimate back a little, since DRM precision is 1us. */
         target_relative_ns -= 1000;
      }
   }

   target_relative_ns = MAX2(target_relative_ns, 0);
   if (target_relative_ns && chain->base.present_timing.refresh_duration)
      num_cycles_to_skip = target_relative_ns / chain->base.present_timing.refresh_duration;

   /* The CRTC cycle counter is not reliable on VRR. We cannot use it as a time base. */
   bool is_vrr = connector->vrr_enabled == VRR_TRISTATE_ENABLED &&
                 connector->vrr_capable == VRR_TRISTATE_ENABLED;

   if (num_cycles_to_skip) {
      if (!is_vrr) {
         /* On FRR, we can rely on vblank events to guide time progression. */
         VkDisplayKHR display = wsi_display_connector_to_handle(connector);
         image->fence = wsi_display_fence_alloc(wsi, -1);

         if (image->fence) {
            image->fence->connector = connector;
            image->fence->image = image;

            uint64_t frame_queued;
            uint64_t target_frame = connector->last_frame + num_cycles_to_skip;
            VkResult result = wsi_register_vblank_event(image->fence, chain->base.wsi, display,
                                                        0, target_frame, &frame_queued);

            if (result == VK_SUCCESS && frame_queued <= target_frame) {
               /* Wait until the vblank fence signals; the event handler will attempt to requeue us. */
               image->state = WSI_IMAGE_WAITING;
               return VK_SUCCESS;
            }
         }
      } else {
         /* On a VRR display, applications can request fractional frame times,
          * and there seems to be no good way to target an absolute time with atomic commits ... */
         int64_t target_ns = target_relative_ns + (int64_t)connector->last_nsec;
         image->minimum_ns = target_ns;

         /* Account for some minimum delay between submitting a page flip and it being
          * processed, plus sleep jitter. We will compensate for the difference if there
          * is any, so that we don't report completion times in the past. */
         target_ns -= 1 * 1000 * 1000;

         os_time_nanosleep_until(target_ns);
      }
   }
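   /* Worked example with assumed numbers (not taken from this change):
    * refresh_duration = 16666667 ns (60 Hz), last vblank at
    * last_nsec = 1000000000 ns, absolute target = 1050000000 ns with
    * NEAREST_REFRESH_CYCLE set.
    *
    *   target_relative_ns = 1050000000 - 1000000000   = 50000000
    *   nearest-cycle bias: 50000000 - 16666667 / 2    = 41666667
    *   num_cycles_to_skip = 41666667 / 16666667       = 2
    *
    * The vblank event is registered for last_frame + 2; the flip queued from
    * there scans out on the following vblank, right at the 1.05 s mark. */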
   image->state = WSI_IMAGE_QUEUED;

   int ret = drm_atomic_commit(connector, image);
   if (ret == 0) {
      image->state = WSI_IMAGE_FLIPPING;
@@ -2859,6 +3030,44 @@ _wsi_display_queue_next(struct wsi_swapchain *drv_chain)
   }
}

static void
wsi_display_set_timing_request(struct wsi_swapchain *drv_chain,
                               const struct wsi_image_timing_request *request)
{
   struct wsi_display_swapchain *chain =
      (struct wsi_display_swapchain *) drv_chain;
   chain->timing_request = *request;
}

static uint64_t
wsi_display_poll_refresh_duration(struct wsi_swapchain *drv_chain, uint64_t *interval)
{
   struct wsi_display_swapchain *chain =
      (struct wsi_display_swapchain *)drv_chain;
   VkIcdSurfaceDisplay *surface = chain->surface;
   wsi_display_mode *display_mode =
      wsi_display_mode_from_handle(surface->displayMode);
   double refresh = wsi_display_mode_refresh(display_mode);
   wsi_display_connector *connector = display_mode->connector;

   uint64_t refresh_ns = (uint64_t)(floor(1.0 / refresh * 1e9 + 0.5));

   /* Assume FRR by default. */
   *interval = refresh_ns;

   /* If VRR is not enabled on the target CRTC, we should honor that.
    * There is no mechanism to clearly request that VRR is desired,
    * so we must assume that the user might force us into FRR mode. */
   if (connector->vrr_capable == VRR_TRISTATE_ENABLED) {
      if (connector->vrr_enabled == VRR_TRISTATE_UNKNOWN)
         *interval = 0; /* Somehow we don't know if the connector is VRR or FRR. Report unknown. */
      else if (connector->vrr_enabled == VRR_TRISTATE_ENABLED)
         *interval = UINT64_MAX;
   }

   return refresh_ns;
}
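As a sanity check of the rounding above (mode numbers assumed, not from this change): a 59.94 Hz mode gives 1 / 59.94 * 1e9 = 16683350.016... ns, and floor(x + 0.5) rounds that to refresh_ns = 16683350. On a fixed-rate connector *interval is reported as the same value; with VRR enabled it becomes UINT64_MAX, and when the VRR state cannot be determined it is reported as 0 (unknown).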
static VkResult
wsi_display_queue_present(struct wsi_swapchain *drv_chain,
                          uint32_t image_index,
@@ -2876,16 +3085,19 @@ wsi_display_queue_present(struct wsi_swapchain *drv_chain,
      return chain->status;

   image->present_id = present_id;
   image->timing_request = chain->timing_request;

   assert(image->state == WSI_IMAGE_DRAWING);
   wsi_display_debug("present %d\n", image_index);

   mtx_lock(&wsi->wait_mutex);

   /* Make sure that the page flip handler is processed in finite time if using present wait
    * or presentation time. */
   if (present_id || chain->timing_request.serial)
      wsi_display_start_wait_thread(wsi);

   memset(&chain->timing_request, 0, sizeof(chain->timing_request));
   image->flip_sequence = ++chain->flip_sequence;
   image->state = WSI_IMAGE_QUEUED;
@@ -3045,6 +3257,9 @@ wsi_display_surface_create_swapchain(
   chain->base.acquire_next_image = wsi_display_acquire_next_image;
   chain->base.release_images = wsi_display_release_images;
   chain->base.queue_present = wsi_display_queue_present;
   chain->base.set_timing_request = wsi_display_set_timing_request;
   chain->base.poll_early_refresh = wsi_display_poll_refresh_duration;
   chain->base.present_timing.time_domain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR;
   chain->base.wait_for_present = wsi_display_wait_for_present;
   chain->base.wait_for_present2 = wsi_display_wait_for_present;
   chain->base.set_hdr_metadata = wsi_display_set_hdr_metadata;
@@ -112,6 +112,16 @@ wsi_headless_surface_get_capabilities2(VkIcdSurfaceBase *surface,
      break;
   }

   case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
      VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;

      wait->presentStageQueries = 0;
      wait->presentTimingSupported = VK_FALSE;
      wait->presentAtAbsoluteTimeSupported = VK_FALSE;
      wait->presentAtRelativeTimeSupported = VK_FALSE;
      break;
   }

   default:
      /* Ignored */
      break;
@@ -139,6 +139,16 @@ wsi_metal_surface_get_capabilities2(VkIcdSurfaceBase *surface,
      break;
   }

   case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
      VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;

      wait->presentStageQueries = 0;
      wait->presentTimingSupported = VK_FALSE;
      wait->presentAtAbsoluteTimeSupported = VK_FALSE;
      wait->presentAtRelativeTimeSupported = VK_FALSE;
      break;
   }

   default:
      /* Ignored */
      break;
@@ -188,6 +188,29 @@ struct wsi_image {
   int dma_buf_fd;
#endif
   void *cpu_map;

   VkQueryPool query_pool;
   VkCommandBuffer *timestamp_cmd_buffers;
};

struct wsi_presentation_timing {
   uint64_t present_id;
   uint64_t target_time;
   uint64_t serial;
   uint64_t queue_done_time; /* GPU timestamp based. */
   uint64_t complete_time; /* Best effort timestamp we get from backend. */
   /* If we're rendering with IMMEDIATE, it's possible for images to IDLE long before they complete.
    * In this case, we have to ensure that queue_done_time is sampled at QueuePresentKHR time
    * before we recycle an image. */
   struct wsi_image *image;
   VkPresentStageFlagsEXT requested_feedback;
   VkBool32 complete;
};

struct wsi_image_timing_request {
   uint64_t serial;
   uint64_t time;
   VkPresentTimingInfoFlagsEXT flags;
};
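A sketch of the contract these structs imply between the common layer and a backend; the values below are invented for illustration and this is not a complete driver flow:

/* A timing request is latched before queue_present and identified by its
 * serial; the backend echoes the serial back when the present completes:
 *
 *   struct wsi_image_timing_request req = {
 *      .serial = 42,          // matched later in notify_completion
 *      .time   = 16666667,    // relative: one 60 Hz frame from now
 *      .flags  = VK_PRESENT_TIMING_INFO_PRESENT_AT_RELATIVE_TIME_BIT_EXT,
 *   };
 *   chain->set_timing_request(chain, &req);
 *   chain->queue_present(chain, image_index, ...);
 *   ...
 *   // later, from the backend's completion path:
 *   wsi_swapchain_present_timing_notify_completion(chain, 42, actual_ns, image);
 */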
struct wsi_swapchain {
@@ -237,7 +260,28 @@ struct wsi_swapchain {
      struct vk_queue *queue;
   } blit;

   struct {
      mtx_t lock;
      bool active;

      struct wsi_presentation_timing *timings;
      size_t timings_capacity;
      size_t timings_count;

      size_t serial;

      /* Maps to Vulkan spec definitions. */
      uint64_t refresh_duration;
      uint64_t refresh_interval;
      /* When 0, we don't know yet. Every time the refresh rate changes,
       * increase this counter. This counter must also be passed in GetPastTimings. */
      uint64_t refresh_counter;

      VkTimeDomainKHR time_domain;
   } present_timing;

   bool capture_key_pressed;
   float timestamp_period;

   /* Command pools, one per queue family */
   VkCommandPool *cmd_pools;
@@ -266,6 +310,10 @@ struct wsi_swapchain {
                            VkPresentModeKHR mode);
   void (*set_hdr_metadata)(struct wsi_swapchain *swap_chain,
                            const VkHdrMetadataEXT* pMetadata);
   void (*set_timing_request)(struct wsi_swapchain *swap_chain,
                              const struct wsi_image_timing_request *request);
   void (*poll_timing_request)(struct wsi_swapchain *swap_chain);
   uint64_t (*poll_early_refresh)(struct wsi_swapchain *swap_chain, uint64_t *interval);
};

bool
@@ -369,6 +417,10 @@ wsi_create_image(const struct wsi_swapchain *chain,
void
wsi_image_init(struct wsi_image *image);

VkResult
wsi_image_init_timestamp(const struct wsi_swapchain *chain,
                         struct wsi_image *image);

void
wsi_destroy_image(const struct wsi_swapchain *chain,
                  struct wsi_image *image);
@@ -377,6 +429,16 @@ VkResult
wsi_swapchain_wait_for_present_semaphore(const struct wsi_swapchain *chain,
                                         uint64_t present_id, uint64_t timeout);

void
wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain,
                                               uint64_t timing_serial, uint64_t timestamp,
                                               struct wsi_image *image);

void
wsi_swapchain_present_timing_update_refresh_rate(struct wsi_swapchain *chain,
                                                 uint64_t refresh_duration, uint64_t refresh_interval,
                                                 int minimum_delta_for_update);

#ifdef HAVE_LIBDRM
VkResult
wsi_prepare_signal_dma_buf_from_semaphore(struct wsi_swapchain *chain,
@@ -254,6 +254,8 @@ struct wsi_wl_swapchain {
      bool has_hdr_metadata;
   } color;

   struct wsi_image_timing_request timing_request;

   struct wsi_wl_image images[0];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(wsi_wl_swapchain, base.base, VkSwapchainKHR,
@@ -1668,7 +1670,15 @@ wsi_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice,
   struct wsi_wayland *wsi =
      (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];

   /* These should overlap. */
   uint64_t effective_queues = wsi_device->queue_supports_blit & wsi_device->queue_supports_timestamps;

   /* If there are no queues that support both blits and timestamps,
    * don't report support for queue timestamps. */
   if (!effective_queues)
      effective_queues = wsi_device->queue_supports_blit;

   if (!(effective_queues & BITFIELD64_BIT(queueFamilyIndex)))
      return false;

   struct wsi_wl_display display;
@@ -1789,7 +1799,8 @@ wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
static VkResult
wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface,
                                  struct wsi_device *wsi_device,
                                  bool *has_wp_presentation, clockid_t *clock_id,
                                  bool *has_commit_timing, bool *has_fifo)
{
   VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
   struct wsi_wayland *wsi =
@@ -1800,7 +1811,17 @@ wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface,
                     wsi_device->sw, "mesa check wp_presentation"))
      return VK_ERROR_SURFACE_LOST_KHR;

   if (has_wp_presentation)
      *has_wp_presentation = !!display.wp_presentation_notwrapped;

   if (clock_id)
      *clock_id = display.presentation_clock_id;

   if (has_commit_timing)
      *has_commit_timing = !!display.commit_timing_manager;

   if (has_fifo)
      *has_fifo = !!display.fifo_manager;

   wsi_wl_display_finish(&display);
@@ -1893,7 +1914,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface,
   bool has_feedback;

   result = wsi_wl_surface_check_presentation(surface, wsi_device,
                                              &has_feedback, NULL, NULL, NULL);
   if (result != VK_SUCCESS)
      return result;
@@ -1906,7 +1927,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface,
   bool has_feedback;

   result = wsi_wl_surface_check_presentation(surface, wsi_device,
                                              &has_feedback, NULL, NULL, NULL);
   if (result != VK_SUCCESS)
      return result;
@@ -1914,6 +1935,50 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface,
      break;
   }

   case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
      VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;
      bool has_feedback, has_commit_timing, has_fifo;

      wait->presentStageQueries = 0;
      wait->presentTimingSupported = VK_FALSE;
      wait->presentAtAbsoluteTimeSupported = VK_FALSE;
      wait->presentAtRelativeTimeSupported = VK_FALSE;

      clockid_t clock_id;

      result = wsi_wl_surface_check_presentation(surface, wsi_device,
                                                 &has_feedback, &clock_id,
                                                 &has_commit_timing, &has_fifo);

      if (result != VK_SUCCESS)
         return result;

      if (!has_feedback)
         break;

      /* We could deal with esoteric clock domains by exposing VK_TIME_DOMAIN_SWAPCHAIN or PRESENT_STAGE_LOCAL,
       * but that requires a lot more scaffolding, and there's no need to add extra complexity if we can
       * get away with this. */
      if (clock_id != CLOCK_MONOTONIC && clock_id != CLOCK_MONOTONIC_RAW)
         break;

      /* The present timing spec says the reported time targets "the pixel being visible".
       * From the presentation-time spec: "Note, that if the display path has a non-zero latency,
       * the time instant specified by this counter may differ from the timestamp's."
       * No compositor I know of takes display latency into account when reporting,
       * so it's a little unclear whether we should actually report PIXEL_OUT or PIXEL_VISIBLE.
       * Choose PIXEL_OUT for now, since no known compositor actually implements
       * PIXEL_VISIBLE as intended, and we don't want to promise something we cannot keep. */
      wait->presentTimingSupported = VK_TRUE;
      wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT;

      /* We cannot reliably implement the FIFO guarantee + absolute time without the FIFO barrier.
       * Presentation timing is only defined to work with FIFO (and its variants like RELAXED and LATEST_READY). */
      wait->presentAtAbsoluteTimeSupported = has_commit_timing && has_fifo;

      break;
   }

   default:
      /* Ignored */
      break;
@@ -2404,6 +2469,7 @@ struct wsi_wl_present_id {
    * which uses a frame callback to signal DRI3 COMPLETE. */
   struct wl_callback *frame;
   uint64_t present_id;
   uint64_t timing_serial;
   struct mesa_trace_flow flow;
   uint64_t submission_time;
   const VkAllocationCallbacks *alloc;

@@ -2411,6 +2477,8 @@ struct wsi_wl_present_id {
   uint64_t target_time;
   uint64_t correction;
   struct wl_list link;
   struct wsi_image *img;
   bool user_target_time;
};

static struct wsi_image *
@@ -2441,6 +2509,14 @@ wsi_wl_swapchain_set_present_mode(struct wsi_swapchain *wsi_chain,
   chain->base.present_mode = mode;
}

static void
wsi_wl_swapchain_set_timing_request(struct wsi_swapchain *wsi_chain,
                                    const struct wsi_image_timing_request *request)
{
   struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain;
   chain->timing_request = *request;
}

static VkResult
dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_time)
{
@@ -2514,6 +2590,15 @@ dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_time)
   return VK_SUCCESS;
}

static void
wsi_wl_swapchain_poll_timing_request(struct wsi_swapchain *wsi_chain)
{
   /* Timing requests must complete in finite time, and if we're not calling present wait
    * or queue present regularly, timing requests will never come back. */
   struct timespec instant = {0};
   dispatch_present_id_queue(wsi_chain, &instant);
}

static bool
wsi_wl_swapchain_present_id_completes_in_finite_time_locked(struct wsi_wl_swapchain *chain,
                                                            uint64_t present_id)
@@ -2794,16 +2879,13 @@ wsi_wl_swapchain_acquire_next_image_implicit(struct wsi_swapchain *wsi_chain,
}

static void
wsi_wl_presentation_update_present_id_locked(struct wsi_wl_present_id *id)
{
   id->chain->present_ids.outstanding_count--;
   if (id->present_id > id->chain->present_ids.max_completed)
      id->chain->present_ids.max_completed = id->present_id;

   id->chain->present_ids.display_time_correction -= id->correction;
}

static void
@@ -2815,6 +2897,20 @@ presentation_handle_presented(void *data,
   struct wsi_wl_swapchain *chain = id->chain;
   uint64_t target_time = id->target_time;

   /* In v1 of presentation-time, we can know that we're likely running VRR if refresh is 0.
    * However, we cannot know the base refresh rate without some kind of external information.
    * We also cannot know if we're actually driving the display in a VRR fashion.
    * In v2, we should always know the "base refresh" rate, but that means we cannot know whether we're
    * driving the display VRR or FRR. We could try to deduce it from timestamps, but that is too brittle.
    * There is a v3 proposal that adds this information more formally, so we don't have to guess.
    * Knowing VRR vs. FRR is not mission critical for most use cases, so just report "unknown" for now. */
   wsi_swapchain_present_timing_update_refresh_rate(&chain->base, refresh, 0, 0);

   /* Notify this before present wait to reduce the latency of presentation timing requests
    * if the application is driving its queries off present waits. */
   if (id->timing_serial)
      wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, presentation_time, id->img);

   mtx_lock(&chain->present_ids.lock);
   chain->present_ids.refresh_nsec = refresh;
   if (!chain->present_ids.valid_refresh_nsec) {
@@ -2826,13 +2922,16 @@ presentation_handle_presented(void *data,
   if (presentation_time > chain->present_ids.displayed_time)
      chain->present_ids.displayed_time = presentation_time;

   /* If we have a user-defined target time, it can be arbitrarily early, and we don't
    * want to start compensating for that error if the application stops requesting a specific time. */
   if (!id->user_target_time && target_time && presentation_time > target_time)
      chain->present_ids.display_time_error = presentation_time - target_time;
   else
      chain->present_ids.display_time_error = 0;

   wsi_wl_presentation_update_present_id_locked(id);
   mtx_unlock(&chain->present_ids.lock);
   vk_free(id->alloc, id);
}

static void
@@ -2841,6 +2940,15 @@ presentation_handle_discarded(void *data)
   struct wsi_wl_present_id *id = data;
   struct wsi_wl_swapchain *chain = id->chain;

   /* From the Vulkan spec:
    * "Timing information for some present stages may have a time value of 0,
    * indicating that results for that present stage are not available."
    * Worst case, we could simply take a timestamp of clock_id and pretend, but
    * applications may start to latch onto that timestamp as ground truth, which
    * is obviously not correct. */
   if (id->timing_serial)
      wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, 0, id->img);

   mtx_lock(&chain->present_ids.lock);
   if (!chain->present_ids.valid_refresh_nsec) {
      /* We've started occluded, so make up some safe values to throttle us */
@@ -2849,9 +2957,10 @@ presentation_handle_discarded(void *data)
      chain->present_ids.refresh_nsec = 16666666;
      chain->present_ids.valid_refresh_nsec = true;
   }

   wsi_wl_presentation_update_present_id_locked(id);
   mtx_unlock(&chain->present_ids.lock);
   vk_free(id->alloc, id);
}

static void
@@ -2870,9 +2979,10 @@ presentation_frame_handle_done(void *data, struct wl_callback *callback, uint32_t ts)

   mtx_lock(&chain->present_ids.lock);
   wl_list_remove(&id->link);
   wsi_wl_presentation_update_present_id_locked(id);
   mtx_unlock(&chain->present_ids.lock);
   vk_free(id->alloc, id);
   wl_callback_destroy(callback);
}
@@ -2895,6 +3005,29 @@ static const struct wl_callback_listener frame_listener = {
   frame_handle_done,
};

static bool
set_application_driven_timestamp(struct wsi_wl_swapchain *chain,
                                 uint64_t *timestamp,
                                 uint64_t *correction)
{
   if (chain->timing_request.serial && chain->timing_request.time) {
      /* An absolute time was requested before we have been able to report a reasonable
       * refresh rate to the application. This is valid, but we should not try to perform
       * any rounding. The NEAREST_REFRESH_CYCLE flag cannot be honored, because the
       * refresh cycle is unknown at this point. */
      struct timespec target_ts;
      timespec_from_nsec(&target_ts, chain->timing_request.time);
      wp_commit_timer_v1_set_timestamp(chain->commit_timer,
                                       (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec,
                                       target_ts.tv_nsec);
      *timestamp = chain->timing_request.time;
      *correction = 0;
      chain->present_ids.last_target_time = chain->timing_request.time;
      return true;
   } else {
      return false;
   }
}
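One detail worth spelling out: the hi/lo split in the wp_commit_timer_v1_set_timestamp() call above follows the Wayland convention of passing 64-bit seconds as two 32-bit words. With an assumed tv_sec of 0x100000005, the first argument (uint64_t)tv_sec >> 32 yields the high word 1, and the protocol marshalling truncates the second argument to the low word, 5.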
/* The present_ids lock must be held */
static bool
set_timestamp(struct wsi_wl_swapchain *chain,
@@ -2908,7 +3041,7 @@ set_timestamp(struct wsi_wl_swapchain *chain,
   int32_t error = 0;

   if (!chain->present_ids.valid_refresh_nsec)
      return set_application_driven_timestamp(chain, timestamp, correction);

   displayed_time = chain->present_ids.displayed_time;
   refresh = chain->present_ids.refresh_nsec;

@@ -2918,7 +3051,7 @@ set_timestamp(struct wsi_wl_swapchain *chain,
    * timestamps at all, so bail out.
    */
   if (!refresh)
      return set_application_driven_timestamp(chain, timestamp, correction);

   /* We assume we're being fed at the display's refresh rate, but
    * if that doesn't happen our timestamps fall into the past.
@@ -2936,6 +3069,10 @@ set_timestamp(struct wsi_wl_swapchain *chain,
   error = chain->present_ids.display_time_error -
           chain->present_ids.display_time_correction;

   /* If we're driving timestamps from the application, this is somewhat redundant,
    * but it will drain out any accumulated display_time_error over time.
    * Accumulated errors are expected, since the application might not
    * align the target time perfectly against a refresh cycle. */
   target = chain->present_ids.last_target_time;
   if (error > 0) {
      target += (error / refresh) * refresh;
@@ -2945,19 +3082,41 @@ set_timestamp(struct wsi_wl_swapchain *chain,
   }

   chain->present_ids.display_time_correction += *correction;

   if (chain->timing_request.serial && chain->timing_request.time) {
      target = chain->timing_request.time;
      chain->present_ids.last_target_time = target;
      *timestamp = target;

      if (chain->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT)
         target -= chain->present_ids.refresh_nsec / 2;

      /* Without the flag, the application is supposed to deal with any safety margins on its own. */
      timespec_from_nsec(&target_ts, target);

      /* If we're using the commit-timing path, we always have the FIFO protocol, so we don't have to
       * consider scenarios where the application passes a very low present time.
       * I.e., there is no need to max() the application timestamp against our estimated next refresh cycle.
       * If the surface is occluded, it's possible to render at a higher rate than the display refresh rate,
       * but that's okay. Those presents will be discarded anyway, and we won't report odd timestamps to the application. */
   } else {
      target = next_phase_locked_time(displayed_time,
                                      refresh,
                                      target);

      chain->present_ids.last_target_time = target;
      *timestamp = target;

      /* Take back 500 us as a safety margin, to ensure we don't miss our
       * target due to round-off error.
       */
      timespec_from_nsec(&target_ts, target - 500000);
   }

   wp_commit_timer_v1_set_timestamp(chain->commit_timer,
                                    (uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec,
                                    target_ts.tv_nsec);

   return true;
}
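For reference, a sketch of what the phase-locking in the else-branch does. next_phase_locked_time() is defined elsewhere in this file; the stand-in below only illustrates the idea (snap the target forward onto the refresh-period grid anchored at the last displayed time) and is not the actual implementation:

#include <stdint.h>

/* Illustrative stand-in: round `target` up to the next point on the
 * refresh-period grid anchored at `displayed_time`. */
static uint64_t
sketch_next_phase_locked_time(uint64_t displayed_time, uint64_t refresh,
                              uint64_t target)
{
   if (target <= displayed_time)
      return displayed_time + refresh;

   uint64_t periods = (target - displayed_time + refresh - 1) / refresh;
   return displayed_time + periods * refresh;
}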
@@ -3059,13 +3218,16 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
   }

   if (present_id > 0 || (mode_fifo && chain->commit_timer) ||
       util_perfetto_is_tracing_enabled() || chain->timing_request.serial) {
      struct wsi_wl_present_id *id =
         vk_zalloc(chain->wsi_wl_surface->display->wsi_wl->alloc, sizeof(*id), sizeof(uintptr_t),
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      id->chain = chain;
      id->present_id = present_id;
      id->alloc = chain->wsi_wl_surface->display->wsi_wl->alloc;
      id->timing_serial = chain->timing_request.serial;
      id->img = &chain->images[image_index].base;
      id->user_target_time = chain->timing_request.time != 0;

      mtx_lock(&chain->present_ids.lock);
@@ -3193,6 +3355,8 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
                     wsi_wl_surface->display->queue);
   }

   memset(&chain->timing_request, 0, sizeof(chain->timing_request));

   return VK_SUCCESS;
}
@@ -3427,6 +3591,20 @@ wsi_wl_swapchain_destroy(struct wsi_swapchain *wsi_chain,
   return VK_SUCCESS;
}

static VkTimeDomainKHR
clock_id_to_vk_time_domain(clockid_t id)
{
   switch (id) {
   case CLOCK_MONOTONIC:
      return VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR;
   case CLOCK_MONOTONIC_RAW:
      return VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
   default:
      /* Default fallback. Will not be used. */
      return VK_TIME_DOMAIN_DEVICE_KHR;
   }
}

static VkResult
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
                                VkDevice device,
@@ -3605,6 +3783,12 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
   chain->base.queue_present = wsi_wl_swapchain_queue_present;
   chain->base.release_images = wsi_wl_swapchain_release_images;
   chain->base.set_present_mode = wsi_wl_swapchain_set_present_mode;
   chain->base.set_timing_request = wsi_wl_swapchain_set_timing_request;
   chain->base.poll_timing_request = wsi_wl_swapchain_poll_timing_request;
   if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) {
      chain->base.present_timing.time_domain =
         clock_id_to_vk_time_domain(wsi_wl_surface->display->presentation_clock_id);
   }
   chain->base.wait_for_present = wsi_wl_swapchain_wait_for_present;
   chain->base.wait_for_present2 = wsi_wl_swapchain_wait_for_present2;
   chain->base.present_mode = present_mode;
@@ -276,6 +276,16 @@ wsi_win32_surface_get_capabilities2(VkIcdSurfaceBase *surface,
      break;
   }

   case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
      VkPresentTimingSurfaceCapabilitiesEXT *wait = (VkPresentTimingSurfaceCapabilitiesEXT *)ext;

      wait->presentStageQueries = 0;
      wait->presentTimingSupported = VK_FALSE;
      wait->presentAtAbsoluteTimeSupported = VK_FALSE;
      wait->presentAtRelativeTimeSupported = VK_FALSE;
      break;
   }

   default:
      /* Ignored */
      break;
@@ -64,6 +64,7 @@
#include "wsi_common_entrypoints.h"
#include "wsi_common_private.h"
#include "wsi_common_queue.h"
#include "loader/loader_dri_helper_screen.h"

#ifdef HAVE_SYS_SHM_H
#include <sys/ipc.h>
@@ -79,7 +80,14 @@

#define MAX_DAMAGE_RECTS 64

struct x11_icd_surface_key {
   xcb_connection_t *conn;
   xcb_window_t window;
   uint32_t padding; /* Makes the struct memcmp-compatible. */
};

struct wsi_x11_icd_surface {
   struct x11_icd_surface_key key;
   bool has_dri3;
   bool has_dri3_modifiers;
   bool has_dri3_explicit_sync;
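The explicit padding matters because the hash table presumably keys on the raw bytes of the struct; with the hole zeroed (the surface is vk_zalloc'd), memcmp()-style comparison is well defined. A hypothetical pair of table callbacks along these lines (names invented, not from this change):

#include <string.h>
#include "util/hash_table.h"

static uint32_t
x11_icd_surface_key_hash(const void *data)
{
   /* _mesa_hash_data hashes the raw bytes, hence the zeroed padding. */
   return _mesa_hash_data(data, sizeof(struct x11_icd_surface_key));
}

static bool
x11_icd_surface_key_equal(const void *a, const void *b)
{
   return memcmp(a, b, sizeof(struct x11_icd_surface_key)) == 0;
}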
@@ -88,13 +96,80 @@ struct wsi_x11_icd_surface {
   bool is_xwayland;
   bool has_mit_shm;
   bool has_xfixes;

   struct loader_screen_resources screen_resources;
   bool screen_resources_valid;
   mtx_t mtx;

   /* This holds the fallback for the MSC rate, i.e. the refresh rate.
    * If we cannot get hold of a stable estimate based on real feedback,
    * we defer to this. With multiple monitors and other effects influencing
    * the actual rate, we shouldn't trust it blindly. */
   uint64_t current_refresh_ns;
};
static uint64_t
x11_icd_surface_update_present_timing(struct wsi_x11_icd_surface *surface, uint32_t width, uint32_t height)
{
   uint64_t ret;

   if (!surface || !surface->screen_resources_valid)
      return 0;

   mtx_lock(&surface->mtx);
   loader_update_screen_resources(&surface->screen_resources);

   if (surface->screen_resources.num_crtcs == 0) {
      surface->current_refresh_ns = 0;
      goto out;
   }

   surface->current_refresh_ns =
      1000000000ull * surface->screen_resources.crtcs[0].refresh_denominator /
      surface->screen_resources.crtcs[0].refresh_numerator;

   /* No need to ponder multi-monitor setups. */
   if (surface->screen_resources.num_crtcs == 1)
      goto out;

   /* Find the best matching screen for the window. */
   xcb_translate_coordinates_cookie_t cookie =
      xcb_translate_coordinates_unchecked(surface->key.conn, surface->key.window,
                                          surface->screen_resources.screen->root, 0, 0);
   xcb_translate_coordinates_reply_t *reply =
      xcb_translate_coordinates_reply(surface->key.conn, cookie, NULL);

   if (!reply)
      goto out;

   int area = 0;

   for (unsigned c = 0; c < surface->screen_resources.num_crtcs; c++) {
      struct loader_crtc_info *crtc = &surface->screen_resources.crtcs[c];

      int c_area = box_intersection_area(
         reply->dst_x, reply->dst_y, width, height, crtc->x,
         crtc->y, crtc->width, crtc->height);

      if (c_area > area) {
         surface->current_refresh_ns = 1000000000ull * crtc->refresh_denominator / crtc->refresh_numerator;
         area = c_area;
      }
   }

   free(reply);

out:
   ret = surface->current_refresh_ns;
   mtx_unlock(&surface->mtx);
   return ret;
}
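A worked example of the conversion above, with assumed RandR mode numbers: a 1080p60 CRTC with refresh_numerator = 148500000 (the pixel clock in Hz) and refresh_denominator = 2475000 (htotal * vtotal) yields 1000000000 * 2475000 / 148500000 = 16666666 ns, i.e. 60 Hz.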
struct wsi_x11 {
   struct wsi_interface base;

   mtx_t mutex;
   /* Hash table of xcb_connection -> wsi_x11_icd_surface mappings */
   struct hash_table *connections;
};
@@ -224,9 +299,9 @@ wsi_x11_detect_xwayland(xcb_connection_t *conn,
   return is_xwayland;
}

static struct wsi_x11_icd_surface *
wsi_x11_icd_surface_create(struct wsi_device *wsi_dev,
                           xcb_connection_t *conn, xcb_window_t window)
{
   xcb_query_extension_cookie_t dri3_cookie, pres_cookie, randr_cookie,
      amd_cookie, nv_cookie, shm_cookie, sync_cookie,
@@ -241,16 +316,19 @@ wsi_x11_icd_surface_create(struct wsi_device *wsi_dev,
   bool has_dri3_v1_4 = false;
   bool has_present_v1_4 = false;

   /* wsi_x11_get_icd_surface may be called from a thread, but we will never end up
    * here on a worker thread, since the connection will always be in the hash map,
    * so we will not violate Vulkan's rule on when the allocation callbacks may be
    * called. */
   struct wsi_x11_icd_surface *wsi_conn =
      vk_zalloc(&wsi_dev->instance_alloc, sizeof(*wsi_conn), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!wsi_conn)
      return NULL;

   wsi_conn->key.conn = conn;
   wsi_conn->key.window = window;

   sync_cookie = xcb_query_extension(conn, 4, "SYNC");
   dri3_cookie = xcb_query_extension(conn, 4, "DRI3");
   pres_cookie = xcb_query_extension(conn, 7, "Present");
@@ -378,6 +456,27 @@ wsi_x11_icd_surface_create(struct wsi_device *wsi_dev,
   }
#endif

   if (window) {
      /* This state is only needed for present timing; if we fail, we simply won't expose support. */
      xcb_get_geometry_cookie_t geometry_cookie = xcb_get_geometry_unchecked(conn, window);
      xcb_get_geometry_reply_t *geometry_reply = xcb_get_geometry_reply(conn, geometry_cookie, NULL);
      if (geometry_reply) {
         xcb_screen_iterator_t it = xcb_setup_roots_iterator(xcb_get_setup(conn));
         xcb_screen_t *screen;

         for (screen = it.data; it.rem != 0; xcb_screen_next(&it), screen = it.data) {
            if (screen->root == geometry_reply->root) {
               loader_init_screen_resources(&wsi_conn->screen_resources, conn, screen);
               wsi_conn->screen_resources_valid = true;
               mtx_init(&wsi_conn->mtx, mtx_plain);
               break;
            }
         }

         free(geometry_reply);
      }
   }

   free(dri3_reply);
   free(pres_reply);
   free(randr_reply);
@@ -392,14 +491,18 @@ wsi_x11_icd_surface_create(struct wsi_device *wsi_dev,
}

static void
wsi_x11_icd_surface_destroy(struct wsi_device *wsi_dev,
                            struct wsi_x11_icd_surface *conn)
{
   if (conn->screen_resources_valid) {
      loader_destroy_screen_resources(&conn->screen_resources);
      mtx_destroy(&conn->mtx);
   }
   vk_free(&wsi_dev->instance_alloc, conn);
}

static bool
wsi_x11_check_for_dri3(struct wsi_x11_icd_surface *wsi_conn)
{
   if (wsi_conn->has_dri3)
      return true;
@@ -418,35 +521,37 @@ wsi_x11_check_for_dri3(struct wsi_x11_icd_surface *wsi_conn)
 *
 * If the allocation fails NULL is returned.
 */
static struct wsi_x11_icd_surface *
wsi_x11_get_icd_surface(struct wsi_device *wsi_dev,
                        xcb_connection_t *conn, xcb_window_t window)
{
   struct wsi_x11 *wsi =
      (struct wsi_x11 *)wsi_dev->wsi[VK_ICD_WSI_PLATFORM_XCB];

   mtx_lock(&wsi->mutex);

   struct x11_icd_surface_key key = { .conn = conn, .window = window };

   struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, &key);
   if (!entry) {
      /* We're about to make a bunch of blocking calls. Let's drop the
       * mutex for now so we don't block up too badly.
       */
      mtx_unlock(&wsi->mutex);

      struct wsi_x11_icd_surface *wsi_conn =
         wsi_x11_icd_surface_create(wsi_dev, conn, window);
      if (!wsi_conn)
         return NULL;

      mtx_lock(&wsi->mutex);

      entry = _mesa_hash_table_search(wsi->connections, &wsi_conn->key);
      if (entry) {
         /* Oops, someone raced us to it */
         wsi_x11_icd_surface_destroy(wsi_dev, wsi_conn);
      } else {
         entry = _mesa_hash_table_insert(wsi->connections, &wsi_conn->key, wsi_conn);
      }
   }
@@ -590,11 +695,20 @@ wsi_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice,
{
   VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice);
   struct wsi_device *wsi_device = pdevice->wsi_device;

   /* These should overlap. */
   uint64_t effective_queues = wsi_device->queue_supports_blit & wsi_device->queue_supports_timestamps;

   /* If there are no queues that support both blits and timestamps,
    * don't report support for queue timestamps. */
   if (!effective_queues)
      effective_queues = wsi_device->queue_supports_blit;

   if (!(effective_queues & BITFIELD64_BIT(queueFamilyIndex)))
      return false;

   struct wsi_x11_icd_surface *wsi_conn =
      wsi_x11_get_icd_surface(wsi_device, connection, 0);

   if (!wsi_conn)
      return false;
@@ -669,8 +783,8 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
   xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
   xcb_window_t window = x11_surface_get_window(icd_surface);

   struct wsi_x11_icd_surface *wsi_conn =
      wsi_x11_get_icd_surface(wsi_device, conn, window);
   if (!wsi_conn)
      return VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -722,7 +836,7 @@ x11_get_min_image_count(const struct wsi_device *wsi_device, bool is_xwayland)

static unsigned
x11_get_min_image_count_for_present_mode(struct wsi_device *wsi_device,
                                         struct wsi_x11_icd_surface *wsi_conn,
                                         VkPresentModeKHR present_mode);

static VkResult
@@ -734,8 +848,8 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
   xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
   xcb_window_t window = x11_surface_get_window(icd_surface);
   struct wsi_x11_vk_surface *surface = (struct wsi_x11_vk_surface*)icd_surface;
   struct wsi_x11_icd_surface *wsi_conn =
      wsi_x11_get_icd_surface(wsi_device, conn, window);
   xcb_get_geometry_cookie_t geom_cookie;
   xcb_generic_error_t *err;
   xcb_get_geometry_reply_t *geom;
@@ -863,6 +977,52 @@ x11_surface_get_capabilities2(VkIcdSurfaceBase *icd_surface,
      break;
   }

   case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
      VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;

      xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
      xcb_window_t window = x11_surface_get_window(icd_surface);
      struct wsi_x11_icd_surface *wsi_conn = wsi_x11_get_icd_surface(wsi_device, conn, window);

      wait->presentStageQueries = 0;
      wait->presentTimingSupported = VK_FALSE;
      wait->presentAtAbsoluteTimeSupported = VK_FALSE;
      wait->presentAtRelativeTimeSupported = VK_FALSE;

      /* If we cannot query modes for a screen, it's not possible to get reliable timings. */
      if (!wsi_conn || !wsi_conn->screen_resources_valid)
         break;

      wait->presentTimingSupported = VK_TRUE;

      if (wsi_conn->is_xwayland) {
         /* The Wayland COMPLETE event is tied to the fence callback, so that's what we'll report.
          * For pure frame-pacing support, this is likely fine. */
         wait->presentStageQueries = VK_PRESENT_STAGE_REQUEST_DEQUEUED_BIT_EXT;

         /* Xwayland cannot produce a reliable refresh rate estimate, since its MSC is not tied
          * to monitor refresh at all. However, it's pragmatically very important to expose some
          * baseline Xwayland support, since a large number of applications (mostly games) rely
          * on X11 APIs.
          *
          * Relative timings are easier to deal with, since errors against an absolute timer are
          * more or less expected. That is sufficient for implementing present intervals in GL/D3D
          * and the like, but likely not for tight A/V sync in e.g. media players; those should be
          * using Wayland when available anyway.
          * Per the spec, the timing we report should correlate with the PIXEL_VISIBLE_BIT stage;
          * that is not really possible when we only observe dequeue, but relative timings don't
          * have that problem.
          *
          * There is PRESENT_CAPABILITY_UST, which would help, but xserver does not implement it at all.
          */
         wait->presentAtRelativeTimeSupported = VK_TRUE;
      } else {
         /* COMPLETE should be tied to the page flip on native X11. */
         wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT;
         wait->presentAtAbsoluteTimeSupported = VK_TRUE;
         wait->presentAtRelativeTimeSupported = VK_TRUE;
      }

      break;
   }

   default:
      /* Ignored */
      break;
@@ -1092,6 +1252,7 @@ wsi_CreateXlibSurfaceKHR(VkInstance _instance,
struct x11_image_pending_completion {
   uint32_t serial;
   uint64_t signal_present_id;
   uint64_t timing_serial;
};

struct x11_image {
@@ -1108,6 +1269,7 @@ struct x11_image {
   VkPresentModeKHR present_mode;
   xcb_rectangle_t rects[MAX_DAMAGE_RECTS];
   int rectangle_count;
   struct wsi_image_timing_request timing_request;

   /* In IMMEDIATE and MAILBOX modes, we can have multiple pending presentations per image.
    * We need to keep track of them when considering present ID. */
@@ -1125,12 +1287,19 @@ struct x11_image {
#endif
};

struct x11_present_timing_entry {
   uint64_t msc;
   uint64_t ust;
};
#define X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE 16

struct x11_swapchain {
   struct wsi_swapchain base;

   bool has_dri3_modifiers;
   bool has_mit_shm;
   bool has_async_may_tear;
   bool has_reliable_msc;

   xcb_connection_t *conn;
   xcb_window_t window;
@@ -1144,9 +1313,13 @@ struct x11_swapchain {
   xcb_special_event_t *special_event;
   uint64_t send_sbc;
   uint64_t last_present_msc;
   uint64_t next_present_ust_lower_bound;
   uint32_t stamp;
   uint32_t sent_image_count;

   struct x11_present_timing_entry present_timing_window[X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE];
   uint32_t present_timing_window_index;

   atomic_int status;
   bool copy_is_suboptimal;
   struct wsi_queue present_queue;
@@ -1168,14 +1341,121 @@ struct x11_swapchain {
   uint64_t present_id;
   VkResult present_progress_error;

   struct wsi_image_timing_request timing_request;
   bool msc_estimate_is_stable;

   struct x11_image images[0];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(x11_swapchain, base.base, VkSwapchainKHR,
                               VK_OBJECT_TYPE_SWAPCHAIN_KHR)

static bool x11_refresh_rate_estimate_is_stable(struct x11_swapchain *swapchain, uint64_t base_rate)
{
   /* Only accept a refresh rate estimate if it's *very* stable.
    * Keith's old GOOGLE_display_timing MR suggests that using this estimate is better than blindly
    * accepting the modeline in some cases.
    * When running in VRR modes, the MSC will appear to be highly unstable, and we cannot accept those estimates. */

   for (int i = 0; i < X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE; i++) {
      const struct x11_present_timing_entry *a =
         &swapchain->present_timing_window[i];
      const struct x11_present_timing_entry *b =
         &swapchain->present_timing_window[(i + 1) % X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE];

      if (!a->msc || !b->msc)
         continue;

      uint64_t ust_delta = MAX2(a->ust, b->ust) - MIN2(a->ust, b->ust);
      uint64_t msc_delta = MAX2(a->msc, b->msc) - MIN2(a->msc, b->msc);

      if (msc_delta == 0)
         continue;

      uint64_t refresh_ns = 1000 * ust_delta / msc_delta;

      /* The true UST values are expected to be quite accurate.
       * Anything more than 10us difference in rate is considered unstable.
       * If the MSC is driven by GPU progress in VRR mode, it's extremely
       * unlikely that frames are paced *perfectly* for 16 frames in a row. */
      if (llabs((int64_t)base_rate - (int64_t)refresh_ns) > 10000)
         return false;
   }

   return true;
}
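A hypothetical trace of how the 10 us tolerance plays out (numbers invented for illustration):

/* base_rate = 16666700 ns from the newest pair of samples.
 * Adjacent window entries yield, e.g.:
 *   16666690 ns -> |16666700 - 16666690| = 10      -> ok
 *   16666000 ns -> |16666700 - 16666000| = 700     -> ok
 *   16400000 ns -> |16666700 - 16400000| = 266700  -> > 10000, unstable
 * One VRR-ish outlier anywhere in the 16-entry window is enough to reject
 * the estimate and fall back to the RandR-reported rate. */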

static void x11_present_update_refresh_cycle_estimate(struct x11_swapchain *swapchain,
                                                      uint64_t msc, uint64_t ust)
{
   struct wsi_x11_icd_surface *surface = wsi_x11_get_icd_surface(
      (struct wsi_device*)swapchain->base.wsi, swapchain->conn, swapchain->window);

   mtx_lock(&surface->mtx);
   uint64_t randr_refresh_ns = surface->current_refresh_ns;
   mtx_unlock(&surface->mtx);

   swapchain->present_timing_window_index =
      (swapchain->present_timing_window_index + 1) % X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE;
   struct x11_present_timing_entry *entry = &swapchain->present_timing_window[swapchain->present_timing_window_index];

   if (!swapchain->has_reliable_msc) {
      /* If we don't have a reliable MSC, we always trust the fallback RANDR query.
       * We have no idea whether we're FRR or VRR. */
      wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, randr_refresh_ns, 0, 0);
      entry->msc = msc;
      entry->ust = ust;
      return;
   }

   /* Try to get an initial estimate as quickly as possible; we will refine it over time. */
   if (entry->msc == 0)
      entry = &swapchain->present_timing_window[1];

   if (entry->msc != 0) {
      uint64_t msc_delta = msc - entry->msc;

      /* Safeguard against any weird interactions with IMMEDIATE. */
      if (msc_delta != 0) {
         uint64_t ust_delta = 1000 * (ust - entry->ust);
         uint64_t refresh_ns = ust_delta / msc_delta;

         swapchain->msc_estimate_is_stable = x11_refresh_rate_estimate_is_stable(swapchain, refresh_ns);

         if (swapchain->msc_estimate_is_stable) {
            /* If the MSC is tightly locked in, we can safely assume we're in FRR mode.
             * It's possible we're technically doing VRR, but if we're consistently rendering
             * above the monitor refresh rate, there is no meaningful difference anyway. */

            /* Our refresh rates are only estimates, so expect some deviation (+/- 1us). */
            wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, refresh_ns, refresh_ns, 1000);
         } else {
            /* If we have enabled adaptive sync and we're seeing highly irregular MSC values,
             * we assume we're driving the display in VRR mode. */
            uint64_t refresh_interval = swapchain->base.wsi->enable_adaptive_sync ? UINT64_MAX : 0;
            wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, randr_refresh_ns, refresh_interval, 0);
         }
      }
   }

   entry = &swapchain->present_timing_window[swapchain->present_timing_window_index];
   entry->msc = msc;
   entry->ust = ust;
}
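
The heart of the estimate above is a single division: nanoseconds elapsed over MSC ticks elapsed. A hedged sketch with assumed numbers (estimate_refresh_ns is a hypothetical helper, not a WSI function):

#include <stdint.h>

/* UST deltas are in microseconds, hence the 1000x to nanoseconds. */
static uint64_t estimate_refresh_ns(uint64_t ust_delta_us, uint64_t msc_delta)
{
   return msc_delta ? 1000 * ust_delta_us / msc_delta : 0; /* 0 guards the IMMEDIATE case */
}
/* Two completions 10 vblanks apart on a 59.94 Hz panel:
 * estimate_refresh_ns(166834, 10) == 16683400 ns, i.e. ~16.68 ms per cycle. */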

static void x11_present_complete(struct x11_swapchain *swapchain,
                                 struct x11_image *image, uint32_t index,
                                 uint64_t msc, uint64_t ust)
{
   /* Update the estimate for the refresh rate. */
   if (swapchain->base.present_timing.active)
      x11_present_update_refresh_cycle_estimate(swapchain, msc, ust);

   /* Make sure to signal present timings before signalling present wait;
    * this way we get minimal latency for reports. */
   uint64_t timing_serial = image->pending_completions[index].timing_serial;
   if (timing_serial)
      wsi_swapchain_present_timing_notify_completion(&swapchain->base, timing_serial, ust * 1000, &image->base);

   uint64_t signal_present_id = image->pending_completions[index].signal_present_id;
   if (signal_present_id) {
      mtx_lock(&swapchain->present_progress_mutex);

@@ -1327,6 +1607,16 @@ x11_handle_dri3_present_event(struct x11_swapchain *chain,
         return VK_SUBOPTIMAL_KHR;
      }

      if (chain->base.present_timing.active) {
         /* It's possible that we have multiple monitors, and moving windows around changes the effective rate.
          * Lots of logic is reused from platform_x11.c. */

         /* TODO: Should we rate-limit this query? */
         struct wsi_x11_icd_surface *surface = wsi_x11_get_icd_surface(
            (struct wsi_device *)chain->base.wsi, chain->conn, chain->window);
         x11_icd_surface_update_present_timing(surface, config->width, config->height);
      }

      break;
   }
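
One plausible answer to the TODO above, sketched with hypothetical helpers (this is not what the merge implements, and the 500 ms threshold is an arbitrary assumption): skip the query unless enough time has passed on the same monotonic timebase the UST values use.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

static uint64_t monotonic_now_ns(void)
{
   struct timespec ts;
   clock_gettime(CLOCK_MONOTONIC, &ts);
   return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

static bool should_requery_refresh(uint64_t *last_query_ns)
{
   uint64_t now = monotonic_now_ns();
   if (now - *last_query_ns < 500ull * 1000 * 1000) /* at most every 500 ms */
      return false;
   *last_query_ns = now;
   return true;
}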

@@ -1348,13 +1638,14 @@ x11_handle_dri3_present_event(struct x11_swapchain *chain,

   case XCB_PRESENT_EVENT_COMPLETE_NOTIFY: {
      xcb_present_complete_notify_event_t *complete = (void *) event;
      uint64_t ust = MAX2(complete->ust, chain->next_present_ust_lower_bound);
      if (complete->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) {
         unsigned i, j;
         for (i = 0; i < chain->base.image_count; i++) {
            struct x11_image *image = &chain->images[i];
            for (j = 0; j < image->present_queued_count; j++) {
               if (image->pending_completions[j].serial == complete->serial) {
                  x11_present_complete(chain, image, j);
                  x11_present_complete(chain, image, j, complete->msc, ust);
               }
            }
         }
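
The MAX2() above keeps a reported completion time from predating the time the present targeted. A tiny sketch with assumed numbers (values in microseconds; the helper name is hypothetical):

#include <stdint.h>

static uint64_t clamp_reported_ust(uint64_t complete_ust, uint64_t lower_bound_ust)
{
   /* An early COMPLETE, e.g. caused by the eager-present pull-back in the
    * submit thread, is reported no earlier than the targeted time. */
   return complete_ust > lower_bound_ust ? complete_ust : lower_bound_ust;
}
/* clamp_reported_ust(33100, 33333) == 33333. */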

@@ -1424,8 +1715,8 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
   int64_t divisor = 0;
   int64_t remainder = 0;

   struct wsi_x11_connection *wsi_conn =
      wsi_x11_get_connection((struct wsi_device*)chain->base.wsi, chain->conn);
   struct wsi_x11_icd_surface *wsi_conn =
      wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window);
   if (!wsi_conn)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

@@ -1457,6 +1748,7 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
      (struct x11_image_pending_completion) {
         .signal_present_id = image->present_id,
         .serial = serial,
         .timing_serial = image->timing_request.serial,
      };

   xcb_void_cookie_t cookie;

@@ -1654,7 +1946,7 @@ static VkResult x11_swapchain_read_status_atomic(struct x11_swapchain *chain)
 */
static bool
x11_needs_wait_for_fences(const struct wsi_device *wsi_device,
                          struct wsi_x11_connection *wsi_conn,
                          struct wsi_x11_icd_surface *wsi_conn,
                          VkPresentModeKHR present_mode)
{
   if (wsi_conn->is_xwayland && !wsi_device->x11.xwaylandWaitReady) {

@@ -1676,7 +1968,7 @@ x11_needs_wait_for_fences(const struct wsi_device *wsi_device,

static bool
x11_requires_mailbox_image_count(const struct wsi_device *device,
                                 struct wsi_x11_connection *wsi_conn,
                                 struct wsi_x11_icd_surface *wsi_conn,
                                 VkPresentModeKHR present_mode)
{
   /* If we're resorting to wait for fences, we're assuming a MAILBOX-like model,

@@ -1773,6 +2065,26 @@ x11_set_present_mode(struct wsi_swapchain *wsi_chain,
   chain->base.present_mode = mode;
}

static void
x11_set_timing_request(struct wsi_swapchain *wsi_chain,
                       const struct wsi_image_timing_request *request)
{
   struct x11_swapchain *chain = (struct x11_swapchain *)wsi_chain;
   chain->timing_request = *request;
}

static uint64_t
x11_poll_early_refresh(struct wsi_swapchain *wsi_chain, uint64_t *interval)
{
   struct x11_swapchain *chain = (struct x11_swapchain *)wsi_chain;
   struct wsi_x11_icd_surface *wsi_conn =
      wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window);

   /* We don't know yet. */
   *interval = 0;
   return x11_icd_surface_update_present_timing(wsi_conn, chain->extent.width, chain->extent.height);
}
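
x11_set_timing_request() above latches one timing request on the swapchain; the x11_queue_present() hunk further down consumes and clears it, so a request applies to exactly one present. A simplified standalone sketch of that latch-and-consume pattern (all types here are stand-ins, not the WSI structs):

#include <stdint.h>
#include <string.h>

struct timing_request { uint64_t serial; uint64_t time; uint32_t flags; };

struct fake_chain { struct timing_request latched; };
struct fake_image { struct timing_request timing_request; };

static void set_timing_request(struct fake_chain *c, const struct timing_request *r)
{
   c->latched = *r; /* latch: remember for the next present only */
}

static void queue_present(struct fake_chain *c, struct fake_image *img)
{
   img->timing_request = c->latched;           /* consume onto the image */
   memset(&c->latched, 0, sizeof(c->latched)); /* one-shot: clear the latch */
}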

/**
 * Acquire a ready-to-use image from the swapchain.
 *

@@ -1874,6 +2186,8 @@ x11_queue_present(struct wsi_swapchain *wsi_chain,
   chain->images[image_index].present_id = present_id;
   /* With KHR_swapchain_maintenance1, the present mode can change per present. */
   chain->images[image_index].present_mode = chain->base.present_mode;
   chain->images[image_index].timing_request = chain->timing_request;
   memset(&chain->timing_request, 0, sizeof(chain->timing_request));

   wsi_queue_push(&chain->present_queue, image_index);
   return x11_swapchain_read_status_atomic(chain);

@@ -1974,6 +2288,125 @@ x11_manage_event_queue(void *state)
   return 0;
}

static uint64_t
x11_present_compute_target_msc(struct x11_swapchain *chain,
                               const struct wsi_image_timing_request *request,
                               uint64_t minimum_msc)
{
   const struct x11_present_timing_entry *entry = &chain->present_timing_window[chain->present_timing_window_index];
   bool relative = (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_RELATIVE_TIME_BIT_EXT) != 0;

   /* Just use the FIFO-derived MSC. From the spec on relative time:
    * "If the swapchain has never been used to present an image, the provided targetTime is ignored." */
   if (!request->serial || !request->time || (relative && !entry->ust))
      return minimum_msc;

   int64_t target_ns;

   mtx_lock(&chain->base.present_timing.lock);

   /* Present timing is only defined to work with FIFO modes, so we can rely on having
    * reliable relative timings, since we block for COMPLETE to come through before we queue up more presents. */
   if (relative) {
      /* If the application is trying to drive us at refresh rate, FIFO will take care of it.
       * Don't end up in a situation where we sleep and miss the deadline by mistake. */
      if (!chain->has_reliable_msc) {
         uint64_t relative_threshold;
         if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT)
            relative_threshold = 3 * chain->base.present_timing.refresh_duration / 2;
         else
            relative_threshold = chain->base.present_timing.refresh_duration;

         if (request->time <= relative_threshold) {
            mtx_unlock(&chain->base.present_timing.lock);
            return minimum_msc;
         }
      }
      target_ns = 1000 * (int64_t)entry->ust + (int64_t)request->time;
   } else {
      target_ns = (int64_t)request->time;
   }

   /* Snap to the nearest half refresh. This only makes sense for FRR, but it is the application's
    * responsibility not to use this for VRR. If this flag is not used, this is strictly a "not before". */
   if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT)
      target_ns -= (int64_t)chain->base.present_timing.refresh_duration / 2;

   if (entry->msc && chain->base.present_timing.refresh_duration != 0 &&
       chain->msc_estimate_is_stable && chain->has_reliable_msc) {
      /* If we can trust the MSC to be a stable FRR heartbeat, we sync to that. */
      uint64_t delta_time_ns = MAX2(target_ns - 1000 * (int64_t)entry->ust, 0);
      uint64_t periods = (delta_time_ns + chain->base.present_timing.refresh_duration - 1) /
                         chain->base.present_timing.refresh_duration;
      mtx_unlock(&chain->base.present_timing.lock);

      /* Xwl cannot understand an MSC that jumps by more than 1. It appears that if there are MSC jumps above 1,
       * each MSC cycle is padded by 16.6 ms or something like that.
       * If we want to target a specific time, we must sleep to achieve that until Xwl improves.
       * Fortunately, we're on a submit thread, so that is a mostly acceptable solution. */
      minimum_msc = MAX2(minimum_msc, entry->msc + periods);
   } else {
      /* If we don't have a stable estimate (e.g. true VRR, or Xwl), we just sleep until the deadline.
       * This relies on the timebase of os_time_nanosleep being MONOTONIC, as well as UST being MONOTONIC. */

      if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) {
         if (!chain->has_reliable_msc && chain->base.present_timing.refresh_duration) {
            uint64_t delta_time_ns = MAX2(target_ns - 1000 * (int64_t)entry->ust, 0);
            uint64_t periods = delta_time_ns / chain->base.present_timing.refresh_duration;

            target_ns = 1000ull * entry->ust + periods * chain->base.present_timing.refresh_duration;

            /* Set a minimum target that is very close to the real estimate.
             * This way, we ensure that we don't regularly round estimates up in
             * chain->next_present_ust_lower_bound. */
            target_ns += 63 * chain->base.present_timing.refresh_duration / 64;
         }
      }

      if (chain->has_reliable_msc) {
         /* Very regular sleeping can trigger a strange feedback loop where MSC estimates become stable enough
          * that we accept them as a stable MSC. Perturb the rates enough to make it extremely unlikely that we
          * accept sleeping patterns as the ground-truth rate: introduce a 50 us error between each timestamp,
          * which should avoid the 10 us check reliably. If sleep quanta are not that accurate, it's extremely
          * unlikely we get a stable pace anyway. TODO: Is there a more reliable way? */

         /* On Xwl we never accept MSC estimates as ground truth, so ignore this perturbation. */
         target_ns += 50000ll * (chain->present_timing_window_index & 1) - 25000;
         target_ns = MAX2(target_ns, 0);
      }

      /* If we're on Xwl or VRR X11 and trying to target a specific cycle by sleeping, pull back the sleep a bit.
       * We will be racing against time once we wake up to send the request to Xwl -> Wayland -> frame callback -> COMPLETE.
       * If target_ns syncs well to a refresh cycle, we speculate that COMPLETE will come through at about target_ns. */

      /* To get a proper pace on an actual VRR display, we will have to detect if we're presenting too early
       * compared to what the application actually expected.
       * In that case, we need to remove this compensation if we detect that presents come in too early.
       * Effectively, we will need to adjust the reported UST up if we somehow end up seeing a timestamp too early.
       * The relative refresh will feed off this adjustment in a tight loop, so this should be pretty solid
       * for both VRR and FRR. Present timing can only be used with FIFO modes, i.e. we will not overwrite this
       * until the present is actually complete. */
      chain->next_present_ust_lower_bound = target_ns / 1000;

      /* We also need to pull back the sleep a bit to account for X.org round-trip delays.
       * Allow up to 4 ms of error here. */
      int64_t eager_present_ns = MIN2((int64_t)chain->base.present_timing.refresh_duration / 4, 4 * 1000 * 1000);
      target_ns -= eager_present_ns;
      target_ns = MAX2(target_ns, 0);

      mtx_unlock(&chain->base.present_timing.lock);
      mtx_unlock(&chain->thread_state_lock);

      os_time_nanosleep_until(target_ns);

      /* Reacquiring the lock won't change any invariants for us, so this is fine.
       * We make sure to check chain->status after this function in case it got updated while we were sleeping. */
      mtx_lock(&chain->thread_state_lock);
   }

   return minimum_msc;
}
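
In the stable-MSC branch above, the period computation is a ceiling division, so a present never lands before the requested time. A worked sketch with assumed numbers (the helper and the sample values are illustrative):

#include <stdint.h>

static uint64_t target_msc(uint64_t entry_msc, uint64_t delta_time_ns,
                           uint64_t refresh_ns)
{
   /* Ceiling division: round up to the first vblank at or after the target. */
   uint64_t periods = (delta_time_ns + refresh_ns - 1) / refresh_ns;
   return entry_msc + periods;
}
/* Last completion at MSC 500 on a 16666667 ns (60 Hz) refresh, target 40 ms out:
 * two periods (33.3 ms) fall short, so we round up to three, i.e.
 * target_msc(500, 40000000, 16666667) == 503. */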

/**
 * Presentation thread.
 *

@@ -1991,8 +2424,8 @@ static int
x11_manage_present_queue(void *state)
{
   struct x11_swapchain *chain = state;
   struct wsi_x11_connection *wsi_conn =
      wsi_x11_get_connection((struct wsi_device*)chain->base.wsi, chain->conn);
   struct wsi_x11_icd_surface *wsi_conn =
      wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window);
   VkResult result = VK_SUCCESS;

   u_thread_setname("WSI swapchain queue");

@@ -2040,6 +2473,8 @@ x11_manage_present_queue(void *state)
      u_cnd_monotonic_wait(&chain->thread_state_cond, &chain->thread_state_lock);
   }

   target_msc = x11_present_compute_target_msc(chain, &chain->images[image_index].timing_request, target_msc);

   if (chain->status < 0) {
      mtx_unlock(&chain->thread_state_lock);
      break;

@@ -2315,7 +2750,7 @@ wsi_x11_recompute_dri3_modifier_hash(blake3_hash *hash, const struct wsi_drm_ima
}

static void
wsi_x11_get_dri3_modifiers(struct wsi_x11_connection *wsi_conn,
wsi_x11_get_dri3_modifiers(struct wsi_x11_icd_surface *wsi_conn,
                           xcb_connection_t *conn, xcb_window_t window,
                           uint8_t depth, uint8_t bpp,
                           uint64_t **modifiers_in, uint32_t *num_modifiers_in,

@@ -2402,8 +2837,8 @@ wsi_x11_swapchain_query_dri3_modifiers_changed(struct x11_swapchain *chain)
   uint64_t *modifiers[2] = {NULL, NULL};
   uint32_t num_modifiers[2] = {0, 0};

   struct wsi_x11_connection *wsi_conn =
      wsi_x11_get_connection((struct wsi_device*)chain->base.wsi, chain->conn);
   struct wsi_x11_icd_surface *wsi_conn =
      wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window);

   xcb_get_geometry_reply_t *geometry =
      xcb_get_geometry_reply(chain->conn, xcb_get_geometry(chain->conn, chain->window), NULL);

@@ -2551,7 +2986,7 @@ static VkResult x11_wait_for_present(struct wsi_swapchain *wsi_chain,

static unsigned
x11_get_min_image_count_for_present_mode(struct wsi_device *wsi_device,
                                         struct wsi_x11_connection *wsi_conn,
                                         struct wsi_x11_icd_surface *wsi_conn,
                                         VkPresentModeKHR present_mode)
{
   uint32_t min_image_count = x11_get_min_image_count(wsi_device, wsi_conn->is_xwayland);

@@ -2592,8 +3027,9 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
    * representing it.
    */
   xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
   struct wsi_x11_connection *wsi_conn =
      wsi_x11_get_connection(wsi_device, conn);
   xcb_window_t window = x11_surface_get_window(icd_surface);
   struct wsi_x11_icd_surface *wsi_conn =
      wsi_x11_get_icd_surface(wsi_device, conn, window);
   if (!wsi_conn)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

@@ -2613,7 +3049,6 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
   }

   /* Check that we have a window up-front. It is an error to not have one. */
   xcb_window_t window = x11_surface_get_window(icd_surface);

   /* Get the geometry of that window. The bit depth of the swapchain will be fitted and the
    * chain's images extents should fit it for performance-optimizing flips.

@@ -2736,8 +3171,14 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
   chain->base.wait_for_present2 = x11_wait_for_present;
   chain->base.release_images = x11_release_images;
   chain->base.set_present_mode = x11_set_present_mode;
   chain->base.set_timing_request = x11_set_timing_request;
   chain->base.poll_early_refresh = x11_poll_early_refresh;
   chain->base.present_mode = present_mode;
   chain->base.image_count = num_images;

   /* This is what the X server is using. We cannot really query it, but we rely on it working. */
   chain->base.present_timing.time_domain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR;

   chain->conn = conn;
   chain->window = window;
   chain->depth = bit_depth;

@@ -2749,6 +3190,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
   chain->has_dri3_modifiers = wsi_conn->has_dri3_modifiers;
   chain->has_mit_shm = wsi_conn->has_mit_shm;
   chain->has_async_may_tear = present_caps & XCB_PRESENT_CAPABILITY_ASYNC_MAY_TEAR;
   chain->has_reliable_msc = !wsi_conn->is_xwayland;

   /* When images in the swapchain don't fit the window, X can still present them, but it won't
    * happen by flip, only by copy. So this is a suboptimal copy, because if the client would change

@@ -2856,6 +3298,9 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
   /* It is safe to set it here as only one swapchain can be associated with
    * the window, and swapchain creation does the association. At this point
    * we know the creation is going to succeed. */

   /* If we have present timing, we need to make sure we get a usable estimate for the refresh rate
    * before we let the window run in full VRR. Once we have locked in the rate, we can enable the VRR property. */
   wsi_x11_set_adaptive_sync_property(conn, window,
                                      wsi_device->enable_adaptive_sync);
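
For context, per-window VRR on X11 is conventionally toggled through the _VARIABLE_REFRESH window property. A minimal sketch of such a toggle, modeled on that convention (illustrative, not code from this change):

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <xcb/xcb.h>

static void set_vrr_property(xcb_connection_t *conn, xcb_window_t window, bool on)
{
   const char *name = "_VARIABLE_REFRESH";
   xcb_intern_atom_cookie_t cookie =
      xcb_intern_atom(conn, 0, (uint16_t)strlen(name), name);
   xcb_intern_atom_reply_t *reply = xcb_intern_atom_reply(conn, cookie, NULL);
   if (!reply)
      return;

   if (on) {
      /* A CARDINAL value of 1 marks the window as eligible for VRR flips. */
      uint32_t value = 1;
      xcb_change_property(conn, XCB_PROP_MODE_REPLACE, window, reply->atom,
                          XCB_ATOM_CARDINAL, 32, 1, &value);
   } else {
      xcb_delete_property(conn, window, reply->atom);
   }
   free(reply);
}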

@@ -2889,6 +3334,18 @@ fail_alloc:
   return result;
}

static uint32_t x11_hash_icd_surface(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct x11_icd_surface_key));
}

static bool x11_icd_surface_equal(const void *a_, const void *b_)
{
   const struct x11_icd_surface_key *a = a_;
   const struct x11_icd_surface_key *b = b_;
   return a->conn == b->conn && a->window == b->window;
}
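
The two helpers above let the connection table key on a (connection, window) pair instead of the bare connection pointer. A standalone sketch of the same composite-key idea without the Mesa hash-table API (FNV-1a is an arbitrary choice here; the struct and names are stand-ins):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct surface_key {
   void    *conn;   /* stands in for xcb_connection_t * */
   uint32_t window; /* stands in for xcb_window_t */
};

/* Byte-wise FNV-1a; keys should be zero-initialized before filling so any
 * struct padding hashes consistently, as with hashing the whole struct above. */
static uint32_t fnv1a_hash(const void *data, size_t size)
{
   const uint8_t *bytes = data;
   uint32_t hash = 2166136261u;
   for (size_t i = 0; i < size; i++)
      hash = (hash ^ bytes[i]) * 16777619u;
   return hash;
}

static uint32_t hash_surface_key(const struct surface_key *key)
{
   return fnv1a_hash(key, sizeof(*key));
}

static bool surface_key_equal(const struct surface_key *a, const struct surface_key *b)
{
   return a->conn == b->conn && a->window == b->window;
}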

VkResult
wsi_x11_init_wsi(struct wsi_device *wsi_device,
                 const VkAllocationCallbacks *alloc,

@@ -2916,8 +3373,7 @@ wsi_x11_init_wsi(struct wsi_device *wsi_device,
      goto fail_alloc;
   }

   wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                              _mesa_key_pointer_equal);
   wsi->connections = _mesa_hash_table_create(NULL, x11_hash_icd_surface, x11_icd_surface_equal);
   if (!wsi->connections) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail_mutex;

@@ -2981,7 +3437,7 @@ wsi_x11_finish_wsi(struct wsi_device *wsi_device,

   if (wsi) {
      hash_table_foreach(wsi->connections, entry)
         wsi_x11_connection_destroy(wsi_device, entry->data);
         wsi_x11_icd_surface_destroy(wsi_device, entry->data);

      _mesa_hash_table_destroy(wsi->connections, NULL);