Merge branch 'present-timing' into 'main'

vulkan/wsi: Implement EXT_present_timing.

See merge request mesa/mesa!38770
Hans-Kristian Arntzen 2025-12-20 00:47:43 +00:00
commit 32a5663d35
21 changed files with 1845 additions and 201 deletions
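
For reviewers unfamiliar with the extension, the application-side flow this MR serves looks roughly like the sketch below. It is illustrative only: swapchain creation, extension-function loading via vkGetDeviceProcAddr, and error handling are elided, the swapchain is assumed to have been created with VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT, and structure members follow VK_EXT_present_timing as used in this implementation (verify sType values and full struct layouts against the EXT headers).

/* Hedged sketch: request QUEUE_OPERATIONS_END feedback for one present and
 * read it back later. `device`, `queue`, `swapchain`, `image_index` and
 * `render_done` are assumed to already exist. */
static void
present_with_timing(VkDevice device, VkQueue queue, VkSwapchainKHR swapchain,
                    uint32_t image_index, VkSemaphore render_done)
{
   /* Reserve room for outstanding timing requests up front; once this queue
    * is exhausted, presents fail with VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT. */
   vkSetSwapchainPresentTimingQueueSizeEXT(device, swapchain, 8);

   const VkPresentTimingInfoEXT timing = {
      .sType = VK_STRUCTURE_TYPE_PRESENT_TIMING_INFO_EXT,
      .targetTime = 0, /* no target time, feedback only */
      .presentStageQueries = VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT,
   };
   const VkPresentTimingsInfoEXT timings = {
      .sType = VK_STRUCTURE_TYPE_PRESENT_TIMINGS_INFO_EXT,
      .swapchainCount = 1,
      .pTimingInfos = &timing,
   };
   const VkPresentInfoKHR present = {
      .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
      .pNext = &timings,
      .waitSemaphoreCount = 1,
      .pWaitSemaphores = &render_done,
      .swapchainCount = 1,
      .pSwapchains = &swapchain,
      .pImageIndices = &image_index,
   };
   vkQueuePresentKHR(queue, &present);

   /* Some frames later, drain completed timings with the two-call idiom. */
   const VkPastPresentationTimingInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_PAST_PRESENTATION_TIMING_INFO_EXT,
      .swapchain = swapchain,
   };
   VkPastPresentationTimingPropertiesEXT props = {
      .sType = VK_STRUCTURE_TYPE_PAST_PRESENTATION_TIMING_PROPERTIES_EXT,
   };
   vkGetPastPresentationTimingEXT(device, &info, &props);
   /* ...then allocate props.presentationTimingCount entries, each with a
    * pPresentStages array sized for the stages requested above, and call
    * vkGetPastPresentationTimingEXT again to fill them in. */
}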

View file

@@ -658,6 +658,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_EXT_physical_device_drm DONE (anv, hasvk, hk, nvk, panvk, pvr, radv, tu, v3dv, vn)
VK_EXT_pipeline_library_group_handles DONE (anv, lvp, radv, vn)
VK_EXT_post_depth_coverage DONE (anv/gfx11+, lvp, nvk, radv/gfx10+, tu, vn)
VK_EXT_present_timing DONE (anv, hk, nvk, radv, tu)
VK_EXT_primitive_topology_list_restart DONE (anv, hasvk, lvp, nvk, panvk, radv, tu, v3dv, vn)
VK_EXT_primitives_generated_query DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
VK_EXT_provoking_vertex DONE (anv, hasvk, hk, lvp, nvk, panvk, pvr, radv, tu, v3dv, vn)

View file

@@ -20,3 +20,4 @@ VK_KHR_surface_maintenance1 promotion everywhere EXT is exposed
VK_KHR_swapchain_maintenance1 promotion everywhere EXT is exposed
VK_KHR_dynamic_rendering on PowerVR
VK_EXT_multisampled_render_to_single_sampled on panvk
VK_EXT_present_timing on RADV, NVK, Turnip, ANV, Honeykrisp

View file

@@ -791,6 +791,10 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
.EXT_pipeline_library_group_handles = radv_enable_rt(pdev),
.EXT_pipeline_robustness = !pdev->use_llvm,
.EXT_post_depth_coverage = pdev->info.gfx_level >= GFX10,
#ifdef RADV_USE_WSI_PLATFORM
/* KHR_calibrated_timestamps is a requirement to expose EXT_present_timing. */
.EXT_present_timing = radv_calibrated_timestamps_enabled(pdev),
#endif
.EXT_primitive_topology_list_restart = true,
.EXT_primitives_generated_query = true,
.EXT_private_data = true,
@@ -1481,6 +1485,14 @@ radv_physical_device_get_features(const struct radv_physical_device *pdev, struc
/* VK_EXT_custom_resolve */
.customResolve = true,
#ifdef RADV_USE_WSI_PLATFORM
/* VK_EXT_present_timing */
/* The actual support query is deferred to surface capability query time. */
.presentTiming = true,
.presentAtAbsoluteTime = true,
.presentAtRelativeTime = true,
#endif
};
}

View file

@@ -181,6 +181,9 @@ hk_get_device_extensions(const struct hk_instance *instance,
.EXT_pipeline_protected_access = true,
.EXT_pipeline_robustness = true,
.EXT_physical_device_drm = true,
#ifdef HK_USE_WSI_PLATFORM
.EXT_present_timing = true,
#endif
.EXT_primitive_topology_list_restart = true,
.EXT_private_data = true,
.EXT_primitives_generated_query = false,
@@ -623,6 +626,13 @@ hk_get_device_features(
/* VK_KHR_shader_relaxed_extended_instruction */
.shaderRelaxedExtendedInstruction = true,
#ifdef HK_USE_WSI_PLATFORM
/* VK_EXT_present_timing */
.presentTiming = true,
.presentAtRelativeTime = true,
.presentAtAbsoluteTime = true,
#endif
};
}

View file

@@ -792,17 +792,6 @@ dri2_fourcc_for_depth(struct dri2_egl_display *dri2_dpy, uint32_t depth)
}
}
static int
box_intersection_area(int16_t a_x, int16_t a_y, int16_t a_width,
int16_t a_height, int16_t b_x, int16_t b_y,
int16_t b_width, int16_t b_height)
{
int w = MIN2(a_x + a_width, b_x + b_width) - MAX2(a_x, b_x);
int h = MIN2(a_y + a_height, b_y + b_height) - MAX2(a_y, b_y);
return (w < 0 || h < 0) ? 0 : w * h;
}
EGLBoolean
dri2_x11_get_msc_rate(_EGLDisplay *display, _EGLSurface *surface,
EGLint *numerator, EGLint *denominator)

View file

@@ -314,6 +314,9 @@ get_device_extensions(const struct tu_physical_device *device,
.EXT_physical_device_drm = !is_kgsl(device->instance),
.EXT_pipeline_creation_cache_control = true,
.EXT_pipeline_creation_feedback = true,
#ifdef TU_USE_WSI_PLATFORM
.EXT_present_timing = device->info->props.has_persistent_counter,
#endif
.EXT_primitive_topology_list_restart = true,
.EXT_primitives_generated_query = true,
.EXT_private_data = true,
@@ -825,6 +828,13 @@ tu_get_features(struct tu_physical_device *pdevice,
/* VK_EXT_custom_resolve */
features->customResolve = true;
#ifdef TU_USE_WSI_PLATFORM
/* VK_EXT_present_timing */
features->presentTiming = true;
features->presentAtRelativeTime = true;
features->presentAtAbsoluteTime = true;
#endif
}
static void

View file

@@ -354,6 +354,9 @@ get_device_extensions(const struct anv_physical_device *device,
.EXT_pipeline_protected_access = device->has_protected_contexts,
.EXT_pipeline_robustness = true,
.EXT_post_depth_coverage = true,
#ifdef ANV_USE_WSI_PLATFORM
.EXT_present_timing = device->has_reg_timestamp,
#endif
.EXT_primitive_topology_list_restart = true,
.EXT_primitives_generated_query = true,
.EXT_private_data = true,
@@ -1005,6 +1008,13 @@ get_features(const struct anv_physical_device *pdevice,
/* VK_KHR_pipeline_binary */
.pipelineBinaries = true,
#ifdef ANV_USE_WSI_PLATFORM
/* VK_EXT_present_timing */
.presentTiming = true,
.presentAtRelativeTime = true,
.presentAtAbsoluteTime = true,
#endif
};
/* The new DOOM and Wolfenstein games require depthBounds without

View file

@@ -29,36 +29,7 @@
#include <c11/threads.h>
#include "util/format/u_formats.h"
#ifdef HAVE_X11_PLATFORM
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>
struct loader_crtc_info {
xcb_randr_crtc_t id;
xcb_timestamp_t timestamp;
int16_t x, y;
uint16_t width, height;
unsigned refresh_numerator;
unsigned refresh_denominator;
};
struct loader_screen_resources {
mtx_t mtx;
xcb_connection_t *conn;
xcb_screen_t *screen;
xcb_timestamp_t config_timestamp;
/* Number of CRTCs with an active mode set */
unsigned num_crtcs;
struct loader_crtc_info *crtcs;
};
#endif
#include "loader_dri_helper_screen.h"
/**
* These formats are endian independent; they result in the same layout
@@ -110,16 +81,4 @@ loader_pipe_format_to_fourcc(enum pipe_format pipe);
enum pipe_format
loader_fourcc_to_pipe_format(uint32_t fourcc);
#ifdef HAVE_X11_PLATFORM
void
loader_init_screen_resources(struct loader_screen_resources *res,
xcb_connection_t *conn,
xcb_screen_t *screen);
bool
loader_update_screen_resources(struct loader_screen_resources *res);
void
loader_destroy_screen_resources(struct loader_screen_resources *res);
#endif
#endif /* LOADER_DRI_HELPER_H */

View file

@@ -0,0 +1,76 @@
/*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that copyright
* notice and this permission notice appear in supporting documentation, and
* that the name of the copyright holders not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. The copyright holders make no representations
* about the suitability of this software for any purpose. It is provided "as
* is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
* EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*/
#ifndef LOADER_DRI_HELPER_SCREEN_H
#define LOADER_DRI_HELPER_SCREEN_H
#ifdef HAVE_X11_PLATFORM
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>
struct loader_crtc_info {
xcb_randr_crtc_t id;
xcb_timestamp_t timestamp;
int16_t x, y;
uint16_t width, height;
unsigned refresh_numerator;
unsigned refresh_denominator;
};
struct loader_screen_resources {
mtx_t mtx;
xcb_connection_t *conn;
xcb_screen_t *screen;
xcb_timestamp_t config_timestamp;
/* Number of CRTCs with an active mode set */
unsigned num_crtcs;
struct loader_crtc_info *crtcs;
};
void
loader_init_screen_resources(struct loader_screen_resources *res,
xcb_connection_t *conn,
xcb_screen_t *screen);
bool
loader_update_screen_resources(struct loader_screen_resources *res);
void
loader_destroy_screen_resources(struct loader_screen_resources *res);
#endif
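/* Area of the intersection of two axis-aligned boxes, or 0 if they do not
* overlap. E.g. two 100x100 boxes whose origins differ by (50, 50) overlap
* in a 50x50 region, so the result is 2500 (illustrative numbers). */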
static inline int
box_intersection_area(int16_t a_x, int16_t a_y, int16_t a_width,
int16_t a_height, int16_t b_x, int16_t b_y,
int16_t b_width, int16_t b_height)
{
int w = MIN2(a_x + a_width, b_x + b_width) - MAX2(a_x, b_x);
int h = MIN2(a_y + a_height, b_y + b_height) - MAX2(a_y, b_y);
return (w < 0 || h < 0) ? 0 : w * h;
}
#endif

View file

@@ -49,7 +49,7 @@ endif
if with_platform_x11
subdir('x11')
endif
if with_gallium_or_lvp or with_gbm or with_platform_wayland
if with_gallium_or_lvp or with_gbm or with_platform_wayland or with_platform_x11 or with_platform_xcb
subdir('loader')
endif
subdir('compiler')

View file

@@ -262,6 +262,9 @@ nvk_get_device_extensions(const struct nvk_instance *instance,
.EXT_pipeline_robustness = true,
.EXT_physical_device_drm = true,
.EXT_post_depth_coverage = info->cls_eng3d >= MAXWELL_B,
#ifdef NVK_USE_WSI_PLATFORM
.EXT_present_timing = true,
#endif
.EXT_primitive_topology_list_restart = true,
.EXT_private_data = true,
.EXT_primitives_generated_query = true,
@@ -753,6 +756,11 @@ nvk_get_device_features(const struct nv_device_info *info,
/* VK_KHR_present_wait2 */
.presentWait2 = true,
/* VK_EXT_present_timing */
.presentTiming = true,
.presentAtRelativeTime = true,
.presentAtAbsoluteTime = true,
#endif
};
}

View file

@@ -26,6 +26,10 @@ if with_platform_wayland
files_vulkan_wsi += wp_files['color-management-v1']
endif
if with_platform_x11 or with_platform_xcb
links_vulkan_wsi += libloader
endif
if with_platform_windows
files_vulkan_wsi += files('wsi_common_win32.cpp')
platform_deps += dep_dxheaders

View file

@@ -95,6 +95,7 @@ wsi_device_init(struct wsi_device *wsi,
WSI_GET_CB(GetPhysicalDeviceProperties2);
WSI_GET_CB(GetPhysicalDeviceMemoryProperties);
WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties);
WSI_GET_CB(GetPhysicalDeviceProperties);
#undef WSI_GET_CB
wsi->drm_info.sType =
@@ -121,10 +122,18 @@ wsi_device_init(struct wsi_device *wsi,
VkQueueFamilyProperties queue_properties[64];
GetPhysicalDeviceQueueFamilyProperties(pdevice, &wsi->queue_family_count, queue_properties);
VkPhysicalDeviceProperties properties;
GetPhysicalDeviceProperties(pdevice, &properties);
wsi->timestamp_period = properties.limits.timestampPeriod;
for (unsigned i = 0; i < wsi->queue_family_count; i++) {
VkFlags req_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
if (queue_properties[i].queueFlags & req_flags)
wsi->queue_supports_blit |= BITFIELD64_BIT(i);
/* Only consider queues with full 64-bit timestamps so we never need to handle timestamp wraparound. */
if (queue_properties[i].timestampValidBits == 64)
wsi->queue_supports_timestamps |= BITFIELD64_BIT(i);
}
for (VkExternalSemaphoreHandleTypeFlags handle_type = 1;
@@ -180,15 +189,19 @@ wsi_device_init(struct wsi_device *wsi,
WSI_GET_CB(CmdPipelineBarrier);
WSI_GET_CB(CmdCopyImage);
WSI_GET_CB(CmdCopyImageToBuffer);
WSI_GET_CB(CmdResetQueryPool);
WSI_GET_CB(CmdWriteTimestamp);
WSI_GET_CB(CreateBuffer);
WSI_GET_CB(CreateCommandPool);
WSI_GET_CB(CreateFence);
WSI_GET_CB(CreateImage);
WSI_GET_CB(CreateQueryPool);
WSI_GET_CB(CreateSemaphore);
WSI_GET_CB(DestroyBuffer);
WSI_GET_CB(DestroyCommandPool);
WSI_GET_CB(DestroyFence);
WSI_GET_CB(DestroyImage);
WSI_GET_CB(DestroyQueryPool);
WSI_GET_CB(DestroySemaphore);
WSI_GET_CB(EndCommandBuffer);
WSI_GET_CB(FreeMemory);
@@ -200,9 +213,14 @@ wsi_device_init(struct wsi_device *wsi,
WSI_GET_CB(GetImageSubresourceLayout);
if (!wsi->sw)
WSI_GET_CB(GetMemoryFdKHR);
WSI_GET_CB(GetPhysicalDeviceCalibrateableTimeDomainsKHR);
WSI_GET_CB(GetPhysicalDeviceProperties);
WSI_GET_CB(GetPhysicalDeviceFormatProperties);
WSI_GET_CB(GetPhysicalDeviceFormatProperties2);
WSI_GET_CB(GetPhysicalDeviceImageFormatProperties2);
WSI_GET_CB(GetPhysicalDeviceQueueFamilyProperties);
WSI_GET_CB(GetCalibratedTimestampsKHR);
WSI_GET_CB(GetQueryPoolResults);
WSI_GET_CB(GetSemaphoreFdKHR);
WSI_GET_CB(ResetFences);
WSI_GET_CB(QueueSubmit2);
@@ -481,8 +499,10 @@ wsi_swapchain_init(const struct wsi_device *wsi,
chain->blit.type = get_blit_type(wsi, image_params, _device);
chain->blit.queue = NULL;
if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT) {
if (wsi->get_blit_queue) {
if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT ||
(pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT)) {
if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT && wsi->get_blit_queue) {
chain->blit.queue = wsi->get_blit_queue(_device);
}
@@ -503,10 +523,18 @@ wsi_swapchain_init(const struct wsi_device *wsi,
if (chain->blit.queue != NULL) {
queue_family_index = chain->blit.queue->queue_family_index;
} else {
uint64_t effective_queues = wsi->queue_supports_blit;
if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT)
effective_queues &= wsi->queue_supports_timestamps;
/* Fallback: if no queue supports both, we don't advertise support for queue-complete times. */
if (!effective_queues)
effective_queues = wsi->queue_supports_blit;
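/* e.g. with queue_supports_blit = 0b0111 and queue_supports_timestamps = 0b0011
* (illustrative masks), effective_queues is 0b0011; if the masks don't intersect
* at all, we fall back to the blit mask and simply don't report queue-complete times. */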
/* Queues returned by get_blit_queue() might not be listed in
* GetPhysicalDeviceQueueFamilyProperties, so this check is skipped for those queues.
*/
if (!(wsi->queue_supports_blit & BITFIELD64_BIT(queue_family_index)))
if (!(effective_queues & BITFIELD64_BIT(queue_family_index)))
continue;
}
@@ -616,7 +644,7 @@ wsi_swapchain_finish(struct wsi_swapchain *chain)
chain->wsi->DestroySemaphore(chain->device, chain->present_id_timeline,
&chain->alloc);
if (chain->blit.type != WSI_SWAPCHAIN_NO_BLIT) {
if (chain->cmd_pools) {
int cmd_pools_count = chain->blit.queue != NULL ?
1 : chain->wsi->queue_family_count;
for (uint32_t i = 0; i < cmd_pools_count; i++) {
@@ -628,6 +656,12 @@ wsi_swapchain_finish(struct wsi_swapchain *chain)
vk_free(&chain->alloc, chain->cmd_pools);
}
if (chain->present_timing.active) {
mtx_destroy(&chain->present_timing.lock);
if (chain->present_timing.timings)
vk_free(&chain->alloc, chain->present_timing.timings);
}
vk_object_base_finish(&chain->base);
}
@@ -815,6 +849,88 @@ fail:
return result;
}
/**
* Creates the timestamp-query command buffers for the end of rendering, which
* are used to report the QUEUE_COMPLETE timestamp for EXT_present_timing.
*
* Unless the swapchain is blitting, we don't know which queue family a Present
* will happen on, so we make a timestamp command buffer for each family so
* they're ready to go at present time.
*/
VkResult
wsi_image_init_timestamp(const struct wsi_swapchain *chain,
struct wsi_image *image)
{
const struct wsi_device *wsi = chain->wsi;
VkResult result;
/* Set up command buffer to get timestamp info */
result = wsi->CreateQueryPool(
chain->device,
&(const VkQueryPoolCreateInfo){
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.queryType = VK_QUERY_TYPE_TIMESTAMP,
.queryCount = 1,
},
NULL,
&image->query_pool);
if (result != VK_SUCCESS)
goto fail;
uint32_t family_count = chain->blit.queue ? 1 : wsi->queue_family_count;
if (!image->timestamp_cmd_buffers) {
image->timestamp_cmd_buffers =
vk_zalloc(&chain->alloc, sizeof(VkCommandBuffer) * family_count, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!image->timestamp_cmd_buffers)
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
for (uint32_t i = 0; i < family_count; i++) {
/* We can only use timestamps on a queue family that reports non-zero timestamp bits.
* Since this implementation does not consider device timestamp wrapping (unclear how
* that would ever work), we only report queue-done times where timestamp bits == 64. */
if (!chain->cmd_pools[i])
continue;
result = wsi->AllocateCommandBuffers(
chain->device,
&(const VkCommandBufferAllocateInfo){
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.pNext = NULL,
.commandPool = chain->cmd_pools[i],
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = 1,
}, &image->timestamp_cmd_buffers[i]);
if (result != VK_SUCCESS)
goto fail;
wsi->BeginCommandBuffer(
image->timestamp_cmd_buffers[i],
&(VkCommandBufferBeginInfo) {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
});
wsi->CmdResetQueryPool(image->timestamp_cmd_buffers[i],
image->query_pool,
0, 1);
wsi->CmdWriteTimestamp(image->timestamp_cmd_buffers[i],
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
image->query_pool,
0);
wsi->EndCommandBuffer(image->timestamp_cmd_buffers[i]);
}
return VK_SUCCESS;
fail:
return result;
}
void
wsi_destroy_image(const struct wsi_swapchain *chain,
struct wsi_image *image)
@@ -850,6 +966,19 @@ wsi_destroy_image(const struct wsi_swapchain *chain,
vk_free(&chain->alloc, image->blit.cmd_buffers);
}
wsi->DestroyQueryPool(chain->device, image->query_pool, NULL);
if (image->timestamp_cmd_buffers) {
uint32_t family_count = chain->blit.queue ? 1 : wsi->queue_family_count;
for (uint32_t i = 0; i < family_count; i++) {
if (image->timestamp_cmd_buffers[i]) {
wsi->FreeCommandBuffers(chain->device, chain->cmd_pools[i],
1, &image->timestamp_cmd_buffers[i]);
}
}
vk_free(&chain->alloc, image->timestamp_cmd_buffers);
}
wsi->FreeMemory(chain->device, image->memory, &chain->alloc);
wsi->DestroyImage(chain->device, image->image, &chain->alloc);
wsi->DestroyImage(chain->device, image->blit.image, &chain->alloc);
@@ -912,8 +1041,43 @@ wsi_GetPhysicalDeviceSurfaceCapabilities2KHR(
struct wsi_device *wsi_device = device->wsi_device;
struct wsi_interface *iface = wsi_device->wsi[surface->platform];
return iface->get_capabilities2(surface, wsi_device, pSurfaceInfo->pNext,
VkResult vr = iface->get_capabilities2(surface, wsi_device, pSurfaceInfo->pNext,
pSurfaceCapabilities);
if (vr != VK_SUCCESS)
return vr;
struct VkPresentTimingSurfaceCapabilitiesEXT *present_timing =
vk_find_struct(pSurfaceCapabilities, PRESENT_TIMING_SURFACE_CAPABILITIES_EXT);
if (present_timing && present_timing->presentTimingSupported) {
if (wsi_device->queue_supports_blit & wsi_device->queue_supports_timestamps) {
/* Make sure the implementation is capable of calibrating timestamps. */
if (wsi_device->GetPhysicalDeviceCalibrateableTimeDomainsKHR && wsi_device->GetCalibratedTimestampsKHR) {
VkTimeDomainKHR domains[64];
uint32_t count = ARRAY_SIZE(domains);
wsi_device->GetPhysicalDeviceCalibrateableTimeDomainsKHR(wsi_device->pdevice, &count, domains);
bool supports_device = false, supports_monotonic = false, supports_monotonic_raw = false;
for (uint32_t i = 0; i < count; i++) {
if (domains[i] == VK_TIME_DOMAIN_DEVICE_KHR)
supports_device = true;
else if (domains[i] == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR)
supports_monotonic = true;
else if (domains[i] == VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR)
supports_monotonic_raw = true;
}
/* Current present timing implementations do not use anything outside these.
* QPC might be relevant for Dozen at some point, but for now, we only consider Linux-centric
* platforms for present timing. */
if (supports_device && supports_monotonic && supports_monotonic_raw)
present_timing->presentStageQueries |= VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT;
}
}
}
return vr;
}
VKAPI_ATTR VkResult VKAPI_CALL
@@ -1112,6 +1276,32 @@ wsi_CreateSwapchainKHR(VkDevice _device,
*pSwapchain = wsi_swapchain_to_handle(swapchain);
if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) {
swapchain->present_timing.active = true;
mtx_init(&swapchain->present_timing.lock, mtx_plain);
for (uint32_t i = 0; i < swapchain->image_count; i++) {
struct wsi_image *image = swapchain->get_wsi_image(swapchain, i);
result = wsi_image_init_timestamp(swapchain, image);
if (result != VK_SUCCESS) {
swapchain->destroy(swapchain, alloc);
return result;
}
}
if (swapchain->poll_early_refresh) {
/* If we can query the display directly, we should report something reasonable on first query
* before we even present the first time. */
uint64_t interval;
uint64_t refresh_ns = swapchain->poll_early_refresh(swapchain, &interval);
if (refresh_ns) {
swapchain->present_timing.refresh_duration = refresh_ns;
swapchain->present_timing.refresh_interval = interval;
swapchain->present_timing.refresh_counter++;
}
}
}
return VK_SUCCESS;
}
@@ -1168,6 +1358,353 @@ wsi_ReleaseSwapchainImagesKHR(VkDevice _device,
return VK_SUCCESS;
}
static void
wsi_swapchain_present_timing_sample_query_pool(struct wsi_swapchain *chain,
struct wsi_presentation_timing *timing,
struct wsi_image *image,
uint64_t upper_bound)
{
if (!(timing->requested_feedback & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT))
return;
/* The GPU really should be done by now, and we should be able to read the timestamp,
* but it's possible that the present was discarded and the timestamp for this present
* is still 0. In that case, we should not block to wait on the queue dispatch timestamp. */
uint64_t queue_ts;
if (chain->wsi->GetQueryPoolResults(chain->device, image->query_pool, 0, 1, sizeof(uint64_t),
&queue_ts, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT) != VK_SUCCESS)
return;
/* There are two ways to deal with the DEVICE timestamp domain.
* We could report the PRESENT_STAGE_LOCAL domain and let the application
* calibrate the timestamps on its own. However, that creates an annoying situation
* where the application can call QueuePresentKHR requesting that we use the
* QUEUE_OPERATIONS_END time domain as the reference (targetTimeDomainPresentStage),
* in which case we are forced to re-calibrate the timestamp anyway.
* We would also need per-driver plumbing to forward the SWAPCHAIN_LOCAL and
* PRESENT_STAGE_LOCAL time domains to the swapchain and query the underlying time domain.
* Instead of dealing with this mess, just recalibrate the timestamp. The accuracy of
* queue_operations_end is not particularly important. */
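/* Worked example with illustrative numbers: with timestamp_period = 10 ns/tick,
* a query result of 1000100 ticks, and a calibration pair of
* DEVICE = 1000000 ticks / MONOTONIC = 5000000 ns, the delta below is
* 100 ticks = 1000 ns, so the reported queue_done_time is 5001000 ns. */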
/* We have already made sure that the implementation supports these. */
const VkCalibratedTimestampInfoKHR infos[2] = {
{
.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
.timeDomain = VK_TIME_DOMAIN_DEVICE_KHR,
},
{
.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
.timeDomain = chain->present_timing.time_domain,
},
};
uint64_t timestamps[2];
uint64_t max_deviation;
if (chain->wsi->GetCalibratedTimestampsKHR(chain->device, 2, infos, timestamps, &max_deviation) == VK_SUCCESS) {
int64_t device_delta_ticks = (int64_t)queue_ts - (int64_t)timestamps[0];
int64_t device_delta_ns = (int64_t)((double)chain->wsi->timestamp_period * (double)device_delta_ticks);
uint64_t queue_timestamp = timestamps[1] + device_delta_ns;
/* Make sure we don't report the GPU completing after the present itself completed.
* Avoids any weird precision issues creeping through. */
if (upper_bound)
queue_timestamp = MIN2(queue_timestamp, upper_bound);
timing->queue_done_time = queue_timestamp;
}
}
static void
wsi_swapchain_present_timing_notify_recycle_locked(struct wsi_swapchain *chain,
struct wsi_image *image)
{
assert(chain->present_timing.active);
for (size_t i = 0; i < chain->present_timing.timings_count; i++) {
if (chain->present_timing.timings[i].image == image) {
/* A different present takes ownership of the image's query pool index now. */
chain->present_timing.timings[i].image = NULL;
chain->present_timing.timings[i].queue_done_time = 0;
/* We waited on progress fence, so the timestamp query is guaranteed to be done. */
wsi_swapchain_present_timing_sample_query_pool(chain, &chain->present_timing.timings[i], image, 0);
break;
}
}
}
static VkResult wsi_common_allocate_timing_request(
struct wsi_swapchain *swapchain, const VkPresentTimingInfoEXT *timing,
uint64_t present_id, struct wsi_image *image)
{
VkResult vr = VK_SUCCESS;
mtx_lock(&swapchain->present_timing.lock);
if (swapchain->present_timing.timings_count >= swapchain->present_timing.timings_capacity) {
vr = VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT;
goto err;
}
wsi_swapchain_present_timing_notify_recycle_locked(swapchain, image);
struct wsi_presentation_timing *wsi_timing =
&swapchain->present_timing.timings[swapchain->present_timing.timings_count++];
memset(wsi_timing, 0, sizeof(*wsi_timing));
wsi_timing->serial = ++swapchain->present_timing.serial;
wsi_timing->target_time = timing->targetTime;
wsi_timing->present_id = present_id;
wsi_timing->requested_feedback = timing->presentStageQueries;
wsi_timing->image = image;
/* Ignore the time domain since we have a static domain. */
err:
mtx_unlock(&swapchain->present_timing.lock);
return vr;
}
void
wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain,
uint64_t timing_serial,
uint64_t timestamp,
struct wsi_image *image)
{
assert(chain->present_timing.active);
mtx_lock(&chain->present_timing.lock);
for (size_t i = 0; i < chain->present_timing.timings_count; i++) {
if (chain->present_timing.timings[i].serial == timing_serial) {
chain->present_timing.timings[i].complete_time = timestamp;
chain->present_timing.timings[i].complete = VK_TRUE;
/* It's possible that QueuePresentKHR already handled the queue done timestamp for us,
* since the image was recycled before presentation could fully complete.
* In this case, we no longer own the timestamp query pool index, so just skip. */
if (chain->present_timing.timings[i].image != image)
break;
/* 0 means unknown. Application can probably fall back to its own timestamps if it wants to. */
chain->present_timing.timings[i].queue_done_time = 0;
wsi_swapchain_present_timing_sample_query_pool(chain, &chain->present_timing.timings[i], image, timestamp);
chain->present_timing.timings[i].image = NULL;
break;
}
}
mtx_unlock(&chain->present_timing.lock);
}
void
wsi_swapchain_present_timing_update_refresh_rate(struct wsi_swapchain *chain,
uint64_t refresh_duration,
uint64_t refresh_interval,
int minimum_delta_for_update)
{
mtx_lock(&chain->present_timing.lock);
int64_t duration_delta = llabs((int64_t)refresh_duration - (int64_t)chain->present_timing.refresh_duration);
int64_t interval_delta = llabs((int64_t)refresh_interval - (int64_t)chain->present_timing.refresh_interval);
/* When the refresh rate is an estimate, the value may fluctuate slightly from frame
* to frame, so don't spam refresh counter updates unless there is a meaningful delta.
* Applications that use absolute timings are expected to recalibrate based on feedback. */
if (duration_delta > minimum_delta_for_update || interval_delta > minimum_delta_for_update ||
chain->present_timing.refresh_counter == 0) {
/* We'll report this updated refresh counter in feedback,
* so that application knows to requery the refresh rate. */
chain->present_timing.refresh_counter++;
chain->present_timing.refresh_duration = refresh_duration;
chain->present_timing.refresh_interval = refresh_interval;
}
mtx_unlock(&chain->present_timing.lock);
}
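/* A backend that estimates the refresh rate, e.g. from observed vblank deltas,
* might call this as follows (names and values illustrative):
*
*   wsi_swapchain_present_timing_update_refresh_rate(chain,
*                                                    observed_frame_ns,
*                                                    observed_frame_ns,
*                                                    500 * 1000);
*
* so that sub-0.5ms jitter in the estimate does not bump refresh_counter. */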
VKAPI_ATTR VkResult VKAPI_CALL
wsi_GetPastPresentationTimingEXT(
VkDevice device,
const VkPastPresentationTimingInfoEXT* pPastPresentationTimingInfo,
VkPastPresentationTimingPropertiesEXT* pPastPresentationTimingProperties)
{
VK_FROM_HANDLE(wsi_swapchain, swapchain, pPastPresentationTimingInfo->swapchain);
VkResult vr = VK_SUCCESS;
bool out_of_order = (pPastPresentationTimingInfo->flags &
VK_PAST_PRESENTATION_TIMING_ALLOW_OUT_OF_ORDER_RESULTS_BIT_EXT) != 0;
if (swapchain->poll_timing_request)
swapchain->poll_timing_request(swapchain);
mtx_lock(&swapchain->present_timing.lock);
pPastPresentationTimingProperties->timingPropertiesCounter = swapchain->present_timing.refresh_counter;
pPastPresentationTimingProperties->timeDomainsCounter = 1;
/* Completion normally happens in-order, so honoring the out-of-order flag mostly
* matters for the mixed-feedback case below.
* TODO: Honor the partial results flag. */
uint32_t done_count = 0;
for (uint32_t i = 0; i < swapchain->present_timing.timings_count; i++) {
/* If different presents request different kinds of state, we may get completion out of order.
* If flag is not set, we cannot report frame N until we have completed all frames M < N. */
if (swapchain->present_timing.timings[i].complete)
done_count++;
else if (!out_of_order)
break;
}
/* We don't remove timing info from queue until it is consumed. */
if (!pPastPresentationTimingProperties->pPresentationTimings) {
pPastPresentationTimingProperties->presentationTimingCount = done_count;
mtx_unlock(&swapchain->present_timing.lock);
return VK_SUCCESS;
}
VK_OUTARRAY_MAKE_TYPED(VkPastPresentationTimingEXT, timings,
pPastPresentationTimingProperties->pPresentationTimings,
&pPastPresentationTimingProperties->presentationTimingCount);
uint32_t new_timings_count = 0;
bool stop_timing_removal = false;
for (uint32_t i = 0; i < swapchain->present_timing.timings_count; i++) {
const struct wsi_presentation_timing *in_timing = &swapchain->present_timing.timings[i];
if (!swapchain->present_timing.timings[i].complete || stop_timing_removal) {
/* Keep output ordered to be compliant without having to re-sort every time.
* Queue depth for timestamps is expected to be small. */
swapchain->present_timing.timings[new_timings_count++] = swapchain->present_timing.timings[i];
if (!out_of_order)
stop_timing_removal = true;
continue;
}
vk_outarray_append_typed(VkPastPresentationTimingEXT, &timings, timing) {
timing->targetTime = swapchain->present_timing.timings[i].target_time;
timing->presentId = in_timing->present_id;
timing->timeDomain = swapchain->present_timing.time_domain;
timing->timeDomainId = 0;
timing->reportComplete = in_timing->complete;
/* No INCOMPLETE is reported here. Failures are silent.
* However, application already knows upper bound for stage count based on the query,
* so this should never fail. */
VK_OUTARRAY_MAKE_TYPED(VkPresentStageTimeEXT, stages, timing->pPresentStages, &timing->presentStageCount);
if (in_timing->requested_feedback & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) {
vk_outarray_append_typed(VkPresentStageTimeEXT, &stages, stage) {
stage->stage = VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT;
stage->time = in_timing->queue_done_time;
}
}
if (in_timing->requested_feedback & ~VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) {
vk_outarray_append_typed(VkPresentStageTimeEXT, &stages, stage) {
stage->stage = in_timing->requested_feedback & ~VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT;
/* It is expected that implementation will only expose one timing value. */
assert(util_bitcount(stage->stage) == 1);
stage->time = in_timing->complete_time;
}
}
}
}
swapchain->present_timing.timings_count = new_timings_count;
vr = vk_outarray_status(&timings);
/* This function must be fully atomic within the implementation, so it has to be thread-safe. */
mtx_unlock(&swapchain->present_timing.lock);
return vr;
}
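/* Callers are expected to use the usual two-call idiom below: first query with
* both output arrays NULL to learn timeDomainCount (always 1 here), then call
* again with storage for one domain and one id. A sketch, with illustrative
* locals and the sType name per the EXT header:
*
*   VkTimeDomainKHR domain;
*   uint64_t domain_id, counter;
*   VkSwapchainTimeDomainPropertiesEXT props = {
*      .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_TIME_DOMAIN_PROPERTIES_EXT,
*      .timeDomainCount = 1,
*      .pTimeDomains = &domain,
*      .pTimeDomainIds = &domain_id,
*   };
*   vkGetSwapchainTimeDomainPropertiesEXT(device, swapchain, &props, &counter);
*/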
VKAPI_ATTR VkResult VKAPI_CALL
wsi_GetSwapchainTimeDomainPropertiesEXT(
VkDevice device,
VkSwapchainKHR swapchain_,
VkSwapchainTimeDomainPropertiesEXT* pSwapchainTimeDomainProperties,
uint64_t* pTimeDomainsCounter)
{
VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_);
/* We don't change time domains. Everything is static. */
if (pTimeDomainsCounter)
*pTimeDomainsCounter = 1;
/* This style is a bit goofy and doesn't map cleanly to anything. */
if (!pSwapchainTimeDomainProperties->pTimeDomainIds && !pSwapchainTimeDomainProperties->pTimeDomains) {
pSwapchainTimeDomainProperties->timeDomainCount = 1;
return VK_SUCCESS;
} else if (pSwapchainTimeDomainProperties->timeDomainCount == 0) {
return VK_INCOMPLETE;
}
pSwapchainTimeDomainProperties->timeDomainCount = 1;
if (pSwapchainTimeDomainProperties->pTimeDomains)
*pSwapchainTimeDomainProperties->pTimeDomains = swapchain->present_timing.time_domain;
if (pSwapchainTimeDomainProperties->pTimeDomainIds)
*pSwapchainTimeDomainProperties->pTimeDomainIds = 0;
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
wsi_GetSwapchainTimingPropertiesEXT(
VkDevice device,
VkSwapchainKHR swapchain_,
VkSwapchainTimingPropertiesEXT* pSwapchainTimingProperties,
uint64_t* pSwapchainTimingPropertiesCounter)
{
VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_);
VkResult vr;
mtx_lock(&swapchain->present_timing.lock);
/* If we don't have data yet, we must return VK_NOT_READY. */
vr = swapchain->present_timing.refresh_counter ? VK_SUCCESS : VK_NOT_READY;
pSwapchainTimingProperties->refreshInterval = swapchain->present_timing.refresh_interval;
pSwapchainTimingProperties->refreshDuration = swapchain->present_timing.refresh_duration;
if (pSwapchainTimingPropertiesCounter)
*pSwapchainTimingPropertiesCounter = swapchain->present_timing.refresh_counter;
mtx_unlock(&swapchain->present_timing.lock);
return vr;
}
VKAPI_ATTR VkResult VKAPI_CALL
wsi_SetSwapchainPresentTimingQueueSizeEXT(
VkDevice device,
VkSwapchainKHR swapchain_,
uint32_t size)
{
VK_FROM_HANDLE(wsi_swapchain, swapchain, swapchain_);
assert(swapchain->present_timing.active);
VkResult vr = VK_SUCCESS;
mtx_lock(&swapchain->present_timing.lock);
if (size < swapchain->present_timing.timings_count) {
vr = VK_NOT_READY;
goto error;
}
if (size > swapchain->present_timing.timings_capacity) {
void *new_ptr = vk_realloc(&swapchain->alloc, swapchain->present_timing.timings,
sizeof(*swapchain->present_timing.timings) * size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (new_ptr) {
swapchain->present_timing.timings = new_ptr;
swapchain->present_timing.timings_capacity = size;
} else {
vr = VK_ERROR_OUT_OF_HOST_MEMORY;
goto error;
}
} else {
swapchain->present_timing.timings_capacity = size;
}
error:
mtx_unlock(&swapchain->present_timing.lock);
return vr;
}
VkDeviceMemory
wsi_common_get_memory(VkSwapchainKHR _swapchain, uint32_t index)
{
@@ -1521,6 +2058,50 @@ wsi_common_queue_present(const struct wsi_device *wsi,
vk_find_struct_const(pPresentInfo->pNext, PRESENT_ID_2_KHR);
const VkSwapchainPresentFenceInfoKHR *present_fence_info =
vk_find_struct_const(pPresentInfo->pNext, SWAPCHAIN_PRESENT_FENCE_INFO_KHR);
const VkPresentTimingsInfoEXT *present_timings_info =
vk_find_struct_const(pPresentInfo->pNext, PRESENT_TIMINGS_INFO_EXT);
bool needs_timing_command_buffer = false;
if (present_timings_info) {
/* If we fail a present due to a full queue, it's a little unclear from the
* spec if we should treat it as OUT_OF_DATE or OUT_OF_HOST_MEMORY for
* purposes of signaling. Validation layers and at least one other implementation
* in the wild seem to treat it as OUT_OF_DATE, so do that. */
for (uint32_t i = 0; i < present_timings_info->swapchainCount; i++) {
const VkPresentTimingInfoEXT *info = &present_timings_info->pTimingInfos[i];
VK_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
if (results[i] != VK_SUCCESS || !swapchain->set_timing_request)
continue;
assert(swapchain->present_timing.active);
uint32_t image_index = pPresentInfo->pImageIndices[i];
/* EXT_present_timing is defined to only work with present_id2.
* It's only used when reporting back timings. */
results[i] = wsi_common_allocate_timing_request(
swapchain, info, present_ids2 ? present_ids2->pPresentIds[i] : 0,
swapchain->get_wsi_image(swapchain, image_index));
/* Application is responsible for allocating sufficient size here.
* We fail with VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT if application is bugged. */
if (results[i] == VK_SUCCESS) {
swapchain->set_timing_request(swapchain, &(struct wsi_image_timing_request) {
.serial = swapchain->present_timing.serial,
.time = info->targetTime,
.flags = info->flags,
});
if (info->presentStageQueries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) {
/* It's not a problem if we redundantly submit timing command buffers.
* VUID-12234 also says all swapchains in this present must have been
* created with present timing enabled. */
needs_timing_command_buffer = true;
}
}
}
}
/* Gather up all the semaphores and fences we need to signal per-image */
STACK_ARRAY(struct wsi_image_signal_info, image_signal_infos,
@@ -1596,15 +2177,15 @@ wsi_common_queue_present(const struct wsi_device *wsi,
* the per-image semaphores and fences with the blit.
*/
{
STACK_ARRAY(VkCommandBufferSubmitInfo, blit_command_buffer_infos,
pPresentInfo->swapchainCount);
STACK_ARRAY(VkCommandBufferSubmitInfo, command_buffer_infos,
pPresentInfo->swapchainCount * 2);
STACK_ARRAY(VkSemaphoreSubmitInfo, signal_semaphore_infos,
pPresentInfo->swapchainCount *
ARRAY_SIZE(image_signal_infos[0].semaphore_infos));
STACK_ARRAY(VkFence, fences,
pPresentInfo->swapchainCount *
ARRAY_SIZE(image_signal_infos[0].fences));
uint32_t blit_count = 0, signal_semaphore_count = 0, fence_count = 0;
uint32_t command_buffer_count = 0, signal_semaphore_count = 0, fence_count = 0;
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
VK_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
@@ -1612,14 +2193,27 @@ wsi_common_queue_present(const struct wsi_device *wsi,
struct wsi_image *image =
swapchain->get_wsi_image(swapchain, image_index);
bool separate_queue_blit = swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT &&
swapchain->blit.queue != NULL;
/* For TIMING_QUEUE_FULL_EXT, ensure sync objects are signaled,
* but don't do any real work. */
if (results[i] == VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT || !separate_queue_blit) {
for (uint32_t j = 0; j < image_signal_infos[i].semaphore_count; j++) {
signal_semaphore_infos[signal_semaphore_count++] =
image_signal_infos[i].semaphore_infos[j];
}
for (uint32_t j = 0; j < image_signal_infos[i].fence_count; j++)
fences[fence_count++] = image_signal_infos[i].fences[j];
}
if (results[i] != VK_SUCCESS)
continue;
/* If we're blitting on another swapchain, just signal the blit
* semaphore for now.
*/
if (swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT &&
swapchain->blit.queue != NULL) {
if (separate_queue_blit) {
/* Create the blit semaphore if needed */
if (swapchain->blit.semaphores[image_index] == VK_NULL_HANDLE) {
const VkSemaphoreCreateInfo sem_info = {
@@ -1644,27 +2238,27 @@ wsi_common_queue_present(const struct wsi_device *wsi,
}
if (swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT) {
blit_command_buffer_infos[blit_count++] = (VkCommandBufferSubmitInfo) {
command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer =
image->blit.cmd_buffers[queue->queue_family_index],
};
}
for (uint32_t j = 0; j < image_signal_infos[i].semaphore_count; j++) {
signal_semaphore_infos[signal_semaphore_count++] =
image_signal_infos[i].semaphore_infos[j];
if (needs_timing_command_buffer) {
command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer = image->timestamp_cmd_buffers[queue->queue_family_index],
};
}
for (uint32_t j = 0; j < image_signal_infos[i].fence_count; j++)
fences[fence_count++] = image_signal_infos[i].fences[j];
}
const VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = pPresentInfo->waitSemaphoreCount,
.pWaitSemaphoreInfos = semaphore_wait_infos,
.commandBufferInfoCount = blit_count,
.pCommandBufferInfos = blit_command_buffer_infos,
.commandBufferInfoCount = command_buffer_count,
.pCommandBufferInfos = command_buffer_infos,
.signalSemaphoreInfoCount = signal_semaphore_count,
.pSignalSemaphoreInfos = signal_semaphore_infos,
};
@@ -1680,7 +2274,7 @@ wsi_common_queue_present(const struct wsi_device *wsi,
STACK_ARRAY_FINISH(fences);
STACK_ARRAY_FINISH(signal_semaphore_infos);
STACK_ARRAY_FINISH(blit_command_buffer_infos);
STACK_ARRAY_FINISH(command_buffer_infos);
}
/* Now do blits on any blit queues */
@@ -1693,8 +2287,10 @@ wsi_common_queue_present(const struct wsi_device *wsi,
if (results[i] != VK_SUCCESS)
continue;
if (swapchain->blit.type == WSI_SWAPCHAIN_NO_BLIT ||
swapchain->blit.queue == NULL)
bool separate_queue_blit = swapchain->blit.type != WSI_SWAPCHAIN_NO_BLIT &&
swapchain->blit.queue != NULL;
if (!separate_queue_blit)
continue;
const VkSemaphoreSubmitInfo blit_semaphore_info = {
@@ -1703,17 +2299,27 @@ wsi_common_queue_present(const struct wsi_device *wsi,
.semaphore = swapchain->blit.semaphores[image_index],
};
const VkCommandBufferSubmitInfo blit_command_buffer_info = {
VkCommandBufferSubmitInfo command_buffer_infos[2];
uint32_t command_buffer_count = 0;
command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer = image->blit.cmd_buffers[0],
};
if (needs_timing_command_buffer) {
command_buffer_infos[command_buffer_count++] = (VkCommandBufferSubmitInfo) {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
.commandBuffer = image->timestamp_cmd_buffers[0],
};
}
const VkSubmitInfo2 submit_info = {
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
.waitSemaphoreInfoCount = 1,
.pWaitSemaphoreInfos = &blit_semaphore_info,
.commandBufferInfoCount = 1,
.pCommandBufferInfos = &blit_command_buffer_info,
.commandBufferInfoCount = command_buffer_count,
.pCommandBufferInfos = command_buffer_infos,
.signalSemaphoreInfoCount = image_signal_infos[i].semaphore_count,
.pSignalSemaphoreInfos = image_signal_infos[i].semaphore_infos,
};

View file

@@ -62,6 +62,8 @@ struct wsi_device {
VkPhysicalDeviceMemoryProperties memory_props;
uint32_t queue_family_count;
uint64_t queue_supports_blit;
uint64_t queue_supports_timestamps;
float timestamp_period;
VkPhysicalDeviceDrmPropertiesEXT drm_info;
VkPhysicalDevicePCIBusInfoPropertiesEXT pci_bus_info;
@@ -201,28 +203,37 @@ struct wsi_device {
WSI_CB(CmdPipelineBarrier);
WSI_CB(CmdCopyImage);
WSI_CB(CmdCopyImageToBuffer);
WSI_CB(CmdResetQueryPool);
WSI_CB(CmdWriteTimestamp);
WSI_CB(CreateBuffer);
WSI_CB(CreateCommandPool);
WSI_CB(CreateFence);
WSI_CB(CreateImage);
WSI_CB(CreateQueryPool);
WSI_CB(CreateSemaphore);
WSI_CB(DestroyBuffer);
WSI_CB(DestroyCommandPool);
WSI_CB(DestroyFence);
WSI_CB(DestroyImage);
WSI_CB(DestroyQueryPool);
WSI_CB(DestroySemaphore);
WSI_CB(EndCommandBuffer);
WSI_CB(FreeMemory);
WSI_CB(FreeCommandBuffers);
WSI_CB(GetBufferMemoryRequirements);
WSI_CB(GetCalibratedTimestampsKHR);
WSI_CB(GetFenceStatus);
WSI_CB(GetImageDrmFormatModifierPropertiesEXT);
WSI_CB(GetImageMemoryRequirements);
WSI_CB(GetImageSubresourceLayout);
WSI_CB(GetMemoryFdKHR);
WSI_CB(GetPhysicalDeviceCalibrateableTimeDomainsKHR);
WSI_CB(GetPhysicalDeviceProperties);
WSI_CB(GetPhysicalDeviceFormatProperties);
WSI_CB(GetPhysicalDeviceFormatProperties2);
WSI_CB(GetPhysicalDeviceImageFormatProperties2);
WSI_CB(GetPhysicalDeviceQueueFamilyProperties);
WSI_CB(GetQueryPoolResults);
WSI_CB(GetSemaphoreFdKHR);
WSI_CB(ResetFences);
WSI_CB(QueueSubmit2);

View file

@@ -156,6 +156,12 @@ enum colorspace_enum {
COLORSPACE_ENUM_MAX,
};
enum vrr_tristate {
VRR_TRISTATE_UNKNOWN,
VRR_TRISTATE_DISABLED,
VRR_TRISTATE_ENABLED,
};
typedef struct wsi_display_connector_metadata {
VkHdrMetadataEXT hdr_metadata;
bool supports_st2084;
@@ -185,6 +191,10 @@ typedef struct wsi_display_connector {
struct wsi_display_connector_metadata metadata;
uint32_t count_formats;
uint32_t *formats;
enum vrr_tristate vrr_capable;
enum vrr_tristate vrr_enabled;
uint64_t last_frame;
uint64_t last_nsec;
} wsi_display_connector;
struct wsi_display {
@@ -370,6 +380,11 @@ find_properties(struct wsi_display_connector *connector, uint32_t count_props, u
}
}
if (!strcmp(prop->name, "vrr_capable"))
connector->vrr_capable = prop_values[p] != 0 ? VRR_TRISTATE_ENABLED : VRR_TRISTATE_DISABLED;
if (!strcmp(prop->name, "VRR_ENABLED"))
connector->vrr_enabled = prop_values[p] != 0 ? VRR_TRISTATE_ENABLED : VRR_TRISTATE_DISABLED;
drmModeFreeProperty(prop);
}
@@ -431,6 +446,8 @@ find_connector_properties(struct wsi_display_connector *connector, drmModeConnec
enum wsi_image_state {
WSI_IMAGE_IDLE,
WSI_IMAGE_DRAWING,
WSI_IMAGE_WAITING,
WSI_IMAGE_QUEUED_AFTER_WAIT,
WSI_IMAGE_QUEUED,
WSI_IMAGE_FLIPPING,
WSI_IMAGE_DISPLAYING
@@ -444,6 +461,9 @@ struct wsi_display_image {
uint32_t buffer[4];
uint64_t flip_sequence;
uint64_t present_id;
struct wsi_image_timing_request timing_request;
struct wsi_display_fence *fence;
uint64_t minimum_ns;
};
struct wsi_display_swapchain {
@@ -462,6 +482,8 @@ struct wsi_display_swapchain {
uint64_t color_outcome_serial;
VkHdrMetadataEXT hdr_metadata;
struct wsi_image_timing_request timing_request;
struct wsi_display_image images[0];
};
@@ -473,6 +495,9 @@ struct wsi_display_fence {
uint32_t syncobj; /* syncobj to signal on event */
uint64_t sequence;
bool device_event; /* fence is used for device events */
struct wsi_display_connector *connector;
/* Image to be flipped, if this fence is for an image in the WSI_IMAGE_WAITING state that will need to move to QUEUED. */
struct wsi_display_image *image;
};
struct wsi_display_sync {
@@ -1319,6 +1344,16 @@ wsi_display_surface_get_capabilities2(VkIcdSurfaceBase *icd_surface,
break;
}
case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;
wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT;
wait->presentTimingSupported = VK_TRUE;
wait->presentAtAbsoluteTimeSupported = VK_TRUE;
wait->presentAtRelativeTimeSupported = VK_TRUE;
break;
}
default:
/* Ignored */
break;
@@ -1678,6 +1713,8 @@ wsi_display_image_init(struct wsi_swapchain *drv_chain,
image->chain = chain;
image->state = WSI_IMAGE_IDLE;
image->fence = NULL;
image->minimum_ns = 0;
image->fb_id = 0;
uint64_t *fb_modifiers = NULL;
@@ -1789,6 +1826,12 @@ wsi_display_idle_old_displaying(struct wsi_display_image *active_image)
static VkResult
_wsi_display_queue_next(struct wsi_swapchain *drv_chain);
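/* Widen a 32-bit sequence number to 64 bits by picking the 64-bit value
* closest to `near`. The unsigned subtraction followed by the signed cast
* handles wraparound in both directions: e.g. with near = 0x100000000 and
* narrow = 5, (int32_t)(5 - 0) == 5, so the result is 0x100000005. */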
static uint64_t
widen_32_to_64(uint32_t narrow, uint64_t near)
{
return near + (int32_t)(narrow - near);
}
/**
* Wakes up any vkWaitForPresentKHR() waiters on the last present to this
* image.
@@ -1817,6 +1860,17 @@ wsi_display_surface_error(struct wsi_display_swapchain *swapchain, VkResult resu
mtx_unlock(&swapchain->present_id_mutex);
}
/**
* libdrm callback for when we get a DRM_EVENT_PAGE_FLIP in response to our
* atomic commit with DRM_MODE_PAGE_FLIP_EVENT. That event can happen at any
* point after vblank, when the old image is no longer being scanned out and
* that commit is set up to be scanned out next.
*
* This means that we can queue up a new atomic commit, if there were presents
* that we hadn't submitted yet (the event queue is driven by
* wsi_display_wait_thread(), so that's what ends up submitting atomic commits
* most of the time).
**/
static void
wsi_display_page_flip_handler2(int fd,
unsigned int frame,
@@ -1828,6 +1882,28 @@ wsi_display_page_flip_handler2(int fd,
struct wsi_display_image *image = data;
struct wsi_display_swapchain *chain = image->chain;
VkIcdSurfaceDisplay *surface = chain->surface;
wsi_display_mode *display_mode =
wsi_display_mode_from_handle(surface->displayMode);
wsi_display_connector *connector = display_mode->connector;
uint64_t nsec = 1000000000ull * sec + 1000ull * usec;
/* If we're on VRR timing path, ensure we get a stable pace. */
nsec = MAX2(nsec, image->minimum_ns);
uint64_t frame64 = widen_32_to_64(frame, connector->last_frame);
connector->last_frame = frame64;
connector->last_nsec = nsec;
/* Never update the refresh rate estimate here; it is static, based on the mode.
* Notify timing completion before we signal present wait so that applications
* get the lowest possible latency for present time. */
if (image->timing_request.serial) {
wsi_swapchain_present_timing_notify_completion(
&chain->base, image->timing_request.serial,
nsec, &image->base);
}
wsi_display_debug("image %ld displayed at %d\n",
image - &(image->chain->images[0]), frame);
image->state = WSI_IMAGE_DISPLAYING;
@@ -1841,42 +1917,29 @@ wsi_display_page_flip_handler2(int fd,
chain->status = result;
}
static void wsi_display_fence_event_handler(struct wsi_display_fence *fence);
static void wsi_display_page_flip_handler(int fd,
unsigned int frame,
unsigned int sec,
unsigned int usec,
void *data)
{
wsi_display_page_flip_handler2(fd, frame, sec, usec, 0, data);
}
static void wsi_display_vblank_handler(int fd, unsigned int frame,
unsigned int sec, unsigned int usec,
void *data)
{
struct wsi_display_fence *fence = data;
wsi_display_fence_event_handler(fence);
}
static void wsi_display_fence_event_handler(struct wsi_display_fence *fence,
uint64_t nsec,
uint64_t frame);
/**
* libdrm callback for when we get a DRM_EVENT_CRTC_SEQUENCE in response to a
* drmCrtcQueueSequence(), indicating that the first pixel of a new frame is
* being scanned out.
**/
static void wsi_display_sequence_handler(int fd, uint64_t frame,
uint64_t nsec, uint64_t user_data)
{
struct wsi_display_fence *fence =
(struct wsi_display_fence *) (uintptr_t) user_data;
wsi_display_fence_event_handler(fence);
wsi_display_fence_event_handler(fence, nsec, frame);
}
static drmEventContext event_context = {
.version = DRM_EVENT_CONTEXT_VERSION,
.page_flip_handler = wsi_display_page_flip_handler,
#if DRM_EVENT_CONTEXT_VERSION >= 3
.page_flip_handler = NULL,
.page_flip_handler2 = wsi_display_page_flip_handler2,
#endif
.vblank_handler = wsi_display_vblank_handler,
.vblank_handler = NULL,
.sequence_handler = wsi_display_sequence_handler,
};
@@ -2383,13 +2446,30 @@ wsi_display_fence_check_free(struct wsi_display_fence *fence)
vk_free(fence->wsi->alloc, fence);
}
static void wsi_display_fence_event_handler(struct wsi_display_fence *fence)
static void wsi_display_fence_event_handler(struct wsi_display_fence *fence,
uint64_t nsec, uint64_t frame)
{
struct wsi_display_connector *connector = fence->connector;
struct wsi_display_image *image = fence->image;
if (fence->syncobj) {
(void) drmSyncobjSignal(fence->wsi->syncobj_fd, &fence->syncobj, 1);
(void) drmSyncobjDestroy(fence->wsi->syncobj_fd, fence->syncobj);
}
if (connector) {
connector->last_nsec = nsec;
connector->last_frame = frame;
}
if (image && image->state == WSI_IMAGE_WAITING) {
/* We may need to do the final sleep on CPU to resolve VRR timings. */
image->state = WSI_IMAGE_QUEUED_AFTER_WAIT;
VkResult result = _wsi_display_queue_next(&image->chain->base);
if (result != VK_SUCCESS)
image->chain->status = result;
}
fence->event_received = true;
wsi_display_fence_check_free(fence);
}
@@ -2822,9 +2902,11 @@ _wsi_display_queue_next(struct wsi_swapchain *drv_chain)
switch (tmp_image->state) {
case WSI_IMAGE_FLIPPING:
/* already flipping, don't send another to the kernel yet */
case WSI_IMAGE_WAITING:
/* already flipping or waiting for a flip, don't send another to the kernel yet */
return VK_SUCCESS;
case WSI_IMAGE_QUEUED:
case WSI_IMAGE_QUEUED_AFTER_WAIT:
/* find the oldest queued */
if (!image || tmp_image->flip_sequence < image->flip_sequence)
image = tmp_image;
@@ -2837,6 +2919,95 @@ _wsi_display_queue_next(struct wsi_swapchain *drv_chain)
if (!image)
return VK_SUCCESS;
if (image->fence) {
image->fence->image = NULL;
wsi_display_fence_destroy(image->fence);
image->fence = NULL;
}
unsigned num_cycles_to_skip = 0;
int64_t target_relative_ns = 0;
bool skip_timing = false;
bool nearest_cycle =
(image->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) != 0;
if (image->timing_request.time != 0) {
/* Ensure we have some kind of timebase to work from. */
if (!connector->last_frame)
drmCrtcGetSequence(wsi->fd, connector->crtc_id, &connector->last_frame, &connector->last_nsec);
if (!connector->last_frame || chain->base.present_timing.refresh_duration == 0) {
/* Something has gone very wrong. Just ignore present timing for safety. */
skip_timing = true;
wsi_display_debug("Cannot get a stable timebase, last frame = %"PRIu64", refresh_duration = %"PRIu64".\n",
connector->last_frame, chain->base.present_timing.refresh_duration);
}
}
if (!skip_timing && image->state == WSI_IMAGE_QUEUED && image->timing_request.time != 0) {
target_relative_ns = (int64_t)image->timing_request.time;
/* We need to estimate number of refresh cycles to wait for. */
if (!(image->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_RELATIVE_TIME_BIT_EXT)) {
target_relative_ns -= (int64_t)connector->last_nsec;
}
if (nearest_cycle) {
/* No need to lock, we never update refresh_duration dynamically. */
target_relative_ns -= (int64_t)chain->base.present_timing.refresh_duration / 2;
} else {
/* If the application computes an exact value that lands exactly on the refresh
* cycle boundary, pull back the estimate a little, since DRM precision is 1us. */
target_relative_ns -= 1000;
}
}
target_relative_ns = MAX2(target_relative_ns, 0);
if (target_relative_ns && chain->base.present_timing.refresh_duration)
num_cycles_to_skip = target_relative_ns / chain->base.present_timing.refresh_duration;
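/* Worked example with illustrative numbers: with refresh_duration = 16666667 ns
* and an absolute target 50 ms after the last vblank, nearest-cycle mode
* subtracts half a refresh, leaving ~41.7 ms, so num_cycles_to_skip == 2 and
* the flip lands on the vblank closest to the requested time. */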
/* The CRTC cycle counter is not reliable on VRR. We cannot use it as a time base. */
bool is_vrr = connector->vrr_enabled == VRR_TRISTATE_ENABLED &&
connector->vrr_capable == VRR_TRISTATE_ENABLED;
if (num_cycles_to_skip) {
if (!is_vrr) {
/* On FRR, we can rely on vblank events to guide time progression. */
VkDisplayKHR display = wsi_display_connector_to_handle(connector);
image->fence = wsi_display_fence_alloc(wsi, -1);
if (image->fence) {
image->fence->connector = connector;
image->fence->image = image;
uint64_t frame_queued;
uint64_t target_frame = connector->last_frame + num_cycles_to_skip;
VkResult result = wsi_register_vblank_event(image->fence, chain->base.wsi, display,
0, target_frame, &frame_queued);
if (result == VK_SUCCESS && frame_queued <= target_frame) {
/* Wait until the vblank fence signals; the event handler will then attempt to requeue us. */
image->state = WSI_IMAGE_WAITING;
return VK_SUCCESS;
}
}
} else {
/* On a VRR display, applications can request fractional frame times, and there
* seems to be no good way to target an absolute time with atomic commits. */
int64_t target_ns = target_relative_ns + (int64_t)connector->last_nsec;
image->minimum_ns = target_ns;
/* Account for sleep jitter and some minimum delay between submitting a page flip
* and it being processed. We will compensate for the difference, if any, so that
* we don't report completion times in the past. */
target_ns -= 1 * 1000 * 1000;
os_time_nanosleep_until(target_ns);
}
}
image->state = WSI_IMAGE_QUEUED;
int ret = drm_atomic_commit(connector, image);
if (ret == 0) {
image->state = WSI_IMAGE_FLIPPING;
@@ -2859,6 +3030,44 @@ _wsi_display_queue_next(struct wsi_swapchain *drv_chain)
}
}
static void
wsi_display_set_timing_request(struct wsi_swapchain *drv_chain,
const struct wsi_image_timing_request *request)
{
struct wsi_display_swapchain *chain =
(struct wsi_display_swapchain *) drv_chain;
chain->timing_request = *request;
}
static uint64_t
wsi_display_poll_refresh_duration(struct wsi_swapchain *drv_chain, uint64_t *interval)
{
struct wsi_display_swapchain *chain =
(struct wsi_display_swapchain *)drv_chain;
VkIcdSurfaceDisplay *surface = chain->surface;
wsi_display_mode *display_mode =
wsi_display_mode_from_handle(surface->displayMode);
double refresh = wsi_display_mode_refresh(display_mode);
wsi_display_connector *connector = display_mode->connector;
uint64_t refresh_ns = (uint64_t)(floor(1.0 / refresh * 1e9 + 0.5));
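/* e.g. a 60 Hz mode yields refresh_ns = round(1e9 / 60.0) = 16666667 ns. */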
/* Assume FRR by default. */
*interval = refresh_ns;
/* If VRR is not enabled on the target CRTC, we should honor that.
* There is no mechanism to clearly request that VRR is desired,
* so we must assume that the user might have forced us into FRR mode. */
if (connector->vrr_capable == VRR_TRISTATE_ENABLED) {
if (connector->vrr_enabled == VRR_TRISTATE_UNKNOWN)
*interval = 0; /* Somehow we don't know if the connector is VRR or FRR. Report unknown. */
else if (connector->vrr_enabled == VRR_TRISTATE_ENABLED)
*interval = UINT64_MAX;
}
return refresh_ns;
}
static VkResult
wsi_display_queue_present(struct wsi_swapchain *drv_chain,
uint32_t image_index,
@@ -2876,16 +3085,19 @@ wsi_display_queue_present(struct wsi_swapchain *drv_chain,
return chain->status;
image->present_id = present_id;
image->timing_request = chain->timing_request;
assert(image->state == WSI_IMAGE_DRAWING);
wsi_display_debug("present %d\n", image_index);
mtx_lock(&wsi->wait_mutex);
/* Make sure that the page flip handler is processed in finite time if using present wait. */
if (present_id)
/* Make sure that the page flip handler is processed in finite time if using present wait
* or presentation time. */
if (present_id || chain->timing_request.serial)
wsi_display_start_wait_thread(wsi);
memset(&chain->timing_request, 0, sizeof(chain->timing_request));
image->flip_sequence = ++chain->flip_sequence;
image->state = WSI_IMAGE_QUEUED;
@@ -3045,6 +3257,9 @@ wsi_display_surface_create_swapchain(
chain->base.acquire_next_image = wsi_display_acquire_next_image;
chain->base.release_images = wsi_display_release_images;
chain->base.queue_present = wsi_display_queue_present;
chain->base.set_timing_request = wsi_display_set_timing_request;
chain->base.poll_early_refresh = wsi_display_poll_refresh_duration;
chain->base.present_timing.time_domain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR;
chain->base.wait_for_present = wsi_display_wait_for_present;
chain->base.wait_for_present2 = wsi_display_wait_for_present;
chain->base.set_hdr_metadata = wsi_display_set_hdr_metadata;

View file

@@ -112,6 +112,16 @@ wsi_headless_surface_get_capabilities2(VkIcdSurfaceBase *surface,
break;
}
case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;
wait->presentStageQueries = 0;
wait->presentTimingSupported = VK_FALSE;
wait->presentAtAbsoluteTimeSupported = VK_FALSE;
wait->presentAtRelativeTimeSupported = VK_FALSE;
break;
}
default:
/* Ignored */
break;

View file

@@ -139,6 +139,16 @@ wsi_metal_surface_get_capabilities2(VkIcdSurfaceBase *surface,
break;
}
case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;
wait->presentStageQueries = 0;
wait->presentTimingSupported = VK_FALSE;
wait->presentAtAbsoluteTimeSupported = VK_FALSE;
wait->presentAtRelativeTimeSupported = VK_FALSE;
break;
}
default:
/* Ignored */
break;

View file

@ -188,6 +188,29 @@ struct wsi_image {
int dma_buf_fd;
#endif
void *cpu_map;
VkQueryPool query_pool;
VkCommandBuffer *timestamp_cmd_buffers;
};
struct wsi_presentation_timing {
uint64_t present_id;
uint64_t target_time;
uint64_t serial;
uint64_t queue_done_time; /* GPU timestamp based. */
uint64_t complete_time; /* Best effort timestamp we get from backend. */
/* If we're rendering with IMMEDIATE, it's possible for images to IDLE long before they complete.
* In this case, we have to ensure that queue_done_time is sampled at QueuePresentKHR time
* before we recycle an image. */
struct wsi_image *image;
VkPresentStageFlagsEXT requested_feedback;
VkBool32 complete;
};
struct wsi_image_timing_request {
uint64_t serial;
uint64_t time;
VkPresentTimingInfoFlagsEXT flags;
};
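/* Sketch of how a frontend might fill a per-present request (illustrative;
 * target_time_ns and timing_flags stand in for the application's request, and
 * the exact plumbing in the common layer may differ):
 *
 *    struct wsi_image_timing_request req = {
 *       .serial = ++chain->present_timing.serial,  // nonzero requests feedback
 *       .time = target_time_ns,                    // 0 means "feedback only"
 *       .flags = timing_flags,                     // e.g. NEAREST_REFRESH_CYCLE
 *    };
 *    chain->set_timing_request(chain, &req);
 */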
struct wsi_swapchain {
@ -237,7 +260,28 @@ struct wsi_swapchain {
struct vk_queue *queue;
} blit;
struct {
mtx_t lock;
bool active;
struct wsi_presentation_timing *timings;
size_t timings_capacity;
size_t timings_count;
size_t serial;
/* Maps to Vulkan spec definitions. */
uint64_t refresh_duration;
uint64_t refresh_interval;
/* When 0, we don't know the refresh rate yet. Every time the refresh rate changes,
 * this counter is incremented. The counter must also be passed back in GetPastTimings. */
uint64_t refresh_counter;
VkTimeDomainKHR time_domain;
} present_timing;
bool capture_key_pressed;
float timestamp_period;
/* Command pools, one per queue family */
VkCommandPool *cmd_pools;
@ -266,6 +310,10 @@ struct wsi_swapchain {
VkPresentModeKHR mode);
void (*set_hdr_metadata)(struct wsi_swapchain *swap_chain,
const VkHdrMetadataEXT* pMetadata);
void (*set_timing_request)(struct wsi_swapchain *swap_chain,
const struct wsi_image_timing_request *request);
void (*poll_timing_request)(struct wsi_swapchain *swap_chain);
uint64_t (*poll_early_refresh)(struct wsi_swapchain *swap_chain, uint64_t *interval);
};
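/* Contract sketch for the hooks above (as used elsewhere in this change):
 * set_timing_request() latches a request consumed by the next queue_present
 * on the swapchain; poll_timing_request() lets a backend pump feedback events
 * when the application only polls timings; poll_early_refresh() returns the
 * refresh duration and writes the interval convention (0 = unknown,
 * UINT64_MAX = VRR, otherwise FRR) through its out-parameter. */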
bool
@ -369,6 +417,10 @@ wsi_create_image(const struct wsi_swapchain *chain,
void
wsi_image_init(struct wsi_image *image);
VkResult
wsi_image_init_timestamp(const struct wsi_swapchain *chain,
struct wsi_image *image);
void
wsi_destroy_image(const struct wsi_swapchain *chain,
struct wsi_image *image);
@ -377,6 +429,16 @@ VkResult
wsi_swapchain_wait_for_present_semaphore(const struct wsi_swapchain *chain,
uint64_t present_id, uint64_t timeout);
void
wsi_swapchain_present_timing_notify_completion(struct wsi_swapchain *chain,
uint64_t timing_serial, uint64_t timestamp,
struct wsi_image *image);
void
wsi_swapchain_present_timing_update_refresh_rate(struct wsi_swapchain *chain,
uint64_t refresh_duration, uint64_t refresh_interval,
int minimum_delta_for_update);
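/* Typical backend flow on a completion event, sketched (the real call sites
 * live in the platform backends):
 *
 *    // Refresh the rate estimate first so waiters wake with fresh data ...
 *    wsi_swapchain_present_timing_update_refresh_rate(chain, refresh_ns,
 *                                                     interval_ns, 0);
 *    // ... then signal the timing entry itself.
 *    if (timing_serial)
 *       wsi_swapchain_present_timing_notify_completion(chain, timing_serial,
 *                                                      complete_time_ns, image);
 */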
#ifdef HAVE_LIBDRM
VkResult
wsi_prepare_signal_dma_buf_from_semaphore(struct wsi_swapchain *chain,

View file

@ -254,6 +254,8 @@ struct wsi_wl_swapchain {
bool has_hdr_metadata;
} color;
struct wsi_image_timing_request timing_request;
struct wsi_wl_image images[0];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(wsi_wl_swapchain, base.base, VkSwapchainKHR,
@ -1668,7 +1670,15 @@ wsi_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevi
struct wsi_wayland *wsi =
(struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];
if (!(wsi_device->queue_supports_blit & BITFIELD64_BIT(queueFamilyIndex)))
/* The blit-capable and timestamp-capable queue masks should normally overlap. */
uint64_t effective_queues = wsi_device->queue_supports_blit & wsi_device->queue_supports_timestamps;
/* If no queue supports both blits and timestamps, fall back to the blit-capable
 * queues and simply don't report support for queue timestamps. */
if (!effective_queues)
effective_queues = wsi_device->queue_supports_blit;
if (!(effective_queues & BITFIELD64_BIT(queueFamilyIndex)))
return false;
struct wsi_wl_display display;
@ -1789,7 +1799,8 @@ wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
static VkResult
wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface,
struct wsi_device *wsi_device,
bool *has_wp_presentation)
bool *has_wp_presentation, clockid_t *clock_id,
bool *has_commit_timing, bool *has_fifo)
{
VkIcdSurfaceWayland *surface = (VkIcdSurfaceWayland *)icd_surface;
struct wsi_wayland *wsi =
@ -1800,8 +1811,18 @@ wsi_wl_surface_check_presentation(VkIcdSurfaceBase *icd_surface,
wsi_device->sw, "mesa check wp_presentation"))
return VK_ERROR_SURFACE_LOST_KHR;
if (has_wp_presentation)
*has_wp_presentation = !!display.wp_presentation_notwrapped;
if (clock_id)
*clock_id = display.presentation_clock_id;
if (has_commit_timing)
*has_commit_timing = !!display.commit_timing_manager;
if (has_fifo)
*has_fifo = !!display.fifo_manager;
wsi_wl_display_finish(&display);
return VK_SUCCESS;
@ -1893,7 +1914,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface,
bool has_feedback;
result = wsi_wl_surface_check_presentation(surface, wsi_device,
&has_feedback);
&has_feedback, NULL, NULL, NULL);
if (result != VK_SUCCESS)
return result;
@ -1906,7 +1927,7 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface,
bool has_feedback;
result = wsi_wl_surface_check_presentation(surface, wsi_device,
&has_feedback);
&has_feedback, NULL, NULL, NULL);
if (result != VK_SUCCESS)
return result;
@ -1914,6 +1935,50 @@ wsi_wl_surface_get_capabilities2(VkIcdSurfaceBase *surface,
break;
}
case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;
bool has_feedback, has_commit_timing, has_fifo;
wait->presentStageQueries = 0;
wait->presentTimingSupported = VK_FALSE;
wait->presentAtAbsoluteTimeSupported = VK_FALSE;
wait->presentAtRelativeTimeSupported = VK_FALSE;
clockid_t clock_id;
result = wsi_wl_surface_check_presentation(surface, wsi_device,
&has_feedback, &clock_id,
&has_commit_timing, &has_fifo);
if (result != VK_SUCCESS)
return result;
if (!has_feedback)
break;
/* We could deal with esoteric clock domains by exposing VK_TIME_DOMAIN_SWAPCHAIN or PRESENT_STAGE_LOCAL,
* but that requires a lot more scaffolding, and there's no need to add extra complexity if we can
* get away with this. */
if (clock_id != CLOCK_MONOTONIC && clock_id != CLOCK_MONOTONIC_RAW)
break;
/* The presentation timing spec talks about the reported time targeting the "pixel being visible".
 * From the presentation-time spec: "Note, that if the display path has a non-zero latency,
 * the time instant specified by this counter may differ from the timestamp's."
 * No compositor I know of takes display latency into account when reporting,
 * so it's a little unclear whether we should be reporting PIXEL_OUT or PIXEL_VISIBLE.
 * Choose PIXEL_OUT for now, since no known compositor actually implements
 * PIXEL_VISIBLE as intended, and we don't want to promise something we cannot hold. */
wait->presentTimingSupported = VK_TRUE;
wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT;
/* We cannot reliably implement the FIFO guarantee + absolute time without the FIFO barrier protocol.
 * Presentation timing is only defined to work with FIFO (and its variants like RELAXED and LATEST_READY). */
wait->presentAtAbsoluteTimeSupported = has_commit_timing && has_fifo;
break;
}
default:
/* Ignored */
break;
@ -2404,6 +2469,7 @@ struct wsi_wl_present_id {
* which uses frame callback to signal DRI3 COMPLETE. */
struct wl_callback *frame;
uint64_t present_id;
uint64_t timing_serial;
struct mesa_trace_flow flow;
uint64_t submission_time;
const VkAllocationCallbacks *alloc;
@ -2411,6 +2477,8 @@ struct wsi_wl_present_id {
uint64_t target_time;
uint64_t correction;
struct wl_list link;
struct wsi_image *img;
bool user_target_time;
};
static struct wsi_image *
@ -2441,6 +2509,14 @@ wsi_wl_swapchain_set_present_mode(struct wsi_swapchain *wsi_chain,
chain->base.present_mode = mode;
}
static void
wsi_wl_swapchain_set_timing_request(struct wsi_swapchain *wsi_chain,
const struct wsi_image_timing_request *request)
{
struct wsi_wl_swapchain *chain = (struct wsi_wl_swapchain *)wsi_chain;
chain->timing_request = *request;
}
static VkResult
dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_time)
{
@ -2514,6 +2590,15 @@ dispatch_present_id_queue(struct wsi_swapchain *wsi_chain, struct timespec *end_
return VK_SUCCESS;
}
static void
wsi_wl_swapchain_poll_timing_request(struct wsi_swapchain *wsi_chain)
{
/* Timing requests must complete in finite time, and if the application is not calling present wait
 * or queue present regularly, timing requests would otherwise never come back.
 * Dispatch with a zero timeout so pending feedback is drained without blocking. */
struct timespec instant = {0};
dispatch_present_id_queue(wsi_chain, &instant);
}
static bool
wsi_wl_swapchain_present_id_completes_in_finite_time_locked(struct wsi_wl_swapchain *chain,
uint64_t present_id)
@ -2794,16 +2879,13 @@ wsi_wl_swapchain_acquire_next_image_implicit(struct wsi_swapchain *wsi_chain,
}
static void
wsi_wl_presentation_update_present_id(struct wsi_wl_present_id *id)
wsi_wl_presentation_update_present_id_locked(struct wsi_wl_present_id *id)
{
mtx_lock(&id->chain->present_ids.lock);
id->chain->present_ids.outstanding_count--;
if (id->present_id > id->chain->present_ids.max_completed)
id->chain->present_ids.max_completed = id->present_id;
id->chain->present_ids.display_time_correction -= id->correction;
mtx_unlock(&id->chain->present_ids.lock);
vk_free(id->alloc, id);
}
static void
@ -2815,6 +2897,20 @@ presentation_handle_presented(void *data,
struct wsi_wl_swapchain *chain = id->chain;
uint64_t target_time = id->target_time;
/* In v1 of presentation-time, we can tell we're likely running VRR when the reported refresh is 0.
 * However, we cannot know what the base refresh rate is without some kind of external information.
 * We also cannot know if we're actually driving the display in a VRR fashion.
 * In v2, we should always know the "base refresh" rate, but that in turn means we cannot know whether we're
 * driving the display VRR or FRR. We could try to deduce it from timestamps, but that is too brittle.
 * There is a v3 proposal that adds this information formally, so we don't have to guess.
 * Knowing VRR vs. FRR is not mission critical for most use cases, so just report "unknown" for now. */
wsi_swapchain_present_timing_update_refresh_rate(&chain->base, refresh, 0, 0);
/* Notify this before present wait to reduce latency of presentation timing requests
* if the application is driving its queries based off present waits. */
if (id->timing_serial)
wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, presentation_time, id->img);
mtx_lock(&chain->present_ids.lock);
chain->present_ids.refresh_nsec = refresh;
if (!chain->present_ids.valid_refresh_nsec) {
@ -2826,13 +2922,16 @@ presentation_handle_presented(void *data,
if (presentation_time > chain->present_ids.displayed_time)
chain->present_ids.displayed_time = presentation_time;
if (target_time && presentation_time > target_time)
/* If we have user-defined target time it can be arbitrarily early, and we don't
* want to start compensating for that error if application stops requesting specific time. */
if (!id->user_target_time && target_time && presentation_time > target_time)
chain->present_ids.display_time_error = presentation_time - target_time;
else
chain->present_ids.display_time_error = 0;
mtx_unlock(&chain->present_ids.lock);
wsi_wl_presentation_update_present_id(id);
wsi_wl_presentation_update_present_id_locked(id);
mtx_unlock(&chain->present_ids.lock);
vk_free(id->alloc, id);
}
static void
@ -2841,6 +2940,15 @@ presentation_handle_discarded(void *data)
struct wsi_wl_present_id *id = data;
struct wsi_wl_swapchain *chain = id->chain;
/* From the Vulkan spec:
 * "Timing information for some present stages may have a time value of 0,
 * indicating that results for that present stage are not available."
 * Worst case, we could simply sample clock_id ourselves and pretend, but
 * applications might start to latch onto that timestamp as ground truth, which
 * is obviously not correct. */
if (id->timing_serial)
wsi_swapchain_present_timing_notify_completion(&chain->base, id->timing_serial, 0, id->img);
mtx_lock(&chain->present_ids.lock);
if (!chain->present_ids.valid_refresh_nsec) {
/* We've started occluded, so make up some safe values to throttle us */
@ -2849,9 +2957,10 @@ presentation_handle_discarded(void *data)
chain->present_ids.refresh_nsec = 16666666;
chain->present_ids.valid_refresh_nsec = true;
}
mtx_unlock(&chain->present_ids.lock);
wsi_wl_presentation_update_present_id(id);
wsi_wl_presentation_update_present_id_locked(id);
mtx_unlock(&chain->present_ids.lock);
vk_free(id->alloc, id);
}
static void
@ -2870,9 +2979,10 @@ presentation_frame_handle_done(void *data, struct wl_callback *callback, uint32_
mtx_lock(&chain->present_ids.lock);
wl_list_remove(&id->link);
mtx_unlock(&chain->present_ids.lock);
wsi_wl_presentation_update_present_id(id);
wsi_wl_presentation_update_present_id_locked(id);
mtx_unlock(&chain->present_ids.lock);
vk_free(id->alloc, id);
wl_callback_destroy(callback);
}
@ -2895,6 +3005,29 @@ static const struct wl_callback_listener frame_listener = {
frame_handle_done,
};
static bool
set_application_driven_timestamp(struct wsi_wl_swapchain *chain,
uint64_t *timestamp,
uint64_t *correction)
{
if (chain->timing_request.serial && chain->timing_request.time) {
/* Absolute time was requested before we have been able to report a reasonable refresh rate
 * to the application. This is valid, but we should not attempt any rounding:
 * the NEAREST_REFRESH_CYCLE flag cannot be honored since the refresh duration is unknown at this point. */
struct timespec target_ts;
timespec_from_nsec(&target_ts, chain->timing_request.time);
wp_commit_timer_v1_set_timestamp(chain->commit_timer,
(uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec,
target_ts.tv_nsec);
*timestamp = chain->timing_request.time;
*correction = 0;
chain->present_ids.last_target_time = chain->timing_request.time;
return true;
} else {
return false;
}
}
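/* For reference: wp_commit_timer_v1_set_timestamp() takes the 64-bit seconds
 * value split into two 32-bit protocol arguments, so the calls in this file
 * rely on implicit truncation for the low half:
 *
 *    uint64_t sec = (uint64_t)target_ts.tv_sec;
 *    uint32_t tv_sec_hi = sec >> 32;
 *    uint32_t tv_sec_lo = (uint32_t)sec;  // what passing tv_sec directly yields
 */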
/* The present_ids lock must be held */
static bool
set_timestamp(struct wsi_wl_swapchain *chain,
@ -2908,7 +3041,7 @@ set_timestamp(struct wsi_wl_swapchain *chain,
int32_t error = 0;
if (!chain->present_ids.valid_refresh_nsec)
return false;
return set_application_driven_timestamp(chain, timestamp, correction);
displayed_time = chain->present_ids.displayed_time;
refresh = chain->present_ids.refresh_nsec;
@ -2918,7 +3051,7 @@ set_timestamp(struct wsi_wl_swapchain *chain,
* timestamps at all, so bail out.
*/
if (!refresh)
return false;
return set_application_driven_timestamp(chain, timestamp, correction);
/* We assume we're being fed at the display's refresh rate, but
* if that doesn't happen our timestamps fall into the past.
@ -2936,6 +3069,10 @@ set_timestamp(struct wsi_wl_swapchain *chain,
error = chain->present_ids.display_time_error -
chain->present_ids.display_time_correction;
/* If the application is driving the timestamps, this is somewhat redundant,
 * but it will drain out any accumulated display_time_error over time.
 * Accumulated errors are expected, since the application might not
 * align its target time perfectly against a refresh cycle. */
target = chain->present_ids.last_target_time;
if (error > 0) {
target += (error / refresh) * refresh;
@ -2945,19 +3082,41 @@ set_timestamp(struct wsi_wl_swapchain *chain,
}
chain->present_ids.display_time_correction += *correction;
if (chain->timing_request.serial && chain->timing_request.time) {
target = chain->timing_request.time;
chain->present_ids.last_target_time = target;
*timestamp = target;
if (chain->timing_request.flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT)
target -= chain->present_ids.refresh_nsec / 2;
/* Without the flag, the application is supposed to deal with any safety margins on its own. */
timespec_from_nsec(&target_ts, target);
/* If we're using the commit-timing path, we always have the FIFO protocol, so we don't have to
 * consider scenarios where the application passes a very low present time.
 * I.e., there is no need to max() the application timestamp against our estimated next refresh cycle.
 * If the surface is occluded, it's possible to render at a rate higher than the display refresh,
 * but that's okay: those presents will be discarded anyway, and we won't report odd timestamps to the application. */
} else {
target = next_phase_locked_time(displayed_time,
refresh,
target);
chain->present_ids.last_target_time = target;
*timestamp = target;
/* Take back 500 us as a safety margin, to ensure we don't miss our
* target due to round-off error.
*/
timespec_from_nsec(&target_ts, target - 500000);
}
wp_commit_timer_v1_set_timestamp(chain->commit_timer,
(uint64_t)target_ts.tv_sec >> 32, target_ts.tv_sec,
target_ts.tv_nsec);
chain->present_ids.last_target_time = target;
*timestamp = target;
return true;
}
@ -3059,13 +3218,16 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
}
if (present_id > 0 || (mode_fifo && chain->commit_timer) ||
util_perfetto_is_tracing_enabled()) {
util_perfetto_is_tracing_enabled() || chain->timing_request.serial) {
struct wsi_wl_present_id *id =
vk_zalloc(chain->wsi_wl_surface->display->wsi_wl->alloc, sizeof(*id), sizeof(uintptr_t),
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
id->chain = chain;
id->present_id = present_id;
id->alloc = chain->wsi_wl_surface->display->wsi_wl->alloc;
id->timing_serial = chain->timing_request.serial;
id->img = &chain->images[image_index].base;
id->user_target_time = chain->timing_request.time != 0;
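/* user_target_time distinguishes an application-chosen target from our own
 * phase-locked estimate: application targets may be arbitrarily early and must
 * not feed the display_time_error compensation in presentation_handle_presented. */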
mtx_lock(&chain->present_ids.lock);
@ -3193,6 +3355,8 @@ wsi_wl_swapchain_queue_present(struct wsi_swapchain *wsi_chain,
wsi_wl_surface->display->queue);
}
memset(&chain->timing_request, 0, sizeof(chain->timing_request));
return VK_SUCCESS;
}
@ -3427,6 +3591,20 @@ wsi_wl_swapchain_destroy(struct wsi_swapchain *wsi_chain,
return VK_SUCCESS;
}
static VkTimeDomainKHR
clock_id_to_vk_time_domain(clockid_t id)
{
switch (id) {
case CLOCK_MONOTONIC:
return VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR;
case CLOCK_MONOTONIC_RAW:
return VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR;
default:
/* Fallback; not reached in practice, since timing support is only exposed for the clocks above. */
return VK_TIME_DOMAIN_DEVICE_KHR;
}
}
static VkResult
wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
VkDevice device,
@ -3605,6 +3783,12 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
chain->base.queue_present = wsi_wl_swapchain_queue_present;
chain->base.release_images = wsi_wl_swapchain_release_images;
chain->base.set_present_mode = wsi_wl_swapchain_set_present_mode;
chain->base.set_timing_request = wsi_wl_swapchain_set_timing_request;
chain->base.poll_timing_request = wsi_wl_swapchain_poll_timing_request;
if (pCreateInfo->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT) {
chain->base.present_timing.time_domain =
clock_id_to_vk_time_domain(wsi_wl_surface->display->presentation_clock_id);
}
chain->base.wait_for_present = wsi_wl_swapchain_wait_for_present;
chain->base.wait_for_present2 = wsi_wl_swapchain_wait_for_present2;
chain->base.present_mode = present_mode;

View file

@ -276,6 +276,16 @@ wsi_win32_surface_get_capabilities2(VkIcdSurfaceBase *surface,
break;
}
case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
VkPresentTimingSurfaceCapabilitiesEXT *wait = (VkPresentTimingSurfaceCapabilitiesEXT *)ext;
wait->presentStageQueries = 0;
wait->presentTimingSupported = VK_FALSE;
wait->presentAtAbsoluteTimeSupported = VK_FALSE;
wait->presentAtRelativeTimeSupported = VK_FALSE;
break;
}
default:
/* Ignored */
break;

View file

@ -64,6 +64,7 @@
#include "wsi_common_entrypoints.h"
#include "wsi_common_private.h"
#include "wsi_common_queue.h"
#include "loader/loader_dri_helper_screen.h"
#ifdef HAVE_SYS_SHM_H
#include <sys/ipc.h>
@ -79,7 +80,14 @@
#define MAX_DAMAGE_RECTS 64
struct wsi_x11_connection {
struct x11_icd_surface_key {
xcb_connection_t *conn;
xcb_window_t window;
uint32_t padding; /* Explicitly covers trailing padding so the raw key bytes hash and compare consistently. */
};
struct wsi_x11_icd_surface {
struct x11_icd_surface_key key;
bool has_dri3;
bool has_dri3_modifiers;
bool has_dri3_explicit_sync;
@ -88,13 +96,80 @@ struct wsi_x11_connection {
bool is_xwayland;
bool has_mit_shm;
bool has_xfixes;
struct loader_screen_resources screen_resources;
bool screen_resources_valid;
mtx_t mtx;
/* This holds the fallback for the MSC rate, i.e. the refresh rate.
 * If we cannot get hold of a stable estimate based on real feedback,
 * we defer to this. With multiple monitors and other effects influencing the actual rate,
 * we shouldn't trust it blindly. */
uint64_t current_refresh_ns;
};
static uint64_t
x11_icd_surface_update_present_timing(struct wsi_x11_icd_surface *surface, uint32_t width, uint32_t height)
{
uint64_t ret;
if (!surface->screen_resources_valid)
return 0;
mtx_lock(&surface->mtx);
loader_update_screen_resources(&surface->screen_resources);
if (surface->screen_resources.num_crtcs == 0) {
surface->current_refresh_ns = 0;
goto out;
}
surface->current_refresh_ns =
1000000000ull * surface->screen_resources.crtcs[0].refresh_denominator /
surface->screen_resources.crtcs[0].refresh_numerator;
/* Only one CRTC: no need to consider multi-monitor placement. */
if (surface->screen_resources.num_crtcs == 1)
goto out;
/* Find the best matching screen for the window. */
xcb_translate_coordinates_cookie_t cookie =
xcb_translate_coordinates_unchecked(surface->key.conn, surface->key.window,
surface->screen_resources.screen->root, 0, 0);
xcb_translate_coordinates_reply_t *reply =
xcb_translate_coordinates_reply(surface->key.conn, cookie, NULL);
if (!reply)
goto out;
int area = 0;
for (unsigned c = 0; c < surface->screen_resources.num_crtcs; c++) {
struct loader_crtc_info *crtc = &surface->screen_resources.crtcs[c];
int c_area = box_intersection_area(
reply->dst_x, reply->dst_y, width, height, crtc->x,
crtc->y, crtc->width, crtc->height);
if (c_area > area) {
surface->current_refresh_ns = 1000000000ull * crtc->refresh_denominator / crtc->refresh_numerator;
area = c_area;
}
}
free(reply);
out:
ret = surface->current_refresh_ns;
mtx_unlock(&surface->mtx);
return ret;
}
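/* Worked example of the conversion above (values assumed for illustration):
 * a 60 Hz CRTC reported as refresh_numerator = 60, refresh_denominator = 1
 * yields 1000000000 * 1 / 60 = 16666666 ns per cycle, while a 59.94 Hz mode
 * (numerator 60000, denominator 1001) yields
 * 1000000000 * 1001 / 60000 = 16683333 ns. */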
struct wsi_x11 {
struct wsi_interface base;
mtx_t mutex;
/* Hash table of xcb_connection -> wsi_x11_connection mappings */
/* Hash table of (xcb_connection, xcb_window) -> wsi_x11_icd_surface mappings */
struct hash_table *connections;
};
@ -224,9 +299,9 @@ wsi_x11_detect_xwayland(xcb_connection_t *conn,
return is_xwayland;
}
static struct wsi_x11_connection *
wsi_x11_connection_create(struct wsi_device *wsi_dev,
xcb_connection_t *conn)
static struct wsi_x11_icd_surface *
wsi_x11_icd_surface_create(struct wsi_device *wsi_dev,
xcb_connection_t *conn, xcb_window_t window)
{
xcb_query_extension_cookie_t dri3_cookie, pres_cookie, randr_cookie,
amd_cookie, nv_cookie, shm_cookie, sync_cookie,
@ -241,16 +316,19 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
bool has_dri3_v1_4 = false;
bool has_present_v1_4 = false;
/* wsi_x11_get_connection may be called from a thread, but we will never end up here on a worker thread,
/* wsi_x11_get_icd_surface may be called from a thread, but we will never end up here on a worker thread,
* since by then the entry will already be in the hash map,
* so we will not violate Vulkan's rules about when it is
* allowed to call the allocation callbacks. */
struct wsi_x11_connection *wsi_conn =
vk_alloc(&wsi_dev->instance_alloc, sizeof(*wsi_conn), 8,
struct wsi_x11_icd_surface *wsi_conn =
vk_zalloc(&wsi_dev->instance_alloc, sizeof(*wsi_conn), 8,
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!wsi_conn)
return NULL;
wsi_conn->key.conn = conn;
wsi_conn->key.window = window;
sync_cookie = xcb_query_extension(conn, 4, "SYNC");
dri3_cookie = xcb_query_extension(conn, 4, "DRI3");
pres_cookie = xcb_query_extension(conn, 7, "Present");
@ -378,6 +456,27 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
}
#endif
if (window) {
/* This state is only necessary for dealing with present timing, and if we fail, we simply won't expose support. */
xcb_get_geometry_cookie_t geometry_cookie = xcb_get_geometry_unchecked(conn, window);
xcb_get_geometry_reply_t *geometry_reply = xcb_get_geometry_reply(conn, geometry_cookie, NULL);
if (geometry_reply) {
xcb_screen_iterator_t it = xcb_setup_roots_iterator(xcb_get_setup(conn));
xcb_screen_t *screen;
for (screen = it.data; it.rem != 0; xcb_screen_next(&it), screen = it.data) {
if (screen->root == geometry_reply->root) {
loader_init_screen_resources(&wsi_conn->screen_resources, conn, screen);
wsi_conn->screen_resources_valid = true;
mtx_init(&wsi_conn->mtx, mtx_plain);
break;
}
}
free(geometry_reply);
}
}
free(dri3_reply);
free(pres_reply);
free(randr_reply);
@ -392,14 +491,18 @@ wsi_x11_connection_create(struct wsi_device *wsi_dev,
}
static void
wsi_x11_connection_destroy(struct wsi_device *wsi_dev,
struct wsi_x11_connection *conn)
wsi_x11_icd_surface_destroy(struct wsi_device *wsi_dev,
struct wsi_x11_icd_surface *conn)
{
if (conn->screen_resources_valid) {
loader_destroy_screen_resources(&conn->screen_resources);
mtx_destroy(&conn->mtx);
}
vk_free(&wsi_dev->instance_alloc, conn);
}
static bool
wsi_x11_check_for_dri3(struct wsi_x11_connection *wsi_conn)
wsi_x11_check_for_dri3(struct wsi_x11_icd_surface *wsi_conn)
{
if (wsi_conn->has_dri3)
return true;
@ -418,35 +521,37 @@ wsi_x11_check_for_dri3(struct wsi_x11_connection *wsi_conn)
*
* If the allocation fails NULL is returned.
*/
static struct wsi_x11_connection *
wsi_x11_get_connection(struct wsi_device *wsi_dev,
xcb_connection_t *conn)
static struct wsi_x11_icd_surface *
wsi_x11_get_icd_surface(struct wsi_device *wsi_dev,
xcb_connection_t *conn, xcb_window_t window)
{
struct wsi_x11 *wsi =
(struct wsi_x11 *)wsi_dev->wsi[VK_ICD_WSI_PLATFORM_XCB];
mtx_lock(&wsi->mutex);
struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, conn);
struct x11_icd_surface_key key = { .conn = conn, .window = window };
struct hash_entry *entry = _mesa_hash_table_search(wsi->connections, &key);
if (!entry) {
/* We're about to make a bunch of blocking calls. Let's drop the
* mutex for now so we don't block up too badly.
*/
mtx_unlock(&wsi->mutex);
struct wsi_x11_connection *wsi_conn =
wsi_x11_connection_create(wsi_dev, conn);
struct wsi_x11_icd_surface *wsi_conn =
wsi_x11_icd_surface_create(wsi_dev, conn, window);
if (!wsi_conn)
return NULL;
mtx_lock(&wsi->mutex);
entry = _mesa_hash_table_search(wsi->connections, conn);
entry = _mesa_hash_table_search(wsi->connections, &wsi_conn->key);
if (entry) {
/* Oops, someone raced us to it */
wsi_x11_connection_destroy(wsi_dev, wsi_conn);
wsi_x11_icd_surface_destroy(wsi_dev, wsi_conn);
} else {
entry = _mesa_hash_table_insert(wsi->connections, conn, wsi_conn);
entry = _mesa_hash_table_insert(wsi->connections, &wsi_conn->key, wsi_conn);
}
}
@ -590,11 +695,20 @@ wsi_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice,
{
VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice);
struct wsi_device *wsi_device = pdevice->wsi_device;
if (!(wsi_device->queue_supports_blit & BITFIELD64_BIT(queueFamilyIndex)))
/* The blit-capable and timestamp-capable queue masks should normally overlap. */
uint64_t effective_queues = wsi_device->queue_supports_blit & wsi_device->queue_supports_timestamps;
/* If no queue supports both blits and timestamps, fall back to the blit-capable
 * queues and simply don't report support for queue timestamps. */
if (!effective_queues)
effective_queues = wsi_device->queue_supports_blit;
if (!(effective_queues & BITFIELD64_BIT(queueFamilyIndex)))
return false;
struct wsi_x11_connection *wsi_conn =
wsi_x11_get_connection(wsi_device, connection);
struct wsi_x11_icd_surface *wsi_conn =
wsi_x11_get_icd_surface(wsi_device, connection, 0);
if (!wsi_conn)
return false;
@ -669,8 +783,8 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
xcb_window_t window = x11_surface_get_window(icd_surface);
struct wsi_x11_connection *wsi_conn =
wsi_x11_get_connection(wsi_device, conn);
struct wsi_x11_icd_surface *wsi_conn =
wsi_x11_get_icd_surface(wsi_device, conn, window);
if (!wsi_conn)
return VK_ERROR_OUT_OF_HOST_MEMORY;
@ -722,7 +836,7 @@ x11_get_min_image_count(const struct wsi_device *wsi_device, bool is_xwayland)
static unsigned
x11_get_min_image_count_for_present_mode(struct wsi_device *wsi_device,
struct wsi_x11_connection *wsi_conn,
struct wsi_x11_icd_surface *wsi_conn,
VkPresentModeKHR present_mode);
static VkResult
@ -734,8 +848,8 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
xcb_window_t window = x11_surface_get_window(icd_surface);
struct wsi_x11_vk_surface *surface = (struct wsi_x11_vk_surface*)icd_surface;
struct wsi_x11_connection *wsi_conn =
wsi_x11_get_connection(wsi_device, conn);
struct wsi_x11_icd_surface *wsi_conn =
wsi_x11_get_icd_surface(wsi_device, conn, window);
xcb_get_geometry_cookie_t geom_cookie;
xcb_generic_error_t *err;
xcb_get_geometry_reply_t *geom;
@ -863,6 +977,52 @@ x11_surface_get_capabilities2(VkIcdSurfaceBase *icd_surface,
break;
}
case VK_STRUCTURE_TYPE_PRESENT_TIMING_SURFACE_CAPABILITIES_EXT: {
VkPresentTimingSurfaceCapabilitiesEXT *wait = (void *)ext;
xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
xcb_window_t window = x11_surface_get_window(icd_surface);
struct wsi_x11_icd_surface *wsi_conn = wsi_x11_get_icd_surface(wsi_device, conn, window);
wait->presentStageQueries = 0;
wait->presentTimingSupported = VK_FALSE;
wait->presentAtAbsoluteTimeSupported = VK_FALSE;
wait->presentAtRelativeTimeSupported = VK_FALSE;
/* If we cannot query modes for a screen, it's not possible to get reliable timings. */
if (!wsi_conn->screen_resources_valid)
break;
wait->presentTimingSupported = VK_TRUE;
if (wsi_conn->is_xwayland) {
/* On Xwayland, COMPLETE is tied to the frame callback, so request dequeue is what we'll report.
 * For pure frame-pacing purposes, this is likely fine. */
wait->presentStageQueries = VK_PRESENT_STAGE_REQUEST_DEQUEUED_BIT_EXT;
/* Xwayland cannot get a reliable refresh rate estimate, since the MSC is not tied to monitor refresh at all.
 * However, it's pragmatically very important to expose some baseline Xwl support, since
 * a large number of applications (mostly games) rely on X11 APIs.
 *
 * Relative timings are easier to deal with, since errors against an absolute timer are more or less expected.
 * They are sufficient for implementing present intervals in GL/D3D and the like, but likely not for
 * tight A/V sync in e.g. media players; those should be using Wayland when available anyway.
 * Per spec, the timing we honor should correlate with the PIXEL_VISIBLE_BIT stage,
 * which is not really possible when we only observe dequeue, but relative timings don't have that problem.
 *
 * There is PRESENT_CAPABILITY_UST, which would help, but xserver does not implement it at all.
 */
wait->presentAtRelativeTimeSupported = VK_TRUE;
} else {
/* COMPLETE should be tied to page flip on native X11. */
wait->presentStageQueries = VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT;
wait->presentAtAbsoluteTimeSupported = VK_TRUE;
wait->presentAtRelativeTimeSupported = VK_TRUE;
}
break;
}
default:
/* Ignored */
break;
@ -1092,6 +1252,7 @@ wsi_CreateXlibSurfaceKHR(VkInstance _instance,
struct x11_image_pending_completion {
uint32_t serial;
uint64_t signal_present_id;
uint64_t timing_serial;
};
struct x11_image {
@ -1108,6 +1269,7 @@ struct x11_image {
VkPresentModeKHR present_mode;
xcb_rectangle_t rects[MAX_DAMAGE_RECTS];
int rectangle_count;
struct wsi_image_timing_request timing_request;
/* In IMMEDIATE and MAILBOX modes, we can have multiple pending presentations per image.
* We need to keep track of them when considering present ID. */
@ -1125,12 +1287,19 @@ struct x11_image {
#endif
};
struct x11_present_timing_entry {
uint64_t msc;
uint64_t ust;
};
#define X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE 16
struct x11_swapchain {
struct wsi_swapchain base;
bool has_dri3_modifiers;
bool has_mit_shm;
bool has_async_may_tear;
bool has_reliable_msc;
xcb_connection_t * conn;
xcb_window_t window;
@ -1144,9 +1313,13 @@ struct x11_swapchain {
xcb_special_event_t * special_event;
uint64_t send_sbc;
uint64_t last_present_msc;
uint64_t next_present_ust_lower_bound;
uint32_t stamp;
uint32_t sent_image_count;
struct x11_present_timing_entry present_timing_window[X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE];
uint32_t present_timing_window_index;
atomic_int status;
bool copy_is_suboptimal;
struct wsi_queue present_queue;
@ -1168,14 +1341,121 @@ struct x11_swapchain {
uint64_t present_id;
VkResult present_progress_error;
struct wsi_image_timing_request timing_request;
bool msc_estimate_is_stable;
struct x11_image images[0];
};
VK_DEFINE_NONDISP_HANDLE_CASTS(x11_swapchain, base.base, VkSwapchainKHR,
VK_OBJECT_TYPE_SWAPCHAIN_KHR)
static void x11_present_complete(struct x11_swapchain *swapchain,
struct x11_image *image, uint32_t index)
static bool x11_refresh_rate_estimate_is_stable(struct x11_swapchain *swapchain, uint64_t base_rate)
{
/* Only accept a refresh rate estimate if it's *very* stable.
* Keith's old GOOGLE_display_timing MR suggests that using this estimate is better than blindly
* accepting the modeline in some cases.
* When running in VRR modes, the MSC will appear to be highly unstable, and we cannot accept those estimates. */
for (int i = 0; i < X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE; i++) {
const struct x11_present_timing_entry *a =
&swapchain->present_timing_window[i];
const struct x11_present_timing_entry *b =
&swapchain->present_timing_window[(i + 1) % X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE];
if (!a->msc || !b->msc)
continue;
uint64_t ust_delta = MAX2(a->ust, b->ust) - MIN2(a->ust, b->ust);
uint64_t msc_delta = MAX2(a->msc, b->msc) - MIN2(a->msc, b->msc);
if (msc_delta == 0)
continue;
uint64_t refresh_ns = 1000 * ust_delta / msc_delta;
/* The true UST values are expected to be quite accurate.
* Anything more than 10us difference in rate is considered unstable.
* If the MSC is driven by GPU progress in VRR mode,
* it's extremely unlikely that they are paced *perfectly* for 16 frames in a row. */
if (llabs((int64_t)base_rate - (int64_t)refresh_ns) > 10000)
return false;
}
return true;
}
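/* Worked example of the 10 us tolerance (numbers assumed): with base_rate =
 * 16666666 ns, a window pair 2 MSCs and 33340 us apart estimates
 * 1000 * 33340 / 2 = 16670000 ns, an error of only 3334 ns, so it still counts
 * as stable. A VRR-ish pair 2 MSCs and 40000 us apart estimates 20000000 ns
 * and fails the check immediately. */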
static void x11_present_update_refresh_cycle_estimate(struct x11_swapchain *swapchain,
uint64_t msc, uint64_t ust)
{
struct wsi_x11_icd_surface *surface = wsi_x11_get_icd_surface(
(struct wsi_device*)swapchain->base.wsi, swapchain->conn, swapchain->window);
mtx_lock(&surface->mtx);
uint64_t randr_refresh_ns = surface->current_refresh_ns;
mtx_unlock(&surface->mtx);
swapchain->present_timing_window_index =
(swapchain->present_timing_window_index + 1) % X11_SWAPCHAIN_REFRESH_RATE_WINDOW_SIZE;
struct x11_present_timing_entry *entry = &swapchain->present_timing_window[swapchain->present_timing_window_index];
if (!swapchain->has_reliable_msc) {
/* If we don't have reliable MSC, we always trust the fallback RANDR query.
* We have no idea if we're FRR or VRR. */
wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, randr_refresh_ns, 0, 0);
entry->msc = msc;
entry->ust = ust;
return;
}
/* Try to get an initial estimate as quickly as possible; we will refine it over time. */
if (entry->msc == 0)
entry = &swapchain->present_timing_window[1];
if (entry->msc != 0) {
uint64_t msc_delta = msc - entry->msc;
/* Safeguard against any weird interactions with IMMEDIATE. */
if (msc_delta != 0) {
uint64_t ust_delta = 1000 * (ust - entry->ust);
uint64_t refresh_ns = ust_delta / msc_delta;
swapchain->msc_estimate_is_stable = x11_refresh_rate_estimate_is_stable(swapchain, refresh_ns);
if (swapchain->msc_estimate_is_stable) {
/* If the MSC is tightly locked in, we can safely assume we're in FRR mode.
 * It's possible we're technically doing VRR, but if we're consistently rendering above the monitor
 * refresh rate, there is no meaningful difference anyway. */
/* Our refresh rates are only estimates, so expect some deviation (+/- 1us). */
wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, refresh_ns, refresh_ns, 1000);
} else {
/* If we have enabled adaptive sync, and we're seeing highly irregular MSC values, we assume
* we're driving the display VRR. */
uint64_t refresh_interval = swapchain->base.wsi->enable_adaptive_sync ? UINT64_MAX : 0;
wsi_swapchain_present_timing_update_refresh_rate(&swapchain->base, randr_refresh_ns, refresh_interval, 0);
}
}
}
entry = &swapchain->present_timing_window[swapchain->present_timing_window_index];
entry->msc = msc;
entry->ust = ust;
}
static void x11_present_complete(struct x11_swapchain *swapchain,
struct x11_image *image, uint32_t index,
uint64_t msc, uint64_t ust)
{
/* Update estimate for refresh rate. */
if (swapchain->base.present_timing.active)
x11_present_update_refresh_cycle_estimate(swapchain, msc, ust);
/* Make sure to signal present timings before signaling present wait;
 * this way we get minimal latency for reports. */
uint64_t timing_serial = image->pending_completions[index].timing_serial;
if (timing_serial)
wsi_swapchain_present_timing_notify_completion(&swapchain->base, timing_serial, ust * 1000, &image->base);
uint64_t signal_present_id = image->pending_completions[index].signal_present_id;
if (signal_present_id) {
mtx_lock(&swapchain->present_progress_mutex);
@ -1327,6 +1607,16 @@ x11_handle_dri3_present_event(struct x11_swapchain *chain,
return VK_SUBOPTIMAL_KHR;
}
if (chain->base.present_timing.active) {
/* It's possible that we have multiple monitors and that moving the window around changes the effective rate.
 * Lots of logic is reused from platform_x11.c. */
/* TODO: Should we rate-limit this query? */
struct wsi_x11_icd_surface *surface = wsi_x11_get_icd_surface(
(struct wsi_device *)chain->base.wsi, chain->conn, chain->window);
x11_icd_surface_update_present_timing(surface, config->width, config->height);
}
break;
}
@ -1348,13 +1638,14 @@ x11_handle_dri3_present_event(struct x11_swapchain *chain,
case XCB_PRESENT_EVENT_COMPLETE_NOTIFY: {
xcb_present_complete_notify_event_t *complete = (void *) event;
uint64_t ust = MAX2(complete->ust, chain->next_present_ust_lower_bound);
if (complete->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) {
unsigned i, j;
for (i = 0; i < chain->base.image_count; i++) {
struct x11_image *image = &chain->images[i];
for (j = 0; j < image->present_queued_count; j++) {
if (image->pending_completions[j].serial == complete->serial) {
x11_present_complete(chain, image, j);
x11_present_complete(chain, image, j, complete->msc, ust);
}
}
}
@ -1424,8 +1715,8 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
int64_t divisor = 0;
int64_t remainder = 0;
struct wsi_x11_connection *wsi_conn =
wsi_x11_get_connection((struct wsi_device*)chain->base.wsi, chain->conn);
struct wsi_x11_icd_surface *wsi_conn =
wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window);
if (!wsi_conn)
return VK_ERROR_OUT_OF_HOST_MEMORY;
@ -1457,6 +1748,7 @@ x11_present_to_x11_dri3(struct x11_swapchain *chain, uint32_t image_index,
(struct x11_image_pending_completion) {
.signal_present_id = image->present_id,
.serial = serial,
.timing_serial = image->timing_request.serial,
};
xcb_void_cookie_t cookie;
@ -1654,7 +1946,7 @@ static VkResult x11_swapchain_read_status_atomic(struct x11_swapchain *chain)
*/
static bool
x11_needs_wait_for_fences(const struct wsi_device *wsi_device,
struct wsi_x11_connection *wsi_conn,
struct wsi_x11_icd_surface *wsi_conn,
VkPresentModeKHR present_mode)
{
if (wsi_conn->is_xwayland && !wsi_device->x11.xwaylandWaitReady) {
@ -1676,7 +1968,7 @@ x11_needs_wait_for_fences(const struct wsi_device *wsi_device,
static bool
x11_requires_mailbox_image_count(const struct wsi_device *device,
struct wsi_x11_connection *wsi_conn,
struct wsi_x11_icd_surface *wsi_conn,
VkPresentModeKHR present_mode)
{
/* If we're resorting to wait for fences, we're assuming a MAILBOX-like model,
@ -1773,6 +2065,26 @@ x11_set_present_mode(struct wsi_swapchain *wsi_chain,
chain->base.present_mode = mode;
}
static void
x11_set_timing_request(struct wsi_swapchain *wsi_chain,
const struct wsi_image_timing_request *request)
{
struct x11_swapchain *chain = (struct x11_swapchain *)wsi_chain;
chain->timing_request = *request;
}
static uint64_t
x11_poll_early_refresh(struct wsi_swapchain *wsi_chain, uint64_t *interval)
{
struct x11_swapchain *chain = (struct x11_swapchain *)wsi_chain;
struct wsi_x11_icd_surface *wsi_conn =
wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window);
/* We don't know yet. */
*interval = 0;
return x11_icd_surface_update_present_timing(wsi_conn, chain->extent.width, chain->extent.height);
}
/**
* Acquire a ready-to-use image from the swapchain.
*
@ -1874,6 +2186,8 @@ x11_queue_present(struct wsi_swapchain *wsi_chain,
chain->images[image_index].present_id = present_id;
/* With KHR_swapchain_maintenance1, the present mode can change per present. */
chain->images[image_index].present_mode = chain->base.present_mode;
chain->images[image_index].timing_request = chain->timing_request;
memset(&chain->timing_request, 0, sizeof(chain->timing_request));
wsi_queue_push(&chain->present_queue, image_index);
return x11_swapchain_read_status_atomic(chain);
@ -1974,6 +2288,125 @@ x11_manage_event_queue(void *state)
return 0;
}
static uint64_t
x11_present_compute_target_msc(struct x11_swapchain *chain,
const struct wsi_image_timing_request *request,
uint64_t minimum_msc)
{
const struct x11_present_timing_entry *entry = &chain->present_timing_window[chain->present_timing_window_index];
bool relative = (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_RELATIVE_TIME_BIT_EXT) != 0;
/* Just use the FIFO derived MSC. From spec on relative:
* "If the swapchain has never been used to present an image, the provided targetTime is ignored." */
if (!request->serial || !request->time || (relative && !entry->ust))
return minimum_msc;
int64_t target_ns;
mtx_lock(&chain->base.present_timing.lock);
/* Present timing is only defined to work with FIFO modes, so we can rely on having
* reliable relative timings, since we block for COMPLETE to come through before we queue up more presents. */
if (relative) {
/* If the application is trying to drive us at the refresh rate, FIFO will take care of it.
 * Don't end up in a situation where we sleep and miss the deadline by mistake. */
if (!chain->has_reliable_msc) {
uint64_t relative_threshold;
if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT)
relative_threshold = 3 * chain->base.present_timing.refresh_duration / 2;
else
relative_threshold = chain->base.present_timing.refresh_duration;
if (request->time <= relative_threshold) {
mtx_unlock(&chain->base.present_timing.lock);
return minimum_msc;
}
}
target_ns = 1000 * (int64_t)entry->ust + (int64_t)request->time;
} else {
target_ns = (int64_t)request->time;
}
/* Pull the target back half a refresh so the "not before" snaps to the nearest refresh cycle.
 * This only makes sense for FRR; it is the application's responsibility not to use the flag with VRR.
 * Without the flag, the target is strictly a "not before". */
if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT)
target_ns -= (int64_t)chain->base.present_timing.refresh_duration / 2;
if (entry->msc && chain->base.present_timing.refresh_duration != 0 &&
chain->msc_estimate_is_stable && chain->has_reliable_msc) {
/* If we can trust MSC to be a stable FRR heartbeat, we sync to that. */
uint64_t delta_time_ns = MAX2(target_ns - 1000 * (int64_t)entry->ust, 0);
uint64_t periods = (delta_time_ns + chain->base.present_timing.refresh_duration - 1) /
chain->base.present_timing.refresh_duration;
mtx_unlock(&chain->base.present_timing.lock);
/* Xwl cannot understand an MSC that jumps by more than 1. It appears that when the MSC jumps by more than 1,
 * each MSC cycle is padded by 16.6 ms or something like that.
 * If we want to target a specific time, we must sleep to achieve that until Xwl improves.
 * Fortunately, we're on a submit thread, so that is mostly an acceptable solution. */
minimum_msc = MAX2(minimum_msc, entry->msc + periods);
} else {
/* If we don't have a stable estimate (e.g. true VRR, or Xwl), we just sleep until the deadline.
 * This relies on the timebase of os_time_nanosleep being MONOTONIC, as well as UST being MONOTONIC. */
if (request->flags & VK_PRESENT_TIMING_INFO_PRESENT_AT_NEAREST_REFRESH_CYCLE_BIT_EXT) {
if (!chain->has_reliable_msc && chain->base.present_timing.refresh_duration) {
uint64_t delta_time_ns = MAX2(target_ns - 1000 * (int64_t)entry->ust, 0);
uint64_t periods = delta_time_ns / chain->base.present_timing.refresh_duration;
target_ns = 1000ull * entry->ust + periods * chain->base.present_timing.refresh_duration;
/* Set a minimum target that is very close to the real estimate.
* This way, we ensure that we don't regularly round estimates up in
* chain->next_present_ust_lower_bound. */
target_ns += 63 * chain->base.present_timing.refresh_duration / 64;
}
}
if (chain->has_reliable_msc) {
/* Very regular sleeping can trigger a strange feedback loop where the MSC estimates become stable enough
 * that we accept them as a stable MSC. Perturb the rates enough to make it extremely unlikely that
 * we accept sleeping patterns as the ground-truth rate: introduce a 50 us error between consecutive timestamps,
 * which should avoid the 10 us stability check reliably. If sleep quanta are less accurate than that, it's
 * extremely unlikely we get a stable pace anyway. TODO: Is there a more reliable way? */
/* On Xwl we never accept MSC estimates as ground truth, so ignore this perturbation. */
target_ns += 50000ll * (chain->present_timing_window_index & 1) - 25000;
target_ns = MAX2(target_ns, 0);
}
/* If we're on Xwl or VRR X11 and trying to target a specific cycle by sleeping, pull back the sleep a bit.
* We will be racing against time once we wake up to send the request to Xwl -> Wayland -> frame callback -> COMPLETE.
* If target_ns syncs well to a refresh cycle, we speculate that COMPLETE will come through at about target_ns. */
/* To get a proper pace on an actual VRR display, we will have to detect whether we're presenting too early
 * compared to what the application actually expected.
 * In that case, we need to remove this compensation once we detect that presents come in too early.
 * Effectively, we will need to adjust the reported UST up if we somehow end up seeing a timestamp too early.
 * The relative refresh will feed off this adjustment in a tight loop, so this should be pretty solid
 * for both VRR and FRR. Present timing can only be used with FIFO modes, i.e. we will not overwrite this
 * until the present is actually complete. */
chain->next_present_ust_lower_bound = target_ns / 1000;
/* We also need to pull back the sleep a bit to account for X.org roundtrip delays.
* Allow up to 4ms of error here. */
int64_t eager_present_ns = MIN2((int64_t)chain->base.present_timing.refresh_duration / 4, 4 * 1000 * 1000);
target_ns -= eager_present_ns;
target_ns = MAX2(target_ns, 0);
mtx_unlock(&chain->base.present_timing.lock);
mtx_unlock(&chain->thread_state_lock);
os_time_nanosleep_until(target_ns);
/* Reacquiring the lock won't change any invariants for us, so this is fine.
* We make sure to check chain->status after this function in case that got updated while we were sleeping. */
mtx_lock(&chain->thread_state_lock);
}
return minimum_msc;
}
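/* Worked example of the stable-MSC path above (numbers assumed): if the
 * target lies 40 ms after entry->ust and refresh_duration = 16666666 ns, then
 * periods = (40000000 + 16666665) / 16666666 = 3, so the present is queued for
 * entry->msc + 3 and no sleeping is required. */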
/**
* Presentation thread.
*
@ -1991,8 +2424,8 @@ static int
x11_manage_present_queue(void *state)
{
struct x11_swapchain *chain = state;
struct wsi_x11_connection *wsi_conn =
wsi_x11_get_connection((struct wsi_device*)chain->base.wsi, chain->conn);
struct wsi_x11_icd_surface *wsi_conn =
wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window);
VkResult result = VK_SUCCESS;
u_thread_setname("WSI swapchain queue");
@ -2040,6 +2473,8 @@ x11_manage_present_queue(void *state)
u_cnd_monotonic_wait(&chain->thread_state_cond, &chain->thread_state_lock);
}
target_msc = x11_present_compute_target_msc(chain, &chain->images[image_index].timing_request, target_msc);
if (chain->status < 0) {
mtx_unlock(&chain->thread_state_lock);
break;
@ -2315,7 +2750,7 @@ wsi_x11_recompute_dri3_modifier_hash(blake3_hash *hash, const struct wsi_drm_ima
}
static void
wsi_x11_get_dri3_modifiers(struct wsi_x11_connection *wsi_conn,
wsi_x11_get_dri3_modifiers(struct wsi_x11_icd_surface *wsi_conn,
xcb_connection_t *conn, xcb_window_t window,
uint8_t depth, uint8_t bpp,
uint64_t **modifiers_in, uint32_t *num_modifiers_in,
@ -2402,8 +2837,8 @@ wsi_x11_swapchain_query_dri3_modifiers_changed(struct x11_swapchain *chain)
uint64_t *modifiers[2] = {NULL, NULL};
uint32_t num_modifiers[2] = {0, 0};
struct wsi_x11_connection *wsi_conn =
wsi_x11_get_connection((struct wsi_device*)chain->base.wsi, chain->conn);
struct wsi_x11_icd_surface *wsi_conn =
wsi_x11_get_icd_surface((struct wsi_device*)chain->base.wsi, chain->conn, chain->window);
xcb_get_geometry_reply_t *geometry =
xcb_get_geometry_reply(chain->conn, xcb_get_geometry(chain->conn, chain->window), NULL);
@ -2551,7 +2986,7 @@ static VkResult x11_wait_for_present(struct wsi_swapchain *wsi_chain,
static unsigned
x11_get_min_image_count_for_present_mode(struct wsi_device *wsi_device,
struct wsi_x11_connection *wsi_conn,
struct wsi_x11_icd_surface *wsi_conn,
VkPresentModeKHR present_mode)
{
uint32_t min_image_count = x11_get_min_image_count(wsi_device, wsi_conn->is_xwayland);
@ -2592,8 +3027,9 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
* representing it.
*/
xcb_connection_t *conn = x11_surface_get_connection(icd_surface);
struct wsi_x11_connection *wsi_conn =
wsi_x11_get_connection(wsi_device, conn);
xcb_window_t window = x11_surface_get_window(icd_surface);
struct wsi_x11_icd_surface *wsi_conn =
wsi_x11_get_icd_surface(wsi_device, conn, window);
if (!wsi_conn)
return VK_ERROR_OUT_OF_HOST_MEMORY;
@ -2613,7 +3049,6 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
}
/* Check that we have a window up-front. It is an error to not have one. */
xcb_window_t window = x11_surface_get_window(icd_surface);
/* Get the geometry of that window. The bit depth of the swapchain will be fitted and the
* chain's images extents should fit it for performance-optimizing flips.
@ -2736,8 +3171,14 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
chain->base.wait_for_present2 = x11_wait_for_present;
chain->base.release_images = x11_release_images;
chain->base.set_present_mode = x11_set_present_mode;
chain->base.set_timing_request = x11_set_timing_request;
chain->base.poll_early_refresh = x11_poll_early_refresh;
chain->base.present_mode = present_mode;
chain->base.image_count = num_images;
/* This is what the X server uses for UST. We cannot really query it, but we rely on it. */
chain->base.present_timing.time_domain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR;
chain->conn = conn;
chain->window = window;
chain->depth = bit_depth;
@ -2749,6 +3190,7 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
chain->has_dri3_modifiers = wsi_conn->has_dri3_modifiers;
chain->has_mit_shm = wsi_conn->has_mit_shm;
chain->has_async_may_tear = present_caps & XCB_PRESENT_CAPABILITY_ASYNC_MAY_TEAR;
chain->has_reliable_msc = !wsi_conn->is_xwayland;
/* When images in the swapchain don't fit the window, X can still present them, but it won't
* happen by flip, only by copy. So this is a suboptimal copy, because if the client would change
@ -2856,6 +3298,9 @@ x11_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
/* It is safe to set it here as only one swapchain can be associated with
* the window, and swapchain creation does the association. At this point
* we know the creation is going to succeed. */
/* If we have present timing, we need to make sure we get a usable refresh rate estimate
 * before we let the window run in full VRR. Once we have locked in the rate, we can enable the VRR property. */
wsi_x11_set_adaptive_sync_property(conn, window,
wsi_device->enable_adaptive_sync);
@ -2889,6 +3334,18 @@ fail_alloc:
return result;
}
static uint32_t x11_hash_icd_surface(const void *key)
{
return _mesa_hash_data(key, sizeof(struct x11_icd_surface_key));
}
static bool x11_icd_surface_equal(const void *a_, const void *b_)
{
const struct x11_icd_surface_key *a = a_;
const struct x11_icd_surface_key *b = b_;
return a->conn == b->conn && a->window == b->window;
}
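/* The raw-byte hash above is only well-defined because every producer of a
 * key clears the trailing padding member: wsi_x11_get_icd_surface() builds the
 * key with a designated initializer (which zero-fills the remaining members)
 * and wsi_x11_icd_surface_create() allocates with vk_zalloc(). Sketch:
 *
 *    struct x11_icd_surface_key key = { .conn = conn, .window = window };
 *    // .padding is implicitly zeroed, so _mesa_hash_data() sees stable bytes.
 */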
VkResult
wsi_x11_init_wsi(struct wsi_device *wsi_device,
const VkAllocationCallbacks *alloc,
@ -2916,8 +3373,7 @@ wsi_x11_init_wsi(struct wsi_device *wsi_device,
goto fail_alloc;
}
wsi->connections = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
wsi->connections = _mesa_hash_table_create(NULL, x11_hash_icd_surface, x11_icd_surface_equal);
if (!wsi->connections) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail_mutex;
@ -2981,7 +3437,7 @@ wsi_x11_finish_wsi(struct wsi_device *wsi_device,
if (wsi) {
hash_table_foreach(wsi->connections, entry)
wsi_x11_connection_destroy(wsi_device, entry->data);
wsi_x11_icd_surface_destroy(wsi_device, entry->data);
_mesa_hash_table_destroy(wsi->connections, NULL);