The patch changes the following:

* Implement scheduling support for headless present stages.
* Fix a bug where present query stages were added to the internal queue when the application did not request any.
* Change the implementation of present timing extension to store device_private_data rather than VkDevice as all queries require it.
* Document the shortcomings of implementing scheduling support for Wayland backends.

Change-Id: I050cc700a88bce476b350caf8cc23dfb551f4a0c 
Signed-off-by: Normunds Rieksts <normunds.rieksts@arm.com>
This commit is contained in:
Normunds Rieksts 2025-07-30 12:27:09 +00:00 committed by Iason Paraskevopoulos
parent 1016e4d2e9
commit dfe88e3c1c
8 changed files with 225 additions and 55 deletions

View file

@ -52,17 +52,18 @@ const std::array<VkPresentStageFlagBitsEXT, MAX_PRESENT_STAGES> g_present_stages
wsi_ext_present_timing::wsi_ext_present_timing(const util::allocator &allocator, VkDevice device, uint32_t num_images)
: m_allocator(allocator)
, m_time_domains(allocator)
, m_device(device)
, m_device(layer::device_private_data::get(device))
, m_query_pool(VK_NULL_HANDLE)
, m_command_pool(VK_NULL_HANDLE)
, m_command_buffer(allocator)
, m_queue_mutex()
, m_queue(allocator)
, m_scheduled_present_targets(allocator)
, m_num_images(num_images)
, m_present_semaphore(allocator)
, m_timestamp_period(0.f)
{
if (!layer::device_private_data::get(m_device).is_present_id_enabled())
if (!m_device.is_present_id_enabled())
{
WSI_LOG_ERROR(VK_EXT_PRESENT_TIMING_EXTENSION_NAME
" enabled but required extension " VK_KHR_PRESENT_ID_EXTENSION_NAME " is not enabled.");
@ -70,28 +71,26 @@ wsi_ext_present_timing::wsi_ext_present_timing(const util::allocator &allocator,
VkPhysicalDeviceProperties2KHR physical_device_properties{};
physical_device_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
const auto &dev = layer::device_private_data::get(m_device);
auto &inst = layer::instance_private_data::get(dev.physical_device);
inst.disp.GetPhysicalDeviceProperties2KHR(dev.physical_device, &physical_device_properties);
auto &inst = layer::instance_private_data::get(m_device.physical_device);
inst.disp.GetPhysicalDeviceProperties2KHR(m_device.physical_device, &physical_device_properties);
m_timestamp_period = physical_device_properties.properties.limits.timestampPeriod;
}
wsi_ext_present_timing::~wsi_ext_present_timing()
{
const layer::device_private_data &device_data = layer::device_private_data::get(m_device);
device_data.disp.FreeCommandBuffers(m_device, m_command_pool, m_command_buffer.size(), m_command_buffer.data());
m_device.disp.FreeCommandBuffers(m_device.device, m_command_pool, m_command_buffer.size(), m_command_buffer.data());
for (auto &command_buffer : m_command_buffer)
{
command_buffer = VK_NULL_HANDLE;
}
if (m_command_pool != VK_NULL_HANDLE)
{
device_data.disp.DestroyCommandPool(m_device, m_command_pool, m_allocator.get_original_callbacks());
m_device.disp.DestroyCommandPool(m_device.device, m_command_pool, m_allocator.get_original_callbacks());
m_command_pool = VK_NULL_HANDLE;
}
if (m_query_pool != VK_NULL_HANDLE)
{
device_data.disp.DestroyQueryPool(m_device, m_query_pool, m_allocator.get_original_callbacks());
m_device.disp.DestroyQueryPool(m_device.device, m_query_pool, m_allocator.get_original_callbacks());
m_query_pool = VK_NULL_HANDLE;
}
@ -99,14 +98,18 @@ wsi_ext_present_timing::~wsi_ext_present_timing()
{
if (semaphore != VK_NULL_HANDLE)
{
device_data.disp.DestroySemaphore(m_device, semaphore, m_allocator.get_original_callbacks());
m_device.disp.DestroySemaphore(m_device.device, semaphore, m_allocator.get_original_callbacks());
}
}
}
VkResult wsi_ext_present_timing::init_timing_resources()
{
const layer::device_private_data &device_data = layer::device_private_data::get(m_device);
if (!m_scheduled_present_targets.try_resize(m_num_images))
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
if (!m_present_semaphore.try_resize(m_num_images))
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
@ -116,8 +119,8 @@ VkResult wsi_ext_present_timing::init_timing_resources()
semaphore = VK_NULL_HANDLE;
VkSemaphoreCreateInfo semaphore_info = {};
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
if (device_data.disp.CreateSemaphore(m_device, &semaphore_info, m_allocator.get_original_callbacks(),
&semaphore) != VK_SUCCESS)
if (m_device.disp.CreateSemaphore(m_device.device, &semaphore_info, m_allocator.get_original_callbacks(),
&semaphore) != VK_SUCCESS)
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
@ -135,24 +138,24 @@ VkResult wsi_ext_present_timing::init_timing_resources()
VkQueryPoolCreateInfo query_pool_info = {
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, m_num_images, 0
};
TRY_LOG_CALL(device_data.disp.CreateQueryPool(m_device, &query_pool_info, m_allocator.get_original_callbacks(),
&m_query_pool));
TRY_LOG_CALL(m_device.disp.CreateQueryPool(m_device.device, &query_pool_info, m_allocator.get_original_callbacks(),
&m_query_pool));
VkCommandPoolCreateInfo command_pool_info{ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr,
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, 0 };
TRY_LOG_CALL(device_data.disp.CreateCommandPool(m_device, &command_pool_info, m_allocator.get_original_callbacks(),
&m_command_pool));
TRY_LOG_CALL(m_device.disp.CreateCommandPool(m_device.device, &command_pool_info,
m_allocator.get_original_callbacks(), &m_command_pool));
/* Allocate and write the command buffer. */
VkCommandBufferAllocateInfo command_buffer_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
m_command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, m_num_images };
TRY_LOG_CALL(device_data.disp.AllocateCommandBuffers(m_device, &command_buffer_info, m_command_buffer.data()));
TRY_LOG_CALL(m_device.disp.AllocateCommandBuffers(m_device.device, &command_buffer_info, m_command_buffer.data()));
VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, 0, nullptr };
for (size_t image_index = 0; image_index < m_num_images; image_index++)
{
TRY_LOG_CALL(device_data.disp.BeginCommandBuffer(m_command_buffer[image_index], &begin_info));
device_data.disp.CmdResetQueryPool(m_command_buffer[image_index], m_query_pool, image_index, 1);
device_data.disp.CmdWriteTimestamp(m_command_buffer[image_index], VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
m_query_pool, image_index);
TRY_LOG_CALL(device_data.disp.EndCommandBuffer(m_command_buffer[image_index]));
TRY_LOG_CALL(m_device.disp.BeginCommandBuffer(m_command_buffer[image_index], &begin_info));
m_device.disp.CmdResetQueryPool(m_command_buffer[image_index], m_query_pool, image_index, 1);
m_device.disp.CmdWriteTimestamp(m_command_buffer[image_index], VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_query_pool,
image_index);
TRY_LOG_CALL(m_device.disp.EndCommandBuffer(m_command_buffer[image_index]));
}
return VK_SUCCESS;
}
@ -194,9 +197,8 @@ VkResult wsi_ext_present_timing::write_pending_results()
if (slot.is_pending(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT))
{
uint64_t time;
const layer::device_private_data &device_data = layer::device_private_data::get(m_device);
TRY(device_data.disp.GetQueryPoolResults(m_device, m_query_pool, slot.m_image_index, 1, sizeof(time), &time, 0,
VK_QUERY_RESULT_64_BIT));
TRY(m_device.disp.GetQueryPoolResults(m_device.device, m_query_pool, slot.m_image_index, 1, sizeof(time),
&time, 0, VK_QUERY_RESULT_64_BIT));
slot.set_stage_timing(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT, ticks_to_ns(time, m_timestamp_period));
}
if (slot.is_pending(VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT))
@ -273,9 +275,8 @@ VkResult wsi_ext_present_timing::queue_submit_queue_end_timing(const layer::devi
return VK_SUCCESS;
}
VkResult wsi_ext_present_timing::add_presentation_entry(const layer::device_private_data &device, VkQueue queue,
uint64_t present_id, uint32_t image_index,
VkPresentStageFlagsEXT present_stage_queries)
VkResult wsi_ext_present_timing::add_presentation_query_entry(VkQueue queue, uint64_t present_id, uint32_t image_index,
VkPresentStageFlagsEXT present_stage_queries)
{
const std::lock_guard<std::mutex> lock(m_queue_mutex);
TRY_LOG_CALL(write_pending_results());
@ -293,8 +294,41 @@ VkResult wsi_ext_present_timing::add_presentation_entry(const layer::device_priv
}
if (present_stage_queries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT)
{
TRY_LOG_CALL(queue_submit_queue_end_timing(device, queue, image_index));
TRY_LOG_CALL(queue_submit_queue_end_timing(m_device, queue, image_index));
}
return VK_SUCCESS;
}
/**
 * @brief Record a scheduled present target for a swapchain image.
 *
 * Overwrites any previously stored target for @p image_index; the entry is
 * later consumed by the backend when the image is actually presented.
 *
 * @param image_index The index of the image in the swapchain.
 * @param timing_info The application-provided timing info; its
 *                    targetPresentStage must be non-zero (asserted below).
 */
void wsi_ext_present_timing::add_presentation_target_entry(uint32_t image_index,
const VkPresentTimingInfoEXT &timing_info)
{
/* Callers are expected to only add an entry when a target stage was requested. */
assert(timing_info.targetPresentStage);
m_scheduled_present_targets[image_index] = scheduled_present_target(timing_info);
}
/**
 * @brief Clear the scheduled present target (if any) for a swapchain image.
 *
 * @param image_index The index of the image in the swapchain whose entry is cleared.
 */
void wsi_ext_present_timing::remove_presentation_target_entry(uint32_t image_index)
{
m_scheduled_present_targets[image_index] = std::nullopt;
}
/**
 * @brief Get a copy of the scheduled present target for a swapchain image.
 *
 * @param image_index The index of the image in the swapchain.
 * @return The stored target, or std::nullopt when none was scheduled.
 */
std::optional<scheduled_present_target> wsi_ext_present_timing::get_presentation_target_entry(uint32_t image_index)
{
return m_scheduled_present_targets[image_index];
}
/**
 * @brief Add a presentation entry to the present timing bookkeeping.
 *
 * Dispatches on the content of @p timing_info: a query entry is added only
 * when the application requested present stage queries, and a target entry
 * only when a target present stage was requested — avoiding spurious
 * entries in the internal queue.
 *
 * @param queue       The Vulkan queue used to submit synchronization commands.
 * @param present_id  The present id of the current presentation.
 * @param image_index The index of the image in the swapchain.
 * @param timing_info The timing info for the presentation.
 *
 * @return VK_SUCCESS on success; otherwise the error propagated from
 *         add_presentation_query_entry() via TRY_LOG_CALL.
 */
VkResult wsi_ext_present_timing::add_presentation_entry(VkQueue queue, uint64_t present_id, uint32_t image_index,
const VkPresentTimingInfoEXT &timing_info)
{
if (timing_info.presentStageQueries)
{
TRY_LOG_CALL(add_presentation_query_entry(queue, present_id, image_index, timing_info.presentStageQueries));
}
if (timing_info.targetPresentStage)
{
add_presentation_target_entry(image_index, timing_info);
}
return VK_SUCCESS;
}

View file

@ -286,6 +286,27 @@ private:
util::vector<util::unique_ptr<swapchain_time_domain>> m_time_domains;
};
/**
 * @brief Structure describing a scheduled present target.
 *
 * Value-type snapshot of the scheduling-related fields of a
 * VkPresentTimingInfoEXT, stored per swapchain image until the image is
 * presented.
 */
struct scheduled_present_target
{
scheduled_present_target(const VkPresentTimingInfoEXT &timing_info)
: m_target_stage(timing_info.targetPresentStage)
, m_time_domain_id(timing_info.timeDomainId)
, m_present_at_nearest_refresh_cycle(timing_info.presentAtNearestRefreshCycle)
, m_present_at_relative_time(timing_info.presentAtRelativeTime)
, m_target_present_time(timing_info.time)
{
}
/** Present stage(s) the target time applies to. */
VkPresentStageFlagsEXT m_target_stage;
/** Time domain in which the target time is expressed. */
uint64_t m_time_domain_id;
/** Whether to snap the present to the nearest refresh cycle. */
bool m_present_at_nearest_refresh_cycle;
/** Whether the target time is relative (vs. absolute). */
bool m_present_at_relative_time;
/** The requested present time. */
VkPresentTimeEXT m_target_present_time;
};
/**
* @brief Present timing extension class
*
@ -362,8 +383,44 @@ public:
* @return VK_SUCCESS when the entry was inserted successfully and VK_ERROR_OUT_OF_HOST_MEMORY
* when there is no host memory.
*/
VkResult add_presentation_entry(const layer::device_private_data &device, VkQueue queue, uint64_t present_id,
uint32_t image_index, VkPresentStageFlagsEXT present_stage_queries);
VkResult add_presentation_query_entry(VkQueue queue, uint64_t present_id, uint32_t image_index,
VkPresentStageFlagsEXT present_stage_queries);
/**
* @brief Add a presentation target entry.
*
* @param image_index The index of the image in the swapchain.
* @param timing_info The timing info for the presentation target.
*/
void add_presentation_target_entry(uint32_t image_index, const VkPresentTimingInfoEXT &timing_info);
/**
* @brief Remove a presentation target entry.
*
* @param image_index The index of the image in the swapchain for which to remove the entry.
*/
void remove_presentation_target_entry(uint32_t image_index);
/**
* @brief Get the presentation target entry for @p image_index if any
*
* @param image_index The index of the image in the swapchain.
* @return Scheduled present target if any exists currently for the image.
*/
std::optional<scheduled_present_target> get_presentation_target_entry(uint32_t image_index);
/**
* @brief Add a presentation entry to the present timing queue.
*
* @param queue The Vulkan queue used to submit synchronization commands.
* @param present_id The present id of the current presentation.
* @param image_index The index of the image in the swapchain.
* @param timing_info The timing info for the presentation.
*
* @return VK_SUCCESS when the entry was inserted successfully, error otherwise.
*/
VkResult add_presentation_entry(VkQueue queue, uint64_t present_id, uint32_t image_index,
const VkPresentTimingInfoEXT &timing_info);
/**
* @brief Set the time for a stage, if it exists and is pending.
@ -451,9 +508,9 @@ private:
swapchain_time_domains m_time_domains;
/**
* @brief The Vulkan device.
* @brief The Vulkan layer device.
*/
VkDevice m_device;
layer::device_private_data &m_device;
/**
* @brief Query pool to allocate for present stage timing queries.
@ -484,6 +541,11 @@ private:
*/
util::vector<swapchain_presentation_entry> m_queue;
/**
* @brief The presentation target entries.
*/
util::vector<std::optional<scheduled_present_target>> m_scheduled_present_targets;
/**
* @brief The number of images in the swapchain.
*/

View file

@ -120,4 +120,35 @@ VkResult wsi_ext_present_timing_headless::get_swapchain_timing_properties(
return VK_SUCCESS;
}
/**
 * @brief Get the current time, in nanoseconds, of the configured monotonic time domain.
 *
 * Maps the swapchain's monotonic Vulkan time domain to the matching POSIX
 * clock id and reads it with clock_gettime().
 *
 * @return Current time in nanoseconds, or std::nullopt when no monotonic
 *         domain is configured, the domain is not one of the two supported
 *         monotonic domains, or the clock read fails.
 */
std::optional<uint64_t> wsi_ext_present_timing_headless::get_current_clock_time_ns() const
{
if (!m_monotonic_domain.has_value())
{
return std::nullopt;
}
/* Default is only a placeholder; the switch below either assigns the real
 * clock id or bails out for unsupported domains. */
clockid_t clockid = CLOCK_MONOTONIC_RAW;
switch (*m_monotonic_domain)
{
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
clockid = CLOCK_MONOTONIC_RAW;
break;
case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
clockid = CLOCK_MONOTONIC;
break;
default:
return std::nullopt;
}
struct timespec now = {};
if (clock_gettime(clockid, &now) != 0)
{
WSI_LOG_ERROR("Failed to get time of clock %d, error: %d (%s)", clockid, errno, strerror(errno));
return std::nullopt;
}
/* Convert seconds to ns; 1e9 is exactly representable, so the cast is lossless. */
return now.tv_sec * static_cast<uint64_t>(1e9) + now.tv_nsec;
}
#endif

View file

@ -60,6 +60,13 @@ public:
return m_monotonic_domain;
}
/**
* @brief Get the current clock time by using clock_gettime with the monotonic time domain
*
* @return Current time in specified domain or std::nullopt in case of error.
*/
std::optional<uint64_t> get_current_clock_time_ns() const;
private:
wsi_ext_present_timing_headless(const util::allocator &allocator, VkDevice device, uint32_t num_images,
std::optional<VkTimeDomainEXT> monotonic_domain);

View file

@ -232,7 +232,7 @@ VkResult surface_properties::get_present_timing_surface_caps(
{
present_timing_surface_caps->presentTimingSupported = VK_TRUE;
present_timing_surface_caps->presentAtAbsoluteTimeSupported = VK_TRUE;
present_timing_surface_caps->presentAtRelativeTimeSupported = VK_TRUE;
present_timing_surface_caps->presentAtRelativeTimeSupported = VK_FALSE;
VkPresentStageFlagsEXT monotonic_present_stages_supported = 0;
std::array monotonic_domains = {

View file

@ -209,6 +209,43 @@ VkResult swapchain::create_swapchain_image(VkImageCreateInfo image_create_info,
void swapchain::present_image(const pending_present_request &pending_present)
{
#if VULKAN_WSI_LAYER_EXPERIMENTAL
auto *ext_present_timing = get_swapchain_extension<wsi_ext_present_timing_headless>();
if (ext_present_timing)
{
auto presentation_target = ext_present_timing->get_presentation_target_entry(pending_present.image_index);
if (presentation_target)
{
const VkPresentStageFlagsEXT supported_target_stages = VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT |
VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT |
VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT;
if ((presentation_target->m_target_stage & supported_target_stages) != 0)
{
/* No support for relative presentation mode currently */
assert(!presentation_target->m_present_at_relative_time);
if (!presentation_target->m_present_at_relative_time)
{
/* No need to check whether we need to present at nearest refresh cycle since this backend is not
limited by the refresh cycles. */
uint64_t absolute_future_present_time_ns = presentation_target->m_target_present_time.targetPresentTime;
auto current_time_ns = ext_present_timing->get_current_clock_time_ns();
if (*current_time_ns < absolute_future_present_time_ns)
{
/* Sleep until we can schedule the image for completion.
* This is OK as the sleep should only be dispatched on the page_flip thread and not on main. */
assert(m_page_flip_thread_run);
int64_t time_diff = absolute_future_present_time_ns - *current_time_ns;
std::this_thread::sleep_for(std::chrono::nanoseconds(time_diff));
}
}
}
}
ext_present_timing->remove_presentation_target_entry(pending_present.image_index);
}
#endif
if (m_device_data.is_present_id_enabled())
{
auto *ext_present_id = get_swapchain_extension<wsi_ext_present_id>(true);
@ -216,29 +253,23 @@ void swapchain::present_image(const pending_present_request &pending_present)
}
#if VULKAN_WSI_LAYER_EXPERIMENTAL
auto *ext_present_timing = get_swapchain_extension<wsi_ext_present_timing_headless>(false);
if (ext_present_timing && ext_present_timing->get_monotonic_domain().has_value())
{
clockid_t clockid = ext_present_timing->get_monotonic_domain().value() == VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT ?
CLOCK_MONOTONIC :
CLOCK_MONOTONIC_RAW;
struct timespec now = {};
if (clock_gettime(clockid, &now) != 0)
auto current_time = ext_present_timing->get_current_clock_time_ns();
if (!current_time.has_value())
{
WSI_LOG_ERROR("Failed to get time of clock %d, error: %d (%s)", clockid, errno, strerror(errno));
/* Set all times to 0 as we were not able to query them. */
current_time = 0;
}
else
VkPresentStageFlagBitsEXT stages[] = {
VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT,
VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT,
VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT,
};
for (auto stage : stages)
{
uint64_t time = now.tv_sec * 1e9 + now.tv_nsec;
VkPresentStageFlagBitsEXT stages[] = {
VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT,
VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT,
VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT,
};
for (auto stage : stages)
{
ext_present_timing->set_pending_stage_time(pending_present.image_index, stage, time);
}
ext_present_timing->set_pending_stage_time(pending_present.image_index, stage, *current_time);
}
}
#endif

View file

@ -674,8 +674,7 @@ VkResult swapchain_base::queue_present(VkQueue queue, const VkPresentInfoKHR *pr
{
auto *ext_present_timing = get_swapchain_extension<wsi::wsi_ext_present_timing>(true);
TRY_LOG_CALL(ext_present_timing->add_presentation_entry(
m_device_data, queue, submit_info.pending_present.present_id, submit_info.pending_present.image_index,
present_timing_info->presentStageQueries));
queue, submit_info.pending_present.present_id, submit_info.pending_present.image_index, *present_timing_info));
}
#endif
TRY(notify_presentation_engine(submit_info.pending_present));

View file

@ -448,6 +448,12 @@ VkResult surface_properties::get_present_timing_surface_caps(
present_timing_surface_caps->presentAtAbsoluteTimeSupported = VK_FALSE;
present_timing_surface_caps->presentAtRelativeTimeSupported = VK_FALSE;
present_timing_surface_caps->presentStageQueries = VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT;
/* The extension supports scheduling targets only on FIFO & FIFO_RELAXED modes. We currently only have
support for scheduling presents when using the presentation thread. While FIFO runs on Wayland in
threaded mode, FIFO_RELAXED does not. If you are adding any supported stage to presentStageTargets,
make sure to check that swapchain cannot be created with present timing support on present modes that
do not use presentation thread unless support has been added in other ways. */
present_timing_surface_caps->presentStageTargets = 0;
if (specific_surface->get_presentation_time_interface() != nullptr)