Acquiring timings for VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT

This commit is contained in:
Ginu Jacob 2025-05-28 15:59:16 +00:00 committed by Iason Paraskevopoulos
parent 20f64a2e48
commit 5d4b84408a
15 changed files with 808 additions and 101 deletions

View file

@ -108,10 +108,15 @@ wsi_layer_vkGetPastPresentationTimingEXT(
VkDevice device, const VkPastPresentationTimingInfoEXT *pPastPresentationTimingInfo,
VkPastPresentationTimingPropertiesEXT *pPastPresentationTimingProperties) VWL_API_POST
{
UNUSED(device);
UNUSED(pPastPresentationTimingInfo);
UNUSED(pPastPresentationTimingProperties);
VkResult result = VK_SUCCESS;
return result;
assert(pPastPresentationTimingInfo != nullptr);
auto &device_data = layer::device_private_data::get(device);
if (!device_data.layer_owns_swapchain(pPastPresentationTimingInfo->swapchain))
{
return device_data.disp.GetPastPresentationTimingEXT(device, pPastPresentationTimingInfo,
pPastPresentationTimingProperties);
}
auto *sc = reinterpret_cast<wsi::swapchain_base *>(pPastPresentationTimingInfo->swapchain);
auto *ext = sc->get_swapchain_extension<wsi::wsi_ext_present_timing>(true);
return ext->get_past_presentation_results(pPastPresentationTimingProperties);
}
#endif /* VULKAN_WSI_LAYER_EXPERIMENTAL */

View file

@ -356,7 +356,8 @@ private:
EP(GetSwapchainTimeDomainPropertiesEXT, VK_EXT_PRESENT_TIMING_EXTENSION_NAME, API_VERSION_MAX, false, ) \
EP(GetSwapchainTimingPropertiesEXT, VK_EXT_PRESENT_TIMING_EXTENSION_NAME, API_VERSION_MAX, false, ) \
EP(SetSwapchainPresentTimingQueueSizeEXT, VK_EXT_PRESENT_TIMING_EXTENSION_NAME, API_VERSION_MAX, false, ) \
EP(WaitForPresentKHR, VK_KHR_PRESENT_WAIT_EXTENSION_NAME, API_VERSION_MAX, false, )
EP(WaitForPresentKHR, VK_KHR_PRESENT_WAIT_EXTENSION_NAME, API_VERSION_MAX, false, ) \
EP(GetPastPresentationTimingEXT, VK_EXT_PRESENT_TIMING_EXTENSION_NAME, API_VERSION_MAX, false, )
#else
#define DEVICE_ENTRYPOINTS_LIST_EXPERIMENTAL(EP)
#endif
@ -390,6 +391,11 @@ private:
EP(ResetFences, "", VK_API_VERSION_1_0, true, ) \
EP(WaitForFences, "", VK_API_VERSION_1_0, true, ) \
EP(DestroyDevice, "", VK_API_VERSION_1_0, true, ) \
EP(CmdResetQueryPool, "", VK_API_VERSION_1_0, true, ) \
EP(CmdWriteTimestamp, "", VK_API_VERSION_1_0, true, ) \
EP(CreateQueryPool, "", VK_API_VERSION_1_0, true, ) \
EP(DestroyQueryPool, "", VK_API_VERSION_1_0, true, ) \
EP(GetQueryPoolResults, "", VK_API_VERSION_1_0, true, ) \
/* VK_KHR_swapchain */ \
EP(CreateSwapchainKHR, VK_KHR_SWAPCHAIN_EXTENSION_NAME, API_VERSION_MAX, false, ) \
EP(DestroySwapchainKHR, VK_KHR_SWAPCHAIN_EXTENSION_NAME, API_VERSION_MAX, false, ) \

View file

@ -181,6 +181,10 @@ typedef VkResult(VKAPI_PTR *PFN_vkGetSwapchainTimingPropertiesEXT)(
typedef VkResult(VKAPI_PTR *PFN_vkSetSwapchainPresentTimingQueueSizeEXT)(VkDevice device, VkSwapchainKHR swapchain,
uint32_t size);
typedef VkResult(VKAPI_PTR *PFN_vkGetPastPresentationTimingEXT)(
VkDevice device, const VkPastPresentationTimingInfoEXT *pPastPresentationTimingInfo,
VkPastPresentationTimingPropertiesEXT *pPastPresentationTimingProperties);
VWL_VKAPI_CALL(VkResult)
wsi_layer_vkSetSwapchainPresentTimingQueueSizeEXT(VkDevice device, VkSwapchainKHR swapchain,
uint32_t size) VWL_API_POST;

View file

@ -27,6 +27,7 @@
*
* @brief Contains the implementation for the VK_EXT_present_timing extension.
*/
#include <array>
#include <cassert>
#include <wsi/swapchain_base.hpp>
@ -35,16 +36,141 @@
#if VULKAN_WSI_LAYER_EXPERIMENTAL
namespace wsi
{
/* Number of present stages handled by this file:
 *   VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT,
 *   VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT,
 *   VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT,
 *   VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT
 */
static constexpr size_t MAX_PRESENT_STAGES = 4;

/* The present stages, listed in the order in which they are walked when stage timings are reported. */
static constexpr std::array<VkPresentStageFlagBitsEXT, MAX_PRESENT_STAGES> g_present_stages = {
   VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT,
   VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT,
   VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT,
   VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT,
};
wsi_ext_present_timing::wsi_ext_present_timing(const util::allocator &allocator)
wsi_ext_present_timing::wsi_ext_present_timing(const util::allocator &allocator, VkDevice device, uint32_t num_images)
: m_allocator(allocator)
, m_queue(allocator)
, m_time_domains(allocator)
, m_device(device)
, m_query_pool(VK_NULL_HANDLE)
, m_command_pool(VK_NULL_HANDLE)
, m_command_buffer(allocator)
, m_queue(allocator)
, m_num_images(num_images)
, m_present_semaphore(allocator)
{
}
/**
 * @brief Release every Vulkan object owned by the present timing extension.
 *
 * The object may be destroyed after a partially failed init_timing_resources(), so each
 * resource is only released when it was actually created.
 */
wsi_ext_present_timing::~wsi_ext_present_timing()
{
   const layer::device_private_data &device_data = layer::device_private_data::get(m_device);

   /* init_timing_resources() can fail before the command pool is created or before the command
    * buffer storage is sized. Freeing command buffers needs a valid pool and a non-zero count,
    * so skip the call when either is missing. */
   if ((m_command_pool != VK_NULL_HANDLE) && (m_command_buffer.size() > 0))
   {
      device_data.disp.FreeCommandBuffers(m_device, m_command_pool, static_cast<uint32_t>(m_command_buffer.size()),
                                          m_command_buffer.data());
   }
   for (auto &command_buffer : m_command_buffer)
   {
      command_buffer = VK_NULL_HANDLE;
   }

   if (m_command_pool != VK_NULL_HANDLE)
   {
      device_data.disp.DestroyCommandPool(m_device, m_command_pool, m_allocator.get_original_callbacks());
      m_command_pool = VK_NULL_HANDLE;
   }

   if (m_query_pool != VK_NULL_HANDLE)
   {
      device_data.disp.DestroyQueryPool(m_device, m_query_pool, m_allocator.get_original_callbacks());
      m_query_pool = VK_NULL_HANDLE;
   }

   /* Entries that were never created are still VK_NULL_HANDLE and are skipped. */
   for (auto semaphore : m_present_semaphore)
   {
      if (semaphore != VK_NULL_HANDLE)
      {
         device_data.disp.DestroySemaphore(m_device, semaphore, m_allocator.get_original_callbacks());
      }
   }
}
/**
 * @brief Create the per-image resources used to time the queue operations end stage.
 *
 * For each of the m_num_images images this creates one semaphore, one query in a timestamp
 * query pool and one pre-recorded command buffer that resets the image's query and writes a
 * bottom-of-pipe timestamp into it.
 *
 * Resources created before a failure are left in place; the destructor releases them.
 *
 * @return VK_SUCCESS on success, VK_ERROR_OUT_OF_HOST_MEMORY when the host containers cannot
 *         be resized, otherwise the error returned by the failing Vulkan call.
 */
VkResult wsi_ext_present_timing::init_timing_resources()
{
   const layer::device_private_data &device_data = layer::device_private_data::get(m_device);

   if (!m_present_semaphore.try_resize(m_num_images))
   {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }
   /* Null every handle up front so that the destructor can tell which ones were created,
    * even if creation stops part-way through. */
   for (auto &semaphore : m_present_semaphore)
   {
      semaphore = VK_NULL_HANDLE;
   }

   VkSemaphoreCreateInfo semaphore_info = {};
   semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
   for (auto &semaphore : m_present_semaphore)
   {
      /* Report the real failure reason instead of always claiming host memory exhaustion. */
      TRY_LOG_CALL(device_data.disp.CreateSemaphore(m_device, &semaphore_info, m_allocator.get_original_callbacks(),
                                                    &semaphore));
   }

   /* Resize the command buffer to the number of images. */
   if (!m_command_buffer.try_resize(m_num_images))
   {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }
   for (auto &command_buffer : m_command_buffer)
   {
      command_buffer = VK_NULL_HANDLE;
   }

   /* Allocate the command pool and query pool: one timestamp query per image. */
   VkQueryPoolCreateInfo query_pool_info = {
      VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, m_num_images, 0
   };
   TRY_LOG_CALL(device_data.disp.CreateQueryPool(m_device, &query_pool_info, m_allocator.get_original_callbacks(),
                                                 &m_query_pool));

   /* NOTE(review): the pool is created for queue family index 0 - confirm this matches the
    * queue the command buffers are later submitted to. */
   VkCommandPoolCreateInfo command_pool_info{ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr,
                                              VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, 0 };
   TRY_LOG_CALL(device_data.disp.CreateCommandPool(m_device, &command_pool_info, m_allocator.get_original_callbacks(),
                                                   &m_command_pool));

   /* Allocate and write the command buffer. */
   VkCommandBufferAllocateInfo command_buffer_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
                                                       m_command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, m_num_images };
   TRY_LOG_CALL(device_data.disp.AllocateCommandBuffers(m_device, &command_buffer_info, m_command_buffer.data()));

   /* No usage flags: the same recorded command buffer is submitted each time its image is presented. */
   VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, 0, nullptr };
   for (size_t image_index = 0; image_index < m_num_images; image_index++)
   {
      TRY_LOG_CALL(device_data.disp.BeginCommandBuffer(m_command_buffer[image_index], &begin_info));
      /* The query has to be reset before every timestamp write. */
      device_data.disp.CmdResetQueryPool(m_command_buffer[image_index], m_query_pool, image_index, 1);
      device_data.disp.CmdWriteTimestamp(m_command_buffer[image_index], VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                                         m_query_pool, image_index);
      TRY_LOG_CALL(device_data.disp.EndCommandBuffer(m_command_buffer[image_index]));
   }

   return VK_SUCCESS;
}
/**
 * @brief Copy the queue operations end timestamp of an image from the query pool into the
 *        matching entry of the internal queue.
 *
 * Only the first entry for @p image_index whose queue end stage is still pending is updated.
 *
 * @param image_index Index of the swapchain image, also used as the query index.
 *
 * @return VK_SUCCESS when nothing was pending or the timestamp was stored, otherwise the
 *         result of the query pool read.
 */
VkResult wsi_ext_present_timing::get_queue_end_timing_to_queue(uint32_t image_index)
{
   for (auto &entry : m_queue)
   {
      const bool awaiting_queue_end =
         (entry.m_image_index == image_index) && entry.is_pending(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT);
      if (!awaiting_queue_end)
      {
         continue;
      }

      auto queue_end_timing = entry.get_stage_timing(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT);
      const layer::device_private_data &device_data = layer::device_private_data::get(m_device);

      /* Read a single 64-bit result without waiting for it to become available. */
      uint64_t timestamp = 0;
      TRY(device_data.disp.GetQueryPoolResults(m_device, m_query_pool, image_index, 1, sizeof(timestamp), &timestamp,
                                               0, VK_QUERY_RESULT_64_BIT));
      queue_end_timing->get().m_time.store(timestamp);

      /* For an image index, there can only be one entry in the internal queue with pending results. */
      break;
   }

   return VK_SUCCESS;
}
/**
 * @brief Try to fetch the queue operations end timestamp of every swapchain image.
 *
 * A timestamp that is not ready yet is not treated as an error.
 *
 * @return VK_SUCCESS unless reading an image's timestamp fails with a result other than
 *         VK_NOT_READY, in which case that result is returned.
 */
VkResult wsi_ext_present_timing::query_present_queue_end_timings()
{
   for (uint32_t index = 0; index < m_num_images; ++index)
   {
      const VkResult status = get_queue_end_timing_to_queue(index);
      const bool tolerated = (status == VK_SUCCESS) || (status == VK_NOT_READY);
      if (!tolerated)
      {
         return status;
      }
   }

   return VK_SUCCESS;
}
VkResult wsi_ext_present_timing::present_timing_queue_set_size(size_t queue_size)
@ -60,17 +186,17 @@ VkResult wsi_ext_present_timing::present_timing_queue_set_size(size_t queue_size
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
for (auto iter : m_queue.m_timings)
for (auto &iter : m_queue)
{
if (iter.is_outstanding)
if (iter.has_outstanding_stages())
{
if (!presentation_timing.try_push_back(iter))
if (!presentation_timing.try_push_back(std::move(iter)))
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
}
}
m_queue.m_timings.swap(presentation_timing);
m_queue.swap(presentation_timing);
return VK_SUCCESS;
}
@ -78,9 +204,9 @@ size_t wsi_ext_present_timing::present_timing_get_num_outstanding_results()
{
size_t num_outstanding = 0;
for (const auto &iter : m_queue.m_timings)
for (auto &iter : m_queue)
{
if (iter.is_outstanding)
if (iter.has_outstanding_stages())
{
num_outstanding++;
}
@ -88,12 +214,47 @@ size_t wsi_ext_present_timing::present_timing_get_num_outstanding_results()
return num_outstanding;
}
VkResult wsi_ext_present_timing::add_presentation_entry(const wsi::swapchain_presentation_entry &presentation_entry)
VkResult wsi_ext_present_timing::queue_submit_queue_end_timing(const layer::device_private_data &device, VkQueue queue,
uint32_t image_index)
{
if (!m_queue.m_timings.try_push_back(presentation_entry))
assert(image_index < m_command_buffer.size());
command_buffer_data command_buffer_data(&m_command_buffer[image_index], 1);
VkSemaphore present_timing_semaphore = get_image_present_semaphore(image_index);
queue_submit_semaphores present_timing_semaphores = {
&present_timing_semaphore,
1,
nullptr,
0,
};
TRY_LOG_CALL(sync_queue_submit(device, queue, VK_NULL_HANDLE, present_timing_semaphores, command_buffer_data));
return VK_SUCCESS;
}
/**
 * @brief Record a new presentation in the present timing queue.
 *
 * When the queue operations end stage is requested, the timestamp of the previous
 * presentation of the same image is collected first and a timestamp submission for the new
 * presentation is queued after the entry has been stored.
 *
 * @param device                The device private data.
 * @param queue                 The Vulkan queue used for the timestamp submission.
 * @param present_id            The present id of the current presentation.
 * @param image_index           The index of the image in the swapchain.
 * @param present_stage_queries The present stages the application requested timings for.
 *
 * @return VK_SUCCESS on success, VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT when the queue has
 *         no free slot, VK_ERROR_OUT_OF_HOST_MEMORY when the entry cannot be stored, or the
 *         error of the failing timestamp query/submission.
 */
VkResult wsi_ext_present_timing::add_presentation_entry(const layer::device_private_data &device, VkQueue queue,
                                                        uint64_t present_id, uint32_t image_index,
                                                        VkPresentStageFlagsEXT present_stage_queries)
{
   const bool wants_queue_end_timing = (present_stage_queries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) != 0;

   if (wants_queue_end_timing)
   {
      /* Collect the result of the previous presentation of this image. Its queue end stage
       * must have finished by the time the same image is presented again. */
      TRY_LOG_CALL(get_queue_end_timing_to_queue(image_index));
   }

   /* Keep the internal queue within the limit defined by the application. */
   const bool queue_is_full = (m_queue.size() == m_queue.capacity());
   if (queue_is_full)
   {
      return VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT;
   }

   wsi::swapchain_presentation_entry new_entry(present_stage_queries, present_id, image_index);
   const bool stored = m_queue.try_push_back(std::move(new_entry));
   if (!stored)
   {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   if (wants_queue_end_timing)
   {
      TRY_LOG_CALL(queue_submit_queue_end_timing(device, queue, image_index));
   }

   return VK_SUCCESS;
}
@ -102,6 +263,235 @@ swapchain_time_domains &wsi_ext_present_timing::get_swapchain_time_domains()
return m_time_domains;
}
/**
 * @brief Get the present timing semaphore that belongs to a swapchain image.
 *
 * @param image_index Index of the image; must be smaller than the number of semaphores
 *                    created by init_timing_resources().
 *
 * @return The semaphore stored for @p image_index.
 */
VkSemaphore wsi_ext_present_timing::get_image_present_semaphore(uint32_t image_index)
{
   /* Same bounds check as for the per-image command buffers: catch out-of-range image indices in debug builds. */
   assert(image_index < m_present_semaphore.size());
   return m_present_semaphore[image_index];
}
/**
 * @brief Count the entries of the internal queue that have at least one completed stage.
 *
 * @return The number of entries for which a result can be reported.
 */
uint32_t wsi_ext_present_timing::get_num_available_results()
{
   uint32_t available = 0;
   for (auto &entry : m_queue)
   {
      available += entry.has_completed_stages() ? 1u : 0u;
   }
   return available;
}
/**
 * @brief Report the timings of past presentations from the internal queue.
 *
 * When no output array is supplied (count of zero or null pointer) only the number of
 * available results is written back. Otherwise each requested element is filled from at
 * most one queue entry: an element with presentId zero takes the first entry that has
 * completed stages and was not handed out yet, an element with a non-zero presentId takes
 * the entry with that id. Entries whose report is complete are removed from the queue.
 *
 * @param past_present_timing_properties In/out structure holding the requested elements.
 *
 * @return VK_SUCCESS when every requested element was filled and no further results remain,
 *         VK_INCOMPLETE when fewer results than requested were written (the count is updated)
 *         or when results remain that were not returned, or the error of the timestamp query.
 */
VkResult wsi_ext_present_timing::get_past_presentation_results(
   VkPastPresentationTimingPropertiesEXT *past_present_timing_properties)
{
   assert(past_present_timing_properties != nullptr);

   /* Get any outstanding timings in the query pool to the internal queue. */
   TRY_LOG_CALL(query_present_queue_end_timings());

   /* Count-only query. */
   if ((past_present_timing_properties->presentationTimingCount == 0) ||
       (past_present_timing_properties->pPresentationTimings == nullptr))
   {
      past_present_timing_properties->presentationTimingCount = get_num_available_results();
      return VK_SUCCESS;
   }

   /* The copied flag stops one entry from being returned for more than one requested
    * element, e.g. when several elements use presentId zero. */
   for (auto &slot : m_queue)
   {
      slot.copied = false;
   }

   /* Elements are served in the order given by the application. An element with presentId
    * zero may therefore consume the entry that a later element with a non-zero presentId
    * asks for, in which case that later element stays empty. */
   const uint32_t requested_count = past_present_timing_properties->presentationTimingCount;
   uint32_t count_results = 0;
   for (uint32_t i = 0; i < requested_count; ++i)
   {
      VkPastPresentationTimingEXT &timing = past_present_timing_properties->pPresentationTimings[i];
      bool timings_found = false;

      for (auto slot = m_queue.begin(); slot != m_queue.end(); ++slot)
      {
         auto &entry = *slot;
         if (entry.copied || !entry.has_completed_stages())
         {
            continue;
         }
         /* presentId zero matches any entry, otherwise the ids have to agree. */
         if ((timing.presentId != 0) && (timing.presentId != entry.m_present_id))
         {
            continue;
         }

         assert(timing.presentStageCount >= entry.m_num_present_stages);
         if (!entry.populate(timing))
         {
            continue;
         }

         count_results++;
         entry.copied = true;
         timings_found = true;
         if (timing.reportComplete)
         {
            /* Nothing more will be reported for this entry, so free its slot. */
            m_queue.erase(slot);
         }
         /* One output element describes exactly one presentation. Stop here so that no
          * further entry overwrites this element; the iterator is not used after the erase. */
         break;
      }

      /* When the timings are not filled, reset the count to zero. */
      if (!timings_found)
      {
         timing.presentStageCount = 0;
      }
   }

   /* Count the results that are still waiting to be returned. Entries handed out above are
    * excluded, whether or not they were erased. */
   uint32_t remaining_results = 0;
   for (auto &slot : m_queue)
   {
      if (!slot.copied && slot.has_completed_stages())
      {
         remaining_results++;
      }
   }

   if ((count_results < requested_count) || (remaining_results > 0))
   {
      past_present_timing_properties->presentationTimingCount = count_results;
      return VK_INCOMPLETE;
   }

   return VK_SUCCESS;
}
/**
 * @brief Create a presentation entry with one empty timing per requested stage.
 *
 * @param present_stage_queries Bitmask of the present stages timings are requested for.
 * @param present_id            The present id of the presentation.
 * @param image_index           The index of the presented image in the swapchain.
 */
swapchain_presentation_entry::swapchain_presentation_entry(VkPresentStageFlagsEXT present_stage_queries,
                                                           uint64_t present_id, uint32_t image_index)
   /* NOTE(review): m_target_stages is left at zero rather than set from present_stage_queries - confirm intent. */
   : m_target_stages(0)
   , m_present_id(present_id)
   , m_image_index(image_index)
   , m_num_present_stages(0)
   /* Must be initialised: the defaulted move operations read this member. */
   , copied(false)
{
   if (present_stage_queries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT)
   {
      m_queue_end_timing = swapchain_presentation_timing();
      m_num_present_stages++;
   }
   if (present_stage_queries & VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT)
   {
      m_latch_timing = swapchain_presentation_timing();
      m_num_present_stages++;
   }
   if (present_stage_queries & VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT)
   {
      m_first_pixel_out_timing = swapchain_presentation_timing();
      m_num_present_stages++;
   }
   if (present_stage_queries & VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT)
   {
      m_first_pixel_visible_timing = swapchain_presentation_timing();
      m_num_present_stages++;
   }
}
/**
 * @brief Tell whether a stage was requested for this entry but has no time recorded yet.
 *
 * @param stage The stage to get the status for.
 *
 * @return true when the stage exists in this entry and its time is still zero.
 */
bool swapchain_presentation_entry::is_pending(VkPresentStageFlagBitsEXT stage)
{
   auto requested_timing = get_stage_timing(stage);
   if (!requested_timing.has_value())
   {
      return false;
   }
   return requested_timing->get().m_time.load() == 0;
}
/**
 * @brief Tell whether a stage was requested for this entry and already has a time recorded.
 *
 * @param stage The stage to get the status for.
 *
 * @return true when the stage exists in this entry and its time is non-zero.
 */
bool swapchain_presentation_entry::is_complete(VkPresentStageFlagBitsEXT stage)
{
   auto requested_timing = get_stage_timing(stage);
   if (!requested_timing.has_value())
   {
      return false;
   }
   return requested_timing->get().m_time.load() != 0;
}
bool swapchain_presentation_entry::has_outstanding_stages()
{
/* Check if any of the requested stages is pending to be completed. */
return (is_pending(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) ||
is_pending(VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT) ||
is_pending(VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT) ||
is_pending(VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT));
}
bool swapchain_presentation_entry::has_completed_stages()
{
/* Check if any of the requested stages is complete. */
return (is_complete(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) ||
is_complete(VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT) ||
is_complete(VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT) ||
is_complete(VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT));
}
/**
 * @brief Look up the timing stored in this entry for a present stage.
 *
 * @param stage The stage to get the timing for. Any value other than the four known present
 *              stage bits triggers an assert.
 *
 * @return A reference to the stage's timing, or std::nullopt when the entry holds no timing
 *         for that stage.
 */
std::optional<std::reference_wrapper<swapchain_presentation_timing>> swapchain_presentation_entry::get_stage_timing(
   VkPresentStageFlagBitsEXT stage)
{
   /* Select the member that belongs to the stage, then check it once below. */
   std::optional<swapchain_presentation_timing> *stage_member = nullptr;
   switch (stage)
   {
   case VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT:
      stage_member = &m_queue_end_timing;
      break;
   case VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT:
      stage_member = &m_latch_timing;
      break;
   case VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT:
      stage_member = &m_first_pixel_out_timing;
      break;
   case VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT:
      stage_member = &m_first_pixel_visible_timing;
      break;
   default:
      assert(0);
   }

   if ((stage_member == nullptr) || !stage_member->has_value())
   {
      return std::nullopt;
   }
   return std::ref(**stage_member);
}
/**
 * @brief Copy every completed stage time of this entry into an application structure.
 *
 * Stages are written to timing.pPresentStages in the order of g_present_stages and
 * timing.presentStageCount is set to the number of stages written (zero when none).
 *
 * @param timing Reference to the timing to be populated.
 *
 * @return true when at least one stage was written, false otherwise.
 */
bool swapchain_presentation_entry::populate(VkPastPresentationTimingEXT &timing)
{
   uint32_t written_stages = 0;
   for (const auto &stage : g_present_stages)
   {
      auto requested_timing = get_stage_timing(stage);
      if (!requested_timing.has_value())
      {
         /* The stage was not requested for this presentation. */
         continue;
      }

      const uint64_t stage_time = requested_timing->get().m_time.load();
      if (stage_time == 0)
      {
         /* Requested, but no time has been recorded yet. */
         continue;
      }

      timing.timeDomainId = requested_timing->get().m_timedomain_id;
      timing.pPresentStages[written_stages].stage = stage;
      timing.pPresentStages[written_stages].time = stage_time;
      written_stages++;
   }
   timing.presentStageCount = written_stages;

   if (written_stages == 0)
   {
      return false;
   }

   /* At least one stage was written, so fill in the remaining fields. */
   timing.presentId = m_present_id;
   /* All the available stages are now populated. If there are no more outstanding stages,
    * then the report is complete and the slot can be freed. */
   timing.reportComplete = !has_outstanding_stages();
   return true;
}
VkResult swapchain_time_domains::calibrate(VkPresentStageFlagBitsEXT present_stage,
swapchain_calibrated_time *calibrated_time)
{

View file

@ -32,12 +32,16 @@
#pragma once
#include <layer/wsi_layer_experimental.hpp>
#include <layer/private_data.hpp>
#include <util/custom_allocator.hpp>
#include <util/macros.hpp>
#include <atomic>
#include <iterator>
#include <type_traits>
#include <array>
#include <optional>
#include <functional>
#include "wsi_extension.hpp"
@ -45,6 +49,39 @@
namespace wsi
{
/**
 * @brief Swapchain presentation timing
 *
 * Holds the timing parameters recorded for one presentation stage: the id of the time
 * domain and the time value. Both are zero in a freshly constructed object.
 *
 * The type is move-only; a move copies both fields out of the source object.
 */
struct swapchain_presentation_timing
{
   uint64_t m_timedomain_id{ 0 };
   /* Using atomics to enforce sequentially consistent ordering */
   std::atomic<uint64_t> m_time{ 0 };

   /* The default member initializers above provide the zeroed state. */
   swapchain_presentation_timing() = default;

   swapchain_presentation_timing(swapchain_presentation_timing &&rhs) noexcept
      : m_timedomain_id(rhs.m_timedomain_id)
      , m_time(rhs.m_time.load())
   {
   }

   swapchain_presentation_timing &operator=(swapchain_presentation_timing &&rhs) noexcept
   {
      const uint64_t moved_time = rhs.m_time.load();
      m_timedomain_id = rhs.m_timedomain_id;
      m_time.store(moved_time);
      return *this;
   }

   /* std::atomic is not copyable, so copying is disabled explicitly. */
   swapchain_presentation_timing(const swapchain_presentation_timing &) = delete;
   swapchain_presentation_timing &operator=(const swapchain_presentation_timing &) = delete;
};
/**
* @brief Swapchain presentation entry
*
@ -54,39 +91,108 @@ namespace wsi
struct swapchain_presentation_entry
{
/**
* Whether this entry is an outstanding result or not.
* The target stages for the presentation entry.
*/
bool is_outstanding{ false };
/**
* The present id.
*/
uint64_t present_id{ 0 };
};
VkPresentStageFlagsEXT m_target_stages{ 0 };
/**
* @brief Timings queue
*
* This structure is used to keep the parameters related to the presentation timing queue.
*
* The present id. Zero is a valid value for present id.
*/
struct timings_queue
{
timings_queue(const util::allocator &allocator)
: m_timings(allocator)
{
}
uint64_t m_present_id{ 0 };
util::vector<swapchain_presentation_entry> m_timings;
/**
* The image index of the entry in the swapchain.
*/
uint32_t m_image_index{ 0 };
/**
* The number of requested stages for this entry.
*/
size_t m_num_present_stages;
/**
* When serving a get past presentation timings request, this field
* keep the status of whether the slot had already been copied to
* the results.
*/
bool copied;
/**
* The variables to keep timing stages.
*/
std::optional<swapchain_presentation_timing> m_queue_end_timing;
std::optional<swapchain_presentation_timing> m_latch_timing;
std::optional<swapchain_presentation_timing> m_first_pixel_out_timing;
std::optional<swapchain_presentation_timing> m_first_pixel_visible_timing;
swapchain_presentation_entry(VkPresentStageFlagsEXT present_stage_queries, uint64_t present_id,
uint32_t image_index);
swapchain_presentation_entry(swapchain_presentation_entry &&) noexcept = default;
swapchain_presentation_entry &operator=(swapchain_presentation_entry &&) noexcept = default;
swapchain_presentation_entry(const swapchain_presentation_entry &) = delete;
swapchain_presentation_entry &operator=(const swapchain_presentation_entry &) = delete;
/**
* @brief This API returns true when the requested stage timing is pending.
*
* @param stage The stage to get the status for.
*
* @return true when the stage is pending and false otherwise.
*/
bool is_pending(VkPresentStageFlagBitsEXT stage);
/**
* @brief This API returns true when the requested stage timing is completed.
*
* @param stage The stage to get the status for.
*
* @return true when the stage is completed and false otherwise.
*/
bool is_complete(VkPresentStageFlagBitsEXT stage);
/**
* @brief This API returns true when there are outstanding stages and false otherwise.
*
* @return true when there are outstanding stages and false otherwise.
*/
bool has_outstanding_stages();
/**
* @brief This API returns true when there are completed stages and false otherwise.
*
* @return true when there are completed stages and false otherwise.
*/
bool has_completed_stages();
/**
* @brief This API populates the timing parameters from the swapchain_presentation_entry for all stages.
*
* @param timing Reference to the timing to be populated.
*
* @return true when atleast one stage is populated from the swapchain_presentation_entry and false otherwise.
*/
bool populate(VkPastPresentationTimingEXT &timing);
/**
* @brief This API returns an optional reference to a particular stage of the swapchain_presentation_entry.
*
* @param stage The stage to get the timing for.
*
* @return optional reference to the particular stage, std::nullopt if the stage doesn't exist.
*/
std::optional<std::reference_wrapper<swapchain_presentation_timing>> get_stage_timing(
VkPresentStageFlagBitsEXT stage);
};
// Predefined struct for calibrated time
/* Predefined struct for calibrated time */
struct swapchain_calibrated_time
{
VkTimeDomainKHR time_domain;
uint64_t offset;
};
// Base struct for swapchain time domain
/* Base struct for swapchain time domain */
class swapchain_time_domain
{
public:
@ -188,9 +294,10 @@ public:
template <typename T, std::size_t N>
static util::unique_ptr<T> create(const util::allocator &allocator,
std::array<util::unique_ptr<wsi::vulkan_time_domain>, N> &domains)
std::array<util::unique_ptr<wsi::vulkan_time_domain>, N> &domains, VkDevice device,
uint32_t num_images)
{
auto present_timing = allocator.make_unique<T>(allocator);
auto present_timing = allocator.make_unique<T>(allocator, device, num_images);
for (auto &domain : domains)
{
if (!present_timing->get_swapchain_time_domains().add_time_domain(std::move(domain)))
@ -199,14 +306,24 @@ public:
return nullptr;
}
}
if (present_timing->init_timing_resources() != VK_SUCCESS)
{
WSI_LOG_ERROR("Failed to initialize present timing.");
return nullptr;
}
return present_timing;
}
/**
* @brief Constructor for the wsi_ext_present_timing class.
*
* @param allocator Reference to the custom allocator.
* @param device The device to which the swapchain belongs.
* @param num_images Number of images in the swapchain.
*
*/
wsi_ext_present_timing(const util::allocator &allocator);
wsi_ext_present_timing(const util::allocator &allocator, VkDevice device, uint32_t num_images);
/**
* @brief Destructor for the wsi_ext_present_timing class.
@ -239,14 +356,35 @@ public:
/**
* @brief Add a presentation entry to the present timing queue.
*
* This API pushes a presentation entry to the present timing queue.
*
* @param sc_presentation_entry Reference to the presentation entry to be added.
* @param device The device private data.
* @param queue The Vulkan queue used to submit synchronization commands.
* @param present_id The present id of the current presentation.
* @param image_index The index of the image in the swapchain.
* @param present_stage_queries The present stages application had requested timings for.
*
* @return VK_SUCCESS when the entry was inserted successfully and VK_ERROR_OUT_OF_HOST_MEMORY
* when there is no host memory.
*/
VkResult add_presentation_entry(const wsi::swapchain_presentation_entry &sc_presentation_entry);
VkResult add_presentation_entry(const layer::device_private_data &device, VkQueue queue, uint64_t present_id,
uint32_t image_index, VkPresentStageFlagsEXT present_stage_queries);
/**
* @brief Get the image's present semaphore.
*
* @param image_index Image's index
*
* @return the image's present semaphore.
*/
VkSemaphore get_image_present_semaphore(uint32_t image_index);
/**
* @brief Get the results of the past presentation from the internal queue.
*
* @param past_present_timing_properties Pointer for returning results.
*
* @return VK_SUCCESS when the requested results are returned, VK_INCOMPLETE when returning fewer results.
*/
VkResult get_past_presentation_results(VkPastPresentationTimingPropertiesEXT *past_present_timing_properties);
/**
* @brief Get the swapchain time domains
@ -271,15 +409,90 @@ protected:
const util::allocator m_allocator;
private:
/**
* @brief The presentation timing queue.
*/
timings_queue m_queue;
/**
* @brief Handle the backend specific time domains for each present stage.
*/
swapchain_time_domains m_time_domains;
/**
* @brief The Vulkan device.
*/
VkDevice m_device;
/**
* @brief Query pool to allocate for present stage timing queries.
*/
VkQueryPool m_query_pool;
/**
* @brief The command pool for allocating the buffers for the present stage timings.
*/
VkCommandPool m_command_pool;
/**
* @brief The command buffer for the present stage timings.
*/
util::vector<VkCommandBuffer> m_command_buffer;
/**
* @brief The presentation timing queue.
*/
util::vector<swapchain_presentation_entry> m_queue;
/**
* @brief The number of images in the swapchain.
*/
uint32_t m_num_images;
/**
* @brief Semaphore per image.
*/
util::vector<VkSemaphore> m_present_semaphore;
/**
* @brief This API does the queue submission for getting the queue end timing.
*
* @param device The device private data.
* @param queue The Vulkan queue used to submit synchronization commands.
* @param image_index The index of the image in the swapchain.
*
* @return VK_SUCCESS when the submission is successful and error otherwise.
*/
VkResult queue_submit_queue_end_timing(const layer::device_private_data &device, VkQueue queue,
uint32_t image_index);
/**
* @brief This API initializes the resources for timing query such as the
* command buffer, command pool and query pool.
*
* @return VK_SUCCESS if the initialization is successful and error if otherwise.
*/
VkResult init_timing_resources();
/**
* @brief This API is called to query the queue end timings for a particular image index
* and store it in the internal queue.
*
* @param image_index The index of the image in the swapchain.
*
* @return VK_SUCCESS if the query is successful and error if otherwise.
*/
VkResult get_queue_end_timing_to_queue(uint32_t image_index);
/**
* @brief This API is called to get all the timings in the query pool to the
* internal queue and tries clearing it.
*
* @return VK_SUCCESS if the records are copied successfully or partially.
*/
VkResult query_present_queue_end_timings();
/**
* @brief Get the number of results that are available in the internal queue.
*
* @return The number of available results.
*/
uint32_t get_num_available_results();
};
} /* namespace wsi */

View file

@ -35,8 +35,9 @@
#include "present_timing_handler.hpp"
#include "layer/private_data.hpp"
wsi_ext_present_timing_headless::wsi_ext_present_timing_headless(const util::allocator &allocator)
: wsi::wsi_ext_present_timing(allocator)
wsi_ext_present_timing_headless::wsi_ext_present_timing_headless(const util::allocator &allocator, VkDevice device,
uint32_t num_images)
: wsi::wsi_ext_present_timing(allocator, device, num_images)
{
}
/**
@ -87,7 +88,7 @@ static std::optional<bool> is_time_domain_clock_monotonic_raw_supported(const Vk
}
util::unique_ptr<wsi_ext_present_timing_headless> wsi_ext_present_timing_headless::create(
const VkDevice &device, const util::allocator &allocator)
const VkDevice &device, const util::allocator &allocator, uint32_t num_images)
{
/*
* Select the hardware raw monotonic clock domain (unaffected by NTP or adjtime adjustments)
@ -115,7 +116,8 @@ util::unique_ptr<wsi_ext_present_timing_headless> wsi_ext_present_timing_headles
monotonic_time_domain)
};
return wsi_ext_present_timing::create<wsi_ext_present_timing_headless>(allocator, time_domains_array);
return wsi_ext_present_timing::create<wsi_ext_present_timing_headless>(allocator, time_domains_array, device,
num_images);
}
VkResult wsi_ext_present_timing_headless::get_swapchain_timing_properties(

View file

@ -42,13 +42,14 @@ class wsi_ext_present_timing_headless : public wsi::wsi_ext_present_timing
{
public:
static util::unique_ptr<wsi_ext_present_timing_headless> create(const VkDevice &device,
const util::allocator &allocator);
const util::allocator &allocator,
uint32_t num_images);
VkResult get_swapchain_timing_properties(uint64_t &timing_properties_counter,
VkSwapchainTimingPropertiesEXT &timing_properties) override;
private:
wsi_ext_present_timing_headless(const util::allocator &allocator);
wsi_ext_present_timing_headless(const util::allocator &allocator, VkDevice device, uint32_t num_images);
/* Allow util::allocator to access the private constructor */
friend util::allocator;

View file

@ -101,7 +101,8 @@ VkResult swapchain::add_required_extensions(VkDevice device, const VkSwapchainCr
bool swapchain_support_enabled = swapchain_create_info->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT;
if (swapchain_support_enabled)
{
if (!add_swapchain_extension(wsi_ext_present_timing_headless::create(device, m_allocator)))
if (!add_swapchain_extension(
wsi_ext_present_timing_headless::create(device, m_allocator, swapchain_create_info->minImageCount)))
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}

View file

@ -587,16 +587,6 @@ VkResult swapchain_base::notify_presentation_engine(const pending_present_reques
VkResult swapchain_base::queue_present(VkQueue queue, const VkPresentInfoKHR *present_info,
const swapchain_presentation_parameters &submit_info)
{
#if VULKAN_WSI_LAYER_EXPERIMENTAL
auto *ext = get_swapchain_extension<wsi::wsi_ext_present_timing>();
if (ext)
{
wsi::swapchain_presentation_entry presentation_entry = {};
presentation_entry.present_id = submit_info.pending_present.present_id;
TRY_LOG_CALL(ext->add_presentation_entry(presentation_entry));
}
#endif
if (submit_info.switch_presentation_mode)
{
/* Assert when a presentation mode switch is requested and the swapchain_maintenance1 extension which implements this is not available */
@ -624,6 +614,7 @@ VkResult swapchain_base::queue_present(VkQueue queue, const VkPresentInfoKHR *pr
}
void *submission_pnext = nullptr;
uint32_t count_signal_semaphores = 0;
std::optional<VkFrameBoundaryEXT> frame_boundary;
/* Do not handle the event if it was handled before reaching this point */
if (submit_info.handle_present_frame_boundary_event)
@ -637,14 +628,32 @@ VkResult swapchain_base::queue_present(VkQueue queue, const VkPresentInfoKHR *pr
submission_pnext = &frame_boundary.value();
}
}
if (submit_info.present_fence != VK_NULL_HANDLE)
{
signal_semaphores[count_signal_semaphores++] =
m_swapchain_images[submit_info.pending_present.image_index].present_fence_wait;
}
#if VULKAN_WSI_LAYER_EXPERIMENTAL
const VkPresentTimingInfoEXT *present_timing_info = VK_NULL_HANDLE;
const auto *present_timings_info =
util::find_extension<VkPresentTimingsInfoEXT>(VK_STRUCTURE_TYPE_PRESENT_TIMINGS_INFO_EXT, present_info->pNext);
if (present_timings_info != VK_NULL_HANDLE)
{
present_timing_info = present_timings_info->pTimingInfos;
assert(present_timing_info != VK_NULL_HANDLE);
if (present_timing_info->presentStageQueries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT)
{
auto *ext_present_timing = get_swapchain_extension<wsi::wsi_ext_present_timing>(true);
signal_semaphores[count_signal_semaphores++] =
ext_present_timing->get_image_present_semaphore(submit_info.pending_present.image_index);
}
}
#endif
queue_submit_semaphores semaphores = {
wait_semaphores,
sem_count,
(submit_info.present_fence != VK_NULL_HANDLE) ?
&m_swapchain_images[submit_info.pending_present.image_index].present_fence_wait :
nullptr,
(submit_info.present_fence != VK_NULL_HANDLE) ? 1u : 0,
count_signal_semaphores > 0 ? signal_semaphores.data() : nullptr,
count_signal_semaphores,
};
TRY_LOG_CALL(image_set_present_payload(m_swapchain_images[submit_info.pending_present.image_index], queue,
semaphores, submission_pnext));
@ -662,6 +671,15 @@ VkResult swapchain_base::queue_present(VkQueue queue, const VkPresentInfoKHR *pr
TRY(notify_presentation_engine(submit_info.pending_present));
#if VULKAN_WSI_LAYER_EXPERIMENTAL
if (present_timing_info != VK_NULL_HANDLE)
{
auto *ext_present_timing = get_swapchain_extension<wsi::wsi_ext_present_timing>(true);
TRY_LOG_CALL(ext_present_timing->add_presentation_entry(
m_device_data, queue, submit_info.pending_present.present_id, submit_info.pending_present.image_index,
present_timing_info->presentStageQueries));
}
#endif
return VK_SUCCESS;
}

View file

@ -124,6 +124,14 @@ struct swapchain_presentation_parameters
#endif
};
/**
 * @brief Enumerates the kinds of semaphore that a present submission may signal.
 *
 * One enumerator exists per kind, so SIGNAL_SEMAPHORE_MAX_NUM equals the number of kinds that are
 * compiled in. It is used as the capacity of the swapchain's signal semaphore array and must
 * therefore stay the last enumerator.
 */
enum signal_semaphores_type
{
/* Semaphore signalled for a present fence (name-based; presumably the image's present_fence_wait). */
SIGNAL_SEMAPHORE_PRESENT_FENCE,
#if VULKAN_WSI_LAYER_EXPERIMENTAL
/* Semaphore signalled for present timing queries; only counted in experimental builds. */
SIGNAL_SEMAPHORE_PRESENT_TIMING,
#endif
/* Number of enumerators above; keep last. */
SIGNAL_SEMAPHORE_MAX_NUM,
};
/**
* @brief Base swapchain class
*
@ -635,11 +643,6 @@ private:
*/
VkResult m_error_state;
/**
* @brief Wait for a buffer to become free.
*/
VkResult wait_for_free_buffer(uint64_t timeout);
/**
* @brief A semaphore to be signalled once a free image becomes available.
*
@ -650,6 +653,30 @@ private:
*/
util::timed_semaphore m_free_image_semaphore;
/**
* @brief A flag to track if swapchain has started presenting.
*/
bool m_started_presenting;
/**
* @brief Holds the swapchain extensions and related functionalities.
*/
wsi_ext_maintainer m_extensions;
/**
* @brief Holds the VkImageCreateInfo and backend specific image create info extensions.
*/
swapchain_image_creator m_image_creator;
/** @brief Signal semaphores for queue submit.
*/
std::array<VkSemaphore, SIGNAL_SEMAPHORE_MAX_NUM> signal_semaphores;
/**
* @brief Wait for a buffer to become free.
*/
VkResult wait_for_free_buffer(uint64_t timeout);
/**
* @brief Per swapchain thread function that handles page flipping.
*
@ -711,21 +738,6 @@ private:
*/
VkResult notify_presentation_engine(const pending_present_request &submit_info);
/**
* @brief A flag to track if swapchain has started presenting.
*/
bool m_started_presenting;
/**
* @brief Holds the swapchain extensions and related functionalities.
*/
wsi_ext_maintainer m_extensions;
/**
* @brief Holds the VkImageCreateInfo and backend specific image create info extensions.
*/
swapchain_image_creator m_image_creator;
/**
* @brief Initialize m_image_creator.
*

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024 Arm Limited.
* Copyright (c) 2021-2025 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@ -207,4 +207,27 @@ VkResult sync_queue_submit(const layer::device_private_data &device, VkQueue que
return VK_SUCCESS;
}
/**
 * @brief Submit the given command buffers to @p queue, waiting on and signalling the given semaphores.
 *
 * @param device              Device private data providing the allocator and the dispatch table.
 * @param queue               Queue the single batch is submitted to.
 * @param fence               Fence passed through to QueueSubmit.
 * @param semaphores          Wait and signal semaphores of the batch.
 * @param command_buffer_data Command buffers of the batch.
 *
 * @return VK_SUCCESS on success, VK_ERROR_OUT_OF_HOST_MEMORY if the wait stage array cannot be
 *         allocated, otherwise whatever TRY propagates from QueueSubmit.
 */
VkResult sync_queue_submit(const layer::device_private_data &device, VkQueue queue, VkFence fence,
                           const queue_submit_semaphores &semaphores, const command_buffer_data &command_buffer_data)
{
   /* One destination stage mask entry is required per wait semaphore. */
   util::vector<VkPipelineStageFlags> wait_stages{ util::allocator(device.get_allocator(),
                                                                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND) };
   if (!wait_stages.try_resize(semaphores.wait_semaphores_count))
   {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   /* When the semaphore that comes in is signalled, we know that all work is done. So, we do not
    * want to block any future Vulkan queue work on it. So, we pass in BOTTOM_OF_PIPE bit as the
    * wait flag.
    */
   for (auto &stage : wait_stages)
   {
      stage = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
   }

   VkSubmitInfo batch = {};
   batch.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
   batch.pNext = nullptr;
   batch.waitSemaphoreCount = semaphores.wait_semaphores_count;
   batch.pWaitSemaphores = semaphores.wait_semaphores;
   batch.pWaitDstStageMask = wait_stages.data();
   batch.commandBufferCount = command_buffer_data.m_command_buffer_count;
   batch.pCommandBuffers = command_buffer_data.m_command_buffers;
   batch.signalSemaphoreCount = semaphores.signal_semaphores_count;
   batch.pSignalSemaphores = semaphores.signal_semaphores;

   TRY(device.disp.QueueSubmit(queue, 1, &batch, fence));
   return VK_SUCCESS;
}
} /* namespace wsi */

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024 Arm Limited.
* Copyright (c) 2021-2025 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@ -53,6 +53,17 @@ struct queue_submit_semaphores
uint32_t signal_semaphores_count;
};
struct command_buffer_data
{
VkCommandBuffer *m_command_buffers{ nullptr };
uint32_t m_command_buffer_count{ 0 };
command_buffer_data(VkCommandBuffer *command_buffers, uint32_t command_buffer_count)
: m_command_buffers(command_buffers)
, m_command_buffer_count(command_buffer_count)
{
}
};
/**
* Synchronization using a Vulkan Fence object.
*/
@ -198,4 +209,19 @@ private:
*/
VkResult sync_queue_submit(const layer::device_private_data &device, VkQueue queue, VkFence fence,
const queue_submit_semaphores &semaphores, const void *submission_pnext = nullptr);
/**
 * @brief Submit a single batch of command buffers to a queue for synchronization.
 *
 * Every wait semaphore is waited on with the VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT stage mask.
 *
 * @param device The device private data; supplies the allocator and the dispatch table used for
 *               the submission.
 * @param queue The Vulkan queue that may be used to submit synchronization commands.
 * @param fence The fence to be signalled, it could be VK_NULL_HANDLE in the absence
 *              of a fence to be signalled.
 * @param semaphores The wait and signal semaphores.
 * @param command_buffer_data Data of command buffer to be submitted.
 *
 * @return VK_SUCCESS on success, VK_ERROR_OUT_OF_HOST_MEMORY if the wait stage array cannot be
 *         allocated, an appropriate error code otherwise.
 */
VkResult sync_queue_submit(const layer::device_private_data &device, VkQueue queue, VkFence fence,
const queue_submit_semaphores &semaphores, const command_buffer_data &command_buffer_data);
} /* namespace wsi */

View file

@ -31,13 +31,15 @@
#include "present_timing_handler.hpp"
#include <array>
wsi_ext_present_timing_wayland::wsi_ext_present_timing_wayland(const util::allocator &allocator)
: wsi_ext_present_timing(allocator)
wsi_ext_present_timing_wayland::wsi_ext_present_timing_wayland(const util::allocator &allocator, VkDevice device,
uint32_t num_images)
: wsi_ext_present_timing(allocator, device, num_images)
{
}
util::unique_ptr<wsi_ext_present_timing_wayland> wsi_ext_present_timing_wayland::create(
VkTimeDomainKHR image_first_pixel_visible_time_domain, const util::allocator &allocator)
VkTimeDomainKHR image_first_pixel_visible_time_domain, const util::allocator &allocator, VkDevice device,
uint32_t num_images)
{
std::array<util::unique_ptr<wsi::vulkan_time_domain>, 2> time_domains_array = {
allocator.make_unique<wsi::vulkan_time_domain>(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT,
@ -46,7 +48,8 @@ util::unique_ptr<wsi_ext_present_timing_wayland> wsi_ext_present_timing_wayland:
image_first_pixel_visible_time_domain)
};
return wsi_ext_present_timing::create<wsi_ext_present_timing_wayland>(allocator, time_domains_array);
return wsi_ext_present_timing::create<wsi_ext_present_timing_wayland>(allocator, time_domains_array, device,
num_images);
}
VkResult wsi_ext_present_timing_wayland::get_swapchain_timing_properties(

View file

@ -42,13 +42,14 @@ class wsi_ext_present_timing_wayland : public wsi::wsi_ext_present_timing
{
public:
static util::unique_ptr<wsi_ext_present_timing_wayland> create(VkTimeDomainKHR image_first_pixel_visible_time_domain,
const util::allocator &allocator);
const util::allocator &allocator, VkDevice device,
uint32_t num_images);
VkResult get_swapchain_timing_properties(uint64_t &timing_properties_counter,
VkSwapchainTimingPropertiesEXT &timing_properties) override;
private:
wsi_ext_present_timing_wayland(const util::allocator &allocator);
wsi_ext_present_timing_wayland(const util::allocator &allocator, VkDevice device, uint32_t num_images);
/* Allow util::allocator to access the private constructor */
friend util::allocator;

View file

@ -146,7 +146,9 @@ VkResult swapchain::add_required_extensions(VkDevice device, const VkSwapchainCr
image_first_pixel_visible_time_domain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR;
}
if (!add_swapchain_extension(
wsi_ext_present_timing_wayland::create(image_first_pixel_visible_time_domain, m_allocator)))
wsi_ext_present_timing_wayland::create(image_first_pixel_visible_time_domain, m_allocator, m_device,
swapchain_create_info->minImageCount)))
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}