diff --git a/layer/present_timing_api.cpp b/layer/present_timing_api.cpp index e9ed2d0..62b75d5 100644 --- a/layer/present_timing_api.cpp +++ b/layer/present_timing_api.cpp @@ -108,10 +108,15 @@ wsi_layer_vkGetPastPresentationTimingEXT( VkDevice device, const VkPastPresentationTimingInfoEXT *pPastPresentationTimingInfo, VkPastPresentationTimingPropertiesEXT *pPastPresentationTimingProperties) VWL_API_POST { - UNUSED(device); - UNUSED(pPastPresentationTimingInfo); - UNUSED(pPastPresentationTimingProperties); - VkResult result = VK_SUCCESS; - return result; + assert(pPastPresentationTimingInfo != nullptr); + auto &device_data = layer::device_private_data::get(device); + if (!device_data.layer_owns_swapchain(pPastPresentationTimingInfo->swapchain)) + { + return device_data.disp.GetPastPresentationTimingEXT(device, pPastPresentationTimingInfo, + pPastPresentationTimingProperties); + } + auto *sc = reinterpret_cast(pPastPresentationTimingInfo->swapchain); + auto *ext = sc->get_swapchain_extension(true); + return ext->get_past_presentation_results(pPastPresentationTimingProperties); } #endif /* VULKAN_WSI_LAYER_EXPERIMENTAL */ diff --git a/layer/private_data.hpp b/layer/private_data.hpp index d342fb6..04dccc7 100644 --- a/layer/private_data.hpp +++ b/layer/private_data.hpp @@ -356,7 +356,8 @@ private: EP(GetSwapchainTimeDomainPropertiesEXT, VK_EXT_PRESENT_TIMING_EXTENSION_NAME, API_VERSION_MAX, false, ) \ EP(GetSwapchainTimingPropertiesEXT, VK_EXT_PRESENT_TIMING_EXTENSION_NAME, API_VERSION_MAX, false, ) \ EP(SetSwapchainPresentTimingQueueSizeEXT, VK_EXT_PRESENT_TIMING_EXTENSION_NAME, API_VERSION_MAX, false, ) \ - EP(WaitForPresentKHR, VK_KHR_PRESENT_WAIT_EXTENSION_NAME, API_VERSION_MAX, false, ) + EP(WaitForPresentKHR, VK_KHR_PRESENT_WAIT_EXTENSION_NAME, API_VERSION_MAX, false, ) \ + EP(GetPastPresentationTimingEXT, VK_EXT_PRESENT_TIMING_EXTENSION_NAME, API_VERSION_MAX, false, ) #else #define DEVICE_ENTRYPOINTS_LIST_EXPERIMENTAL(EP) #endif @@ -390,6 +391,11 @@ private: 
EP(ResetFences, "", VK_API_VERSION_1_0, true, ) \ EP(WaitForFences, "", VK_API_VERSION_1_0, true, ) \ EP(DestroyDevice, "", VK_API_VERSION_1_0, true, ) \ + EP(CmdResetQueryPool, "", VK_API_VERSION_1_0, true, ) \ + EP(CmdWriteTimestamp, "", VK_API_VERSION_1_0, true, ) \ + EP(CreateQueryPool, "", VK_API_VERSION_1_0, true, ) \ + EP(DestroyQueryPool, "", VK_API_VERSION_1_0, true, ) \ + EP(GetQueryPoolResults, "", VK_API_VERSION_1_0, true, ) \ /* VK_KHR_swapchain */ \ EP(CreateSwapchainKHR, VK_KHR_SWAPCHAIN_EXTENSION_NAME, API_VERSION_MAX, false, ) \ EP(DestroySwapchainKHR, VK_KHR_SWAPCHAIN_EXTENSION_NAME, API_VERSION_MAX, false, ) \ diff --git a/layer/wsi_layer_experimental.hpp b/layer/wsi_layer_experimental.hpp index c013e77..972ae64 100644 --- a/layer/wsi_layer_experimental.hpp +++ b/layer/wsi_layer_experimental.hpp @@ -181,6 +181,10 @@ typedef VkResult(VKAPI_PTR *PFN_vkGetSwapchainTimingPropertiesEXT)( typedef VkResult(VKAPI_PTR *PFN_vkSetSwapchainPresentTimingQueueSizeEXT)(VkDevice device, VkSwapchainKHR swapchain, uint32_t size); +typedef VkResult(VKAPI_PTR *PFN_vkGetPastPresentationTimingEXT)( + VkDevice device, const VkPastPresentationTimingInfoEXT *pPastPresentationTimingInfo, + VkPastPresentationTimingPropertiesEXT *pPastPresentationTimingProperties); + VWL_VKAPI_CALL(VkResult) wsi_layer_vkSetSwapchainPresentTimingQueueSizeEXT(VkDevice device, VkSwapchainKHR swapchain, uint32_t size) VWL_API_POST; diff --git a/wsi/extensions/present_timing.cpp b/wsi/extensions/present_timing.cpp index c324847..b05889b 100644 --- a/wsi/extensions/present_timing.cpp +++ b/wsi/extensions/present_timing.cpp @@ -27,6 +27,7 @@ * * @brief Contains the implentation for the VK_EXT_present_timing extension. 
*/ +#include #include #include @@ -35,16 +36,141 @@ #if VULKAN_WSI_LAYER_EXPERIMENTAL namespace wsi { +/* VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT, + * VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT, + * VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT, + * VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT + */ +static constexpr size_t MAX_PRESENT_STAGES = 4; +const std::array g_present_stages = { + VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT, VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT, + VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT, VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT +}; -wsi_ext_present_timing::wsi_ext_present_timing(const util::allocator &allocator) +wsi_ext_present_timing::wsi_ext_present_timing(const util::allocator &allocator, VkDevice device, uint32_t num_images) : m_allocator(allocator) - , m_queue(allocator) , m_time_domains(allocator) + , m_device(device) + , m_query_pool(VK_NULL_HANDLE) + , m_command_pool(VK_NULL_HANDLE) + , m_command_buffer(allocator) + , m_queue(allocator) + , m_num_images(num_images) + , m_present_semaphore(allocator) { } wsi_ext_present_timing::~wsi_ext_present_timing() { + const layer::device_private_data &device_data = layer::device_private_data::get(m_device); + device_data.disp.FreeCommandBuffers(m_device, m_command_pool, m_command_buffer.size(), m_command_buffer.data()); + for (auto &command_buffer : m_command_buffer) + { + command_buffer = VK_NULL_HANDLE; + } + if (m_command_pool != VK_NULL_HANDLE) + { + device_data.disp.DestroyCommandPool(m_device, m_command_pool, m_allocator.get_original_callbacks()); + m_command_pool = VK_NULL_HANDLE; + } + if (m_query_pool != VK_NULL_HANDLE) + { + device_data.disp.DestroyQueryPool(m_device, m_query_pool, m_allocator.get_original_callbacks()); + m_query_pool = VK_NULL_HANDLE; + } + + for (auto semaphore : m_present_semaphore) + { + if (semaphore != VK_NULL_HANDLE) + { + device_data.disp.DestroySemaphore(m_device, semaphore, m_allocator.get_original_callbacks()); + } + } +} + 
+VkResult wsi_ext_present_timing::init_timing_resources() +{ + const layer::device_private_data &device_data = layer::device_private_data::get(m_device); + if (!m_present_semaphore.try_resize(m_num_images)) + { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + for (auto &semaphore : m_present_semaphore) + { + semaphore = VK_NULL_HANDLE; + VkSemaphoreCreateInfo semaphore_info = {}; + semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + if (device_data.disp.CreateSemaphore(m_device, &semaphore_info, m_allocator.get_original_callbacks(), + &semaphore) != VK_SUCCESS) + { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + /* Resize the command buffer to the number of images. */ + if (!m_command_buffer.try_resize(m_num_images)) + { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + for (auto &command_buffer : m_command_buffer) + { + command_buffer = VK_NULL_HANDLE; + } + /* Allocate the command pool and query pool. */ + VkQueryPoolCreateInfo query_pool_info = { + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, m_num_images, 0 + }; + TRY_LOG_CALL(device_data.disp.CreateQueryPool(m_device, &query_pool_info, m_allocator.get_original_callbacks(), + &m_query_pool)); + VkCommandPoolCreateInfo command_pool_info{ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr, + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, 0 }; + TRY_LOG_CALL(device_data.disp.CreateCommandPool(m_device, &command_pool_info, m_allocator.get_original_callbacks(), + &m_command_pool)); + /* Allocate and write the command buffer. 
*/ + VkCommandBufferAllocateInfo command_buffer_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr, + m_command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, m_num_images }; + TRY_LOG_CALL(device_data.disp.AllocateCommandBuffers(m_device, &command_buffer_info, m_command_buffer.data())); + VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, 0, nullptr }; + for (size_t image_index = 0; image_index < m_num_images; image_index++) + { + TRY_LOG_CALL(device_data.disp.BeginCommandBuffer(m_command_buffer[image_index], &begin_info)); + device_data.disp.CmdResetQueryPool(m_command_buffer[image_index], m_query_pool, image_index, 1); + device_data.disp.CmdWriteTimestamp(m_command_buffer[image_index], VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + m_query_pool, image_index); + TRY_LOG_CALL(device_data.disp.EndCommandBuffer(m_command_buffer[image_index])); + } + return VK_SUCCESS; +} + +VkResult wsi_ext_present_timing::get_queue_end_timing_to_queue(uint32_t image_index) +{ + for (auto &slot : m_queue) + { + if ((slot.m_image_index == image_index) && slot.is_pending(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT)) + { + uint64_t time; + auto stage_timing_optional = slot.get_stage_timing(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT); + const layer::device_private_data &device_data = layer::device_private_data::get(m_device); + TRY(device_data.disp.GetQueryPoolResults(m_device, m_query_pool, image_index, 1, sizeof(time), &time, 0, + VK_QUERY_RESULT_64_BIT)); + stage_timing_optional->get().m_time.store(time); + /* For an image index, there can only be one entry in the internal queue with pending results. 
*/ + break; + } + } + return VK_SUCCESS; +} + +VkResult wsi_ext_present_timing::query_present_queue_end_timings() +{ + for (uint32_t image_index = 0; image_index < m_num_images; ++image_index) + { + VkResult result = get_queue_end_timing_to_queue(image_index); + if ((result != VK_SUCCESS) && (result != VK_NOT_READY)) + { + return result; + } + } + return VK_SUCCESS; } VkResult wsi_ext_present_timing::present_timing_queue_set_size(size_t queue_size) @@ -60,17 +186,17 @@ VkResult wsi_ext_present_timing::present_timing_queue_set_size(size_t queue_size { return VK_ERROR_OUT_OF_HOST_MEMORY; } - for (auto iter : m_queue.m_timings) + for (auto &iter : m_queue) { - if (iter.is_outstanding) + if (iter.has_outstanding_stages()) { - if (!presentation_timing.try_push_back(iter)) + if (!presentation_timing.try_push_back(std::move(iter))) { return VK_ERROR_OUT_OF_HOST_MEMORY; } } } - m_queue.m_timings.swap(presentation_timing); + m_queue.swap(presentation_timing); return VK_SUCCESS; } @@ -78,9 +204,9 @@ size_t wsi_ext_present_timing::present_timing_get_num_outstanding_results() { size_t num_outstanding = 0; - for (const auto &iter : m_queue.m_timings) + for (auto &iter : m_queue) { - if (iter.is_outstanding) + if (iter.has_outstanding_stages()) { num_outstanding++; } @@ -88,12 +214,47 @@ size_t wsi_ext_present_timing::present_timing_get_num_outstanding_results() return num_outstanding; } -VkResult wsi_ext_present_timing::add_presentation_entry(const wsi::swapchain_presentation_entry &presentation_entry) +VkResult wsi_ext_present_timing::queue_submit_queue_end_timing(const layer::device_private_data &device, VkQueue queue, + uint32_t image_index) { - if (!m_queue.m_timings.try_push_back(presentation_entry)) + assert(image_index < m_command_buffer.size()); + command_buffer_data command_buffer_data(&m_command_buffer[image_index], 1); + VkSemaphore present_timing_semaphore = get_image_present_semaphore(image_index); + queue_submit_semaphores present_timing_semaphores = { + 
&present_timing_semaphore, + 1, + nullptr, + 0, + }; + TRY_LOG_CALL(sync_queue_submit(device, queue, VK_NULL_HANDLE, present_timing_semaphores, command_buffer_data)); + return VK_SUCCESS; +} + +VkResult wsi_ext_present_timing::add_presentation_entry(const layer::device_private_data &device, VkQueue queue, + uint64_t present_id, uint32_t image_index, + VkPresentStageFlagsEXT present_stage_queries) +{ + if (present_stage_queries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) + { + /* Get results for the previous presentation. The queue end stage of + * the previous presentation for the same image must had + * finished when the same image is going to be presented again. */ + TRY_LOG_CALL(get_queue_end_timing_to_queue(image_index)); + } + /* Keep the internal queue to the limit defined by the application. */ + if (m_queue.size() == m_queue.capacity()) + { + return VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT; + } + wsi::swapchain_presentation_entry presentation_entry(present_stage_queries, present_id, image_index); + if (!m_queue.try_push_back(std::move(presentation_entry))) { return VK_ERROR_OUT_OF_HOST_MEMORY; } + if (present_stage_queries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) + { + TRY_LOG_CALL(queue_submit_queue_end_timing(device, queue, image_index)); + } return VK_SUCCESS; } @@ -102,6 +263,235 @@ swapchain_time_domains &wsi_ext_present_timing::get_swapchain_time_domains() return m_time_domains; } +VkSemaphore wsi_ext_present_timing::get_image_present_semaphore(uint32_t image_index) +{ + return m_present_semaphore[image_index]; +} + +uint32_t wsi_ext_present_timing::get_num_available_results() +{ + uint32_t num_pending_results = 0; + for (auto &slot : m_queue) + { + if (slot.has_completed_stages()) + { + num_pending_results++; + } + } + return num_pending_results; +} + +VkResult wsi_ext_present_timing::get_past_presentation_results( + VkPastPresentationTimingPropertiesEXT *past_present_timing_properties) +{ + assert(past_present_timing_properties != 
nullptr); + /* Get any outstanding timings in the query pool to the internal queue. */ + TRY_LOG_CALL(query_present_queue_end_timings()); + if ((past_present_timing_properties->presentationTimingCount == 0) || + (past_present_timing_properties->pPresentationTimings == nullptr)) + { + past_present_timing_properties->presentationTimingCount = get_num_available_results(); + return VK_SUCCESS; + } + /* When application request entries with multiple zero present ids or combination of zero and + * non-zero present ids, this field helps avoiding the same slot getting copied to the results. + */ + for (auto &slot : m_queue) + { + slot.copied = false; + } + /* When application request entries with presentIds in an order where there are presentId=0 + * requested earlier than presentId!=0, then the incoming pointer get filled with first available + * slots when handling the zero presentIds. Later when non-zero presentIds are handled, if the + * matching slot was already copied to the output, then no slot will be copied for that. + * This creates a situation where a fewer results being responded for that particular request + * compared to the amount that would have achieved with handling non-zeros first and zeros later. */ + uint32_t count_results = 0; + for (uint32_t i = 0; i < past_present_timing_properties->presentationTimingCount; ++i) + { + bool timings_found = false; + if (count_results == past_present_timing_properties->presentationTimingCount) + { + if (count_results < get_num_available_results()) + { + return VK_INCOMPLETE; + } + return VK_SUCCESS; + } + VkPastPresentationTimingEXT &timing = past_present_timing_properties->pPresentationTimings[i]; + for (auto slot = m_queue.begin(); slot != m_queue.end();) + { + if ((!(*slot).copied) && (*slot).has_completed_stages()) + { + /* There will be only one slot in the queue per presentId. 
*/ + if ((timing.presentId == 0) || (timing.presentId == (*slot).m_present_id)) + { + assert(timing.presentStageCount >= (*slot).m_num_present_stages); + if (((*slot).populate(timing))) + { + count_results++; + (*slot).copied = true; + timings_found = true; + if (timing.reportComplete) + { + slot = m_queue.erase(slot); + continue; + } + } + } + } + slot++; + } + /* When the timings are not filled, reset the count to zero. */ + if (!timings_found) + { + timing.presentStageCount = 0; + } + } + if ((count_results < past_present_timing_properties->presentationTimingCount) || + (count_results < get_num_available_results())) + { + past_present_timing_properties->presentationTimingCount = count_results; + return VK_INCOMPLETE; + } + return VK_SUCCESS; +} + +swapchain_presentation_entry::swapchain_presentation_entry(VkPresentStageFlagsEXT present_stage_queries, + uint64_t present_id, uint32_t image_index) + : m_target_stages(0) + , m_present_id(present_id) + , m_image_index(image_index) + , m_num_present_stages(0) +{ + if (present_stage_queries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) + { + m_queue_end_timing = swapchain_presentation_timing(); + m_num_present_stages++; + } + if (present_stage_queries & VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT) + { + m_latch_timing = swapchain_presentation_timing(); + m_num_present_stages++; + } + if (present_stage_queries & VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT) + { + m_first_pixel_out_timing = swapchain_presentation_timing(); + m_num_present_stages++; + } + if (present_stage_queries & VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT) + { + m_first_pixel_visible_timing = swapchain_presentation_timing(); + m_num_present_stages++; + } +} + +bool swapchain_presentation_entry::is_pending(VkPresentStageFlagBitsEXT stage) +{ + auto stage_timing_optional = get_stage_timing(stage); + if (stage_timing_optional.has_value() && (stage_timing_optional->get().m_time.load() == 0)) + { + return true; + } + return false; +} + +bool 
swapchain_presentation_entry::is_complete(VkPresentStageFlagBitsEXT stage) +{ + auto stage_timing_optional = get_stage_timing(stage); + if (stage_timing_optional.has_value() && (stage_timing_optional->get().m_time.load() != 0)) + { + return true; + } + return false; +} + +bool swapchain_presentation_entry::has_outstanding_stages() +{ + /* Check if any of the requested stages is pending to be completed. */ + return (is_pending(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) || + is_pending(VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT) || + is_pending(VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT) || + is_pending(VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT)); +} + +bool swapchain_presentation_entry::has_completed_stages() +{ + /* Check if any of the requested stages is complete. */ + return (is_complete(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) || + is_complete(VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT) || + is_complete(VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT) || + is_complete(VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT)); +} + +std::optional> swapchain_presentation_entry::get_stage_timing( + VkPresentStageFlagBitsEXT stage) +{ + switch (stage) + { + case VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT: + if (m_queue_end_timing.has_value()) + { + return *m_queue_end_timing; + } + break; + case VK_PRESENT_STAGE_IMAGE_LATCHED_BIT_EXT: + if (m_latch_timing.has_value()) + { + return *m_latch_timing; + } + break; + case VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_OUT_BIT_EXT: + if (m_first_pixel_out_timing.has_value()) + { + return *m_first_pixel_out_timing; + } + break; + case VK_PRESENT_STAGE_IMAGE_FIRST_PIXEL_VISIBLE_BIT_EXT: + if (m_first_pixel_visible_timing.has_value()) + { + return *m_first_pixel_visible_timing; + } + break; + default: + assert(0); + } + return std::nullopt; +} + +bool swapchain_presentation_entry::populate(VkPastPresentationTimingEXT &timing) +{ + uint64_t stage_index = 0; + for (const auto &stage : g_present_stages) + { + auto 
stage_timing_optional = get_stage_timing(stage); + if (!stage_timing_optional.has_value()) + { + continue; + } + uint64_t time = stage_timing_optional->get().m_time.load(); + if (time > 0) + { + timing.timeDomainId = stage_timing_optional->get().m_timedomain_id; + timing.pPresentStages[stage_index].stage = stage; + timing.pPresentStages[stage_index++].time = time; + } + } + timing.presentStageCount = stage_index; + /* If atleast one entry is made to the timings, update the other fields. */ + if (stage_index != 0) + { + /* and all requested stages in the entry had been responded, + * set the report complete to true. */ + timing.presentId = m_present_id; + /* All the available stages are now populated. If there are no more outstanding stages, + * then the report is complete and the slot can be freed. */ + timing.reportComplete = !has_outstanding_stages(); + return true; + } + return false; +} + VkResult swapchain_time_domains::calibrate(VkPresentStageFlagBitsEXT present_stage, swapchain_calibrated_time *calibrated_time) { diff --git a/wsi/extensions/present_timing.hpp b/wsi/extensions/present_timing.hpp index b7b89c0..365286f 100644 --- a/wsi/extensions/present_timing.hpp +++ b/wsi/extensions/present_timing.hpp @@ -32,12 +32,16 @@ #pragma once #include +#include #include #include +#include #include #include #include +#include +#include #include "wsi_extension.hpp" @@ -45,6 +49,39 @@ namespace wsi { +/** + * @brief Swapchain presentation timing + * + * This structure is used to keep the timing parameters for various presentation stages. 
+ * + */ +struct swapchain_presentation_timing +{ + uint64_t m_timedomain_id{ 0 }; + /* Using atomics to enforce sequentially consistent ordering */ + std::atomic m_time{ 0 }; + + swapchain_presentation_timing() + : m_timedomain_id(0) + , m_time(0) + { + } + swapchain_presentation_timing(swapchain_presentation_timing &&rhs) noexcept + { + m_timedomain_id = rhs.m_timedomain_id; + m_time.store(rhs.m_time.load()); + } + swapchain_presentation_timing &operator=(swapchain_presentation_timing &&rhs) noexcept + { + m_timedomain_id = rhs.m_timedomain_id; + m_time.store(rhs.m_time.load()); + return *this; + } + + swapchain_presentation_timing(const swapchain_presentation_timing &) = delete; + swapchain_presentation_timing &operator=(const swapchain_presentation_timing &) = delete; +}; + /** * @brief Swapchain presentation entry * @@ -54,39 +91,108 @@ namespace wsi struct swapchain_presentation_entry { /** - * Whether this entry is an outstanding result or not. + * The target stages for the presentation entry. */ - bool is_outstanding{ false }; + VkPresentStageFlagsEXT m_target_stages{ 0 }; + /** - * The present id. + * The present id. Zero is a valid value for present id. */ - uint64_t present_id{ 0 }; + uint64_t m_present_id{ 0 }; + + /** + * The image index of the entry in the swapchain. + */ + uint32_t m_image_index{ 0 }; + + /** + * The number of requested stages for this entry. + */ + size_t m_num_present_stages; + + /** + * When serving a get past presentation timings request, this field + * keep the status of whether the slot had already been copied to + * the results. + */ + bool copied; + + /** + * The variables to keep timing stages. 
+ */ + std::optional m_queue_end_timing; + std::optional m_latch_timing; + std::optional m_first_pixel_out_timing; + std::optional m_first_pixel_visible_timing; + + swapchain_presentation_entry(VkPresentStageFlagsEXT present_stage_queries, uint64_t present_id, + uint32_t image_index); + swapchain_presentation_entry(swapchain_presentation_entry &&) noexcept = default; + swapchain_presentation_entry &operator=(swapchain_presentation_entry &&) noexcept = default; + + swapchain_presentation_entry(const swapchain_presentation_entry &) = delete; + swapchain_presentation_entry &operator=(const swapchain_presentation_entry &) = delete; + + /** + * @brief This API returns true when the requested stage timing is pending. + * + * @param stage The stage to get the status for. + * + * @return true when the stage is pending and false otherwise. + */ + bool is_pending(VkPresentStageFlagBitsEXT stage); + + /** + * @brief This API returns true when the requested stage timing is completed. + * + * @param stage The stage to get the status for. + * + * @return true when the stage is completed and false otherwise. + */ + bool is_complete(VkPresentStageFlagBitsEXT stage); + + /** + * @brief This API returns true when there are outstanding stages and false otherwise. + * + * @return true when there are outstanding stages and false otherwise. + */ + bool has_outstanding_stages(); + + /** + * @brief This API returns true when there are completed stages and false otherwise. + * + * @return true when there are completed stages and false otherwise. + */ + bool has_completed_stages(); + + /** + * @brief This API populates the timing parameters from the swapchain_presentation_entry for all stages. + * + * @param timing Reference to the timing to be populated. + * + * @return true when atleast one stage is populated from the swapchain_presentation_entry and false otherwise. 
+ */ + bool populate(VkPastPresentationTimingEXT &timing); + + /** + * @brief This API retuns and optional reference to a particular stage of the swapchain_presentation_entry. + * + * @param stage The stage to get the timing for. + * + * @return optional reference to the particular stage, std::nullopt if the stage doesn't exit. + */ + std::optional> get_stage_timing( + VkPresentStageFlagBitsEXT stage); }; -/** - * @brief Timings queue - * - * This structure is used to keep the parameters related to the presentation timing queue. - * - */ -struct timings_queue -{ - timings_queue(const util::allocator &allocator) - : m_timings(allocator) - { - } - - util::vector m_timings; -}; - -// Predefined struct for calibrated time +/* Predefined struct for calibrated time */ struct swapchain_calibrated_time { VkTimeDomainKHR time_domain; uint64_t offset; }; -// Base struct for swapchain time domain +/* Base struct for swapchain time domain */ class swapchain_time_domain { public: @@ -188,9 +294,10 @@ public: template static util::unique_ptr create(const util::allocator &allocator, - std::array, N> &domains) + std::array, N> &domains, VkDevice device, + uint32_t num_images) { - auto present_timing = allocator.make_unique(allocator); + auto present_timing = allocator.make_unique(allocator, device, num_images); for (auto &domain : domains) { if (!present_timing->get_swapchain_time_domains().add_time_domain(std::move(domain))) @@ -199,14 +306,24 @@ public: return nullptr; } } + if (present_timing->init_timing_resources() != VK_SUCCESS) + { + WSI_LOG_ERROR("Failed to initialize present timing."); + return nullptr; + } return present_timing; } /** * @brief Constructor for the wsi_ext_present_timing class. + * + * @param allocator Reference to the custom allocator. + * @param device The device to which the swapchain belongs. + * @param num_images Number of images in the swapchain. 
+ * */ - wsi_ext_present_timing(const util::allocator &allocator); + wsi_ext_present_timing(const util::allocator &allocator, VkDevice device, uint32_t num_images); /** * @brief Destructor for the wsi_ext_present_timing class. @@ -239,14 +356,35 @@ public: /** * @brief Add a presentation entry to the present timing queue. * - * This API pushes a presentation entry to the present timing queue. - * - * @param sc_presentation_entry Reference to the presentation entry to be added. + * @param device The device private data. + * @param queue The Vulkan queue used to submit synchronization commands. + * @param present_id The present id of the current presentation. + * @param image_index The index of the image in the swapchain. + * @param present_stage_queries The present stages application had requested timings for. * * @return VK_SUCCESS when the entry was inserted successfully and VK_ERROR_OUT_OF_HOST_MEMORY * when there is no host memory. */ - VkResult add_presentation_entry(const wsi::swapchain_presentation_entry &sc_presentation_entry); + VkResult add_presentation_entry(const layer::device_private_data &device, VkQueue queue, uint64_t present_id, + uint32_t image_index, VkPresentStageFlagsEXT present_stage_queries); + + /** + * @brief Get the image's present semaphore. + * + * @param image_index Image's index + * + * @return the image's present semaphore. + */ + VkSemaphore get_image_present_semaphore(uint32_t image_index); + + /** + * @brief Get the results of the past presentation from the internal queue. + * + * @param past_present_timing_properties Pointer for returing results. + * + * @return VK_SUCCESS when the requested results are returned, VK_INCOMPLETE when returning fewer results. 
+ */ + VkResult get_past_presentation_results(VkPastPresentationTimingPropertiesEXT *past_present_timing_properties); /** * @brief Get the swapchain time domains @@ -271,15 +409,90 @@ protected: const util::allocator m_allocator; private: - /** - * @brief The presentation timing queue. - */ - timings_queue m_queue; - /** * @brief Handle the backend specific time domains for each present stage. */ swapchain_time_domains m_time_domains; + + /** + * @brief The Vulkan device. + */ + VkDevice m_device; + + /** + * @brief Query pool to allocate for present stage timing queries. + */ + VkQueryPool m_query_pool; + + /** + * @brief The command pool for allocating the buffers for the present stage timings. + */ + VkCommandPool m_command_pool; + + /** + * @brief The command buffer for the present stage timings. + */ + util::vector m_command_buffer; + + /** + * @brief The presentation timing queue. + */ + util::vector m_queue; + + /** + * @brief The number of images in the swapchain. + */ + uint32_t m_num_images; + + /** + * @brief Semaphore per image. + */ + util::vector m_present_semaphore; + + /** + * @brief This API does the queue submission for getting the queue end timing. + * + * @param device The device private data. + * @param queue The Vulkan queue used to submit synchronization commands. + * @param image_index The index of the image in the swapchain. + * + * @return VK_SUCCESS when the submission is successfully and error otherwise. + */ + VkResult queue_submit_queue_end_timing(const layer::device_private_data &device, VkQueue queue, + uint32_t image_index); + + /** + * @brief This API initializes the resources for timing query such as the + * command buffer, command pool and query pool. + * + * @return VK_SUCCESS if the initialization is successful and error if otherwise. + */ + VkResult init_timing_resources(); + + /** + * @brief This API is called to query the queue end timings for a particular image index + * and store it in the internal queue. 
+ * + * @param image_index The index of the image in the swapchain. + * + * @return VK_SUCCESS if the query is successful and error if otherwise. + */ + VkResult get_queue_end_timing_to_queue(uint32_t image_index); + + /** + * @brief This API is called to get all the timings in the query pool to the + * internal queue and tries clearing it. + * + * @return VK_SUCCESS if the records are copied successfully or partially. + */ + VkResult query_present_queue_end_timings(); + + /** + * @brief Get the number of results that are available in the internal queue. + * + * @return The number of available results. + */ + uint32_t get_num_available_results(); }; } /* namespace wsi */ diff --git a/wsi/headless/present_timing_handler.cpp b/wsi/headless/present_timing_handler.cpp index 73fd084..61bf9f6 100644 --- a/wsi/headless/present_timing_handler.cpp +++ b/wsi/headless/present_timing_handler.cpp @@ -35,8 +35,9 @@ #include "present_timing_handler.hpp" #include "layer/private_data.hpp" -wsi_ext_present_timing_headless::wsi_ext_present_timing_headless(const util::allocator &allocator) - : wsi::wsi_ext_present_timing(allocator) +wsi_ext_present_timing_headless::wsi_ext_present_timing_headless(const util::allocator &allocator, VkDevice device, + uint32_t num_images) + : wsi::wsi_ext_present_timing(allocator, device, num_images) { } /** @@ -87,7 +88,7 @@ static std::optional is_time_domain_clock_monotonic_raw_supported(const Vk } util::unique_ptr wsi_ext_present_timing_headless::create( - const VkDevice &device, const util::allocator &allocator) + const VkDevice &device, const util::allocator &allocator, uint32_t num_images) { /* * Select the hardware raw monotonic clock domain (unaffected by NTP or adjtime adjustments) @@ -115,7 +116,8 @@ util::unique_ptr wsi_ext_present_timing_headles monotonic_time_domain) }; - return wsi_ext_present_timing::create(allocator, time_domains_array); + return wsi_ext_present_timing::create(allocator, time_domains_array, device, + num_images); } 
VkResult wsi_ext_present_timing_headless::get_swapchain_timing_properties( diff --git a/wsi/headless/present_timing_handler.hpp b/wsi/headless/present_timing_handler.hpp index 3c70d58..e4773ef 100644 --- a/wsi/headless/present_timing_handler.hpp +++ b/wsi/headless/present_timing_handler.hpp @@ -42,13 +42,14 @@ class wsi_ext_present_timing_headless : public wsi::wsi_ext_present_timing { public: static util::unique_ptr create(const VkDevice &device, - const util::allocator &allocator); + const util::allocator &allocator, + uint32_t num_images); VkResult get_swapchain_timing_properties(uint64_t &timing_properties_counter, VkSwapchainTimingPropertiesEXT &timing_properties) override; private: - wsi_ext_present_timing_headless(const util::allocator &allocator); + wsi_ext_present_timing_headless(const util::allocator &allocator, VkDevice device, uint32_t num_images); /* Allow util::allocator to access the private constructor */ friend util::allocator; diff --git a/wsi/headless/swapchain.cpp b/wsi/headless/swapchain.cpp index 02022e6..3e7f41c 100644 --- a/wsi/headless/swapchain.cpp +++ b/wsi/headless/swapchain.cpp @@ -101,7 +101,8 @@ VkResult swapchain::add_required_extensions(VkDevice device, const VkSwapchainCr bool swapchain_support_enabled = swapchain_create_info->flags & VK_SWAPCHAIN_CREATE_PRESENT_TIMING_BIT_EXT; if (swapchain_support_enabled) { - if (!add_swapchain_extension(wsi_ext_present_timing_headless::create(device, m_allocator))) + if (!add_swapchain_extension( + wsi_ext_present_timing_headless::create(device, m_allocator, swapchain_create_info->minImageCount))) { return VK_ERROR_OUT_OF_HOST_MEMORY; } diff --git a/wsi/swapchain_base.cpp b/wsi/swapchain_base.cpp index 5653d46..718a1bf 100644 --- a/wsi/swapchain_base.cpp +++ b/wsi/swapchain_base.cpp @@ -587,16 +587,6 @@ VkResult swapchain_base::notify_presentation_engine(const pending_present_reques VkResult swapchain_base::queue_present(VkQueue queue, const VkPresentInfoKHR *present_info, const 
swapchain_presentation_parameters &submit_info) { -#if VULKAN_WSI_LAYER_EXPERIMENTAL - auto *ext = get_swapchain_extension(); - if (ext) - { - wsi::swapchain_presentation_entry presentation_entry = {}; - presentation_entry.present_id = submit_info.pending_present.present_id; - TRY_LOG_CALL(ext->add_presentation_entry(presentation_entry)); - } -#endif - if (submit_info.switch_presentation_mode) { /* Assert when a presentation mode switch is requested and the swapchain_maintenance1 extension which implements this is not available */ @@ -624,6 +614,7 @@ VkResult swapchain_base::queue_present(VkQueue queue, const VkPresentInfoKHR *pr } void *submission_pnext = nullptr; + uint32_t count_signal_semaphores = 0; std::optional frame_boundary; /* Do not handle the event if it was handled before reaching this point */ if (submit_info.handle_present_frame_boundary_event) @@ -637,14 +628,32 @@ VkResult swapchain_base::queue_present(VkQueue queue, const VkPresentInfoKHR *pr submission_pnext = &frame_boundary.value(); } } - + if (submit_info.present_fence != VK_NULL_HANDLE) + { + signal_semaphores[count_signal_semaphores++] = + m_swapchain_images[submit_info.pending_present.image_index].present_fence_wait; + } +#if VULKAN_WSI_LAYER_EXPERIMENTAL + const VkPresentTimingInfoEXT *present_timing_info = VK_NULL_HANDLE; + const auto *present_timings_info = + util::find_extension(VK_STRUCTURE_TYPE_PRESENT_TIMINGS_INFO_EXT, present_info->pNext); + if (present_timings_info != VK_NULL_HANDLE) + { + present_timing_info = present_timings_info->pTimingInfos; + assert(present_timing_info != VK_NULL_HANDLE); + if (present_timing_info->presentStageQueries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT) + { + auto *ext_present_timing = get_swapchain_extension(true); + signal_semaphores[count_signal_semaphores++] = + ext_present_timing->get_image_present_semaphore(submit_info.pending_present.image_index); + } + } +#endif queue_submit_semaphores semaphores = { wait_semaphores, sem_count, - 
(submit_info.present_fence != VK_NULL_HANDLE) ? - &m_swapchain_images[submit_info.pending_present.image_index].present_fence_wait : - nullptr, - (submit_info.present_fence != VK_NULL_HANDLE) ? 1u : 0, + count_signal_semaphores > 0 ? signal_semaphores.data() : nullptr, + count_signal_semaphores, }; TRY_LOG_CALL(image_set_present_payload(m_swapchain_images[submit_info.pending_present.image_index], queue, semaphores, submission_pnext)); @@ -662,6 +671,15 @@ VkResult swapchain_base::queue_present(VkQueue queue, const VkPresentInfoKHR *pr TRY(notify_presentation_engine(submit_info.pending_present)); +#if VULKAN_WSI_LAYER_EXPERIMENTAL + if (present_timing_info != VK_NULL_HANDLE) + { + auto *ext_present_timing = get_swapchain_extension(true); + TRY_LOG_CALL(ext_present_timing->add_presentation_entry( + m_device_data, queue, submit_info.pending_present.present_id, submit_info.pending_present.image_index, + present_timing_info->presentStageQueries)); + } +#endif return VK_SUCCESS; } diff --git a/wsi/swapchain_base.hpp b/wsi/swapchain_base.hpp index 809cc77..b86e9f4 100644 --- a/wsi/swapchain_base.hpp +++ b/wsi/swapchain_base.hpp @@ -124,6 +124,14 @@ struct swapchain_presentation_parameters #endif }; +enum signal_semaphores_type +{ + SIGNAL_SEMAPHORE_PRESENT_FENCE, +#if VULKAN_WSI_LAYER_EXPERIMENTAL + SIGNAL_SEMAPHORE_PRESENT_TIMING, +#endif + SIGNAL_SEMAPHORE_MAX_NUM, +}; /** * @brief Base swapchain class * @@ -635,11 +643,6 @@ private: */ VkResult m_error_state; - /** - * @brief Wait for a buffer to become free. - */ - VkResult wait_for_free_buffer(uint64_t timeout); - /** * @brief A semaphore to be signalled once a free image becomes available. * @@ -650,6 +653,30 @@ private: */ util::timed_semaphore m_free_image_semaphore; + /** + * @brief A flag to track if swapchain has started presenting. + */ + bool m_started_presenting; + + /** + * @brief Holds the swapchain extensions and related functionalities. 
+ */ + wsi_ext_maintainer m_extensions; + + /** + * @brief Holds the VkImageCreateInfo and backend specific image create info extensions. + */ + swapchain_image_creator m_image_creator; + + /** @brief Signal semaphores for queue submit. + */ + std::array signal_semaphores; + + /** + * @brief Wait for a buffer to become free. + */ + VkResult wait_for_free_buffer(uint64_t timeout); + /** * @brief Per swapchain thread function that handles page flipping. * @@ -711,21 +738,6 @@ private: */ VkResult notify_presentation_engine(const pending_present_request &submit_info); - /** - * @brief A flag to track if swapchain has started presenting. - */ - bool m_started_presenting; - - /** - * @brief Holds the swapchain extensions and related functionalities. - */ - wsi_ext_maintainer m_extensions; - - /** - * @brief Holds the VkImageCreateInfo and backend specific image create info extensions. - */ - swapchain_image_creator m_image_creator; - /** * @brief Initialize m_image_creator. * diff --git a/wsi/synchronization.cpp b/wsi/synchronization.cpp index b4b5d25..cc9e3e2 100644 --- a/wsi/synchronization.cpp +++ b/wsi/synchronization.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024 Arm Limited. + * Copyright (c) 2021-2025 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -207,4 +207,27 @@ VkResult sync_queue_submit(const layer::device_private_data &device, VkQueue que return VK_SUCCESS; } +VkResult sync_queue_submit(const layer::device_private_data &device, VkQueue queue, VkFence fence, + const queue_submit_semaphores &semaphores, const command_buffer_data &command_buffer_data) +{ + util::vector pipeline_stage_flags_vector{ util::allocator( + device.get_allocator(), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND) }; + if (!pipeline_stage_flags_vector.try_resize(semaphores.wait_semaphores_count)) + { + return VK_ERROR_OUT_OF_HOST_MEMORY; + } + std::fill(pipeline_stage_flags_vector.begin(), pipeline_stage_flags_vector.end(), + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + /* When the semaphore that comes in is signalled, we know that all work is done. So, we do not + * want to block any future Vulkan queue work on it. So, we pass in BOTTOM_OF_PIPE bit as the + * wait flag. + */ + VkSubmitInfo submit_info = { VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr, + semaphores.wait_semaphores_count, semaphores.wait_semaphores, + pipeline_stage_flags_vector.data(), command_buffer_data.m_command_buffer_count, + command_buffer_data.m_command_buffers, semaphores.signal_semaphores_count, + semaphores.signal_semaphores }; + TRY(device.disp.QueueSubmit(queue, 1, &submit_info, fence)); + return VK_SUCCESS; +} } /* namespace wsi */ diff --git a/wsi/synchronization.hpp b/wsi/synchronization.hpp index 51f2018..ca2c7ac 100644 --- a/wsi/synchronization.hpp +++ b/wsi/synchronization.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2024 Arm Limited. + * Copyright (c) 2021-2025 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -53,6 +53,17 @@ struct queue_submit_semaphores uint32_t signal_semaphores_count; }; +struct command_buffer_data +{ + VkCommandBuffer *m_command_buffers{ nullptr }; + uint32_t m_command_buffer_count{ 0 }; + command_buffer_data(VkCommandBuffer *command_buffers, uint32_t command_buffer_count) + : m_command_buffers(command_buffers) + , m_command_buffer_count(command_buffer_count) + { + } +}; + /** * Synchronization using a Vulkan Fence object. */ @@ -198,4 +209,19 @@ private: */ VkResult sync_queue_submit(const layer::device_private_data &device, VkQueue queue, VkFence fence, const queue_submit_semaphores &semaphores, const void *submission_pnext = nullptr); + +/** + * @brief Submit queue operation for synchronization. + * + * @param device The device private data for the fence. + * @param queue The Vulkan queue that may be used to submit synchronization commands. + * @param fence The fence to be signalled, it could be VK_NULL_HANDLE in the absence + * of a fence to be signalled. + * @param semaphores The wait and signal semaphores. + * @param command_buffer_data Data of command buffer to be submitted. + * + * @return VK_SUCCESS on success, an appropriate error code otherwise. 
+ */ +VkResult sync_queue_submit(const layer::device_private_data &device, VkQueue queue, VkFence fence, + const queue_submit_semaphores &semaphores, const command_buffer_data &command_buffer_data); } /* namespace wsi */ diff --git a/wsi/wayland/present_timing_handler.cpp b/wsi/wayland/present_timing_handler.cpp index fb7b580..7058c9d 100644 --- a/wsi/wayland/present_timing_handler.cpp +++ b/wsi/wayland/present_timing_handler.cpp @@ -31,13 +31,15 @@ #include "present_timing_handler.hpp" #include -wsi_ext_present_timing_wayland::wsi_ext_present_timing_wayland(const util::allocator &allocator) - : wsi_ext_present_timing(allocator) +wsi_ext_present_timing_wayland::wsi_ext_present_timing_wayland(const util::allocator &allocator, VkDevice device, + uint32_t num_images) + : wsi_ext_present_timing(allocator, device, num_images) { } util::unique_ptr wsi_ext_present_timing_wayland::create( - VkTimeDomainKHR image_first_pixel_visible_time_domain, const util::allocator &allocator) + VkTimeDomainKHR image_first_pixel_visible_time_domain, const util::allocator &allocator, VkDevice device, + uint32_t num_images) { std::array, 2> time_domains_array = { allocator.make_unique(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT, @@ -46,7 +48,8 @@ util::unique_ptr wsi_ext_present_timing_wayland: image_first_pixel_visible_time_domain) }; - return wsi_ext_present_timing::create(allocator, time_domains_array); + return wsi_ext_present_timing::create(allocator, time_domains_array, device, + num_images); } VkResult wsi_ext_present_timing_wayland::get_swapchain_timing_properties( diff --git a/wsi/wayland/present_timing_handler.hpp b/wsi/wayland/present_timing_handler.hpp index c7e19d9..02c1808 100644 --- a/wsi/wayland/present_timing_handler.hpp +++ b/wsi/wayland/present_timing_handler.hpp @@ -42,13 +42,14 @@ class wsi_ext_present_timing_wayland : public wsi::wsi_ext_present_timing { public: static util::unique_ptr create(VkTimeDomainKHR image_first_pixel_visible_time_domain, - const 
util::allocator &allocator); + const util::allocator &allocator, VkDevice device, + uint32_t num_images); VkResult get_swapchain_timing_properties(uint64_t &timing_properties_counter, VkSwapchainTimingPropertiesEXT &timing_properties) override; private: - wsi_ext_present_timing_wayland(const util::allocator &allocator); + wsi_ext_present_timing_wayland(const util::allocator &allocator, VkDevice device, uint32_t num_images); /* Allow util::allocator to access the private constructor */ friend util::allocator; diff --git a/wsi/wayland/swapchain.cpp b/wsi/wayland/swapchain.cpp index b7299c3..db87f4a 100644 --- a/wsi/wayland/swapchain.cpp +++ b/wsi/wayland/swapchain.cpp @@ -146,7 +146,9 @@ VkResult swapchain::add_required_extensions(VkDevice device, const VkSwapchainCr image_first_pixel_visible_time_domain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR; } if (!add_swapchain_extension( - wsi_ext_present_timing_wayland::create(image_first_pixel_visible_time_domain, m_allocator))) + + wsi_ext_present_timing_wayland::create(image_first_pixel_visible_time_domain, m_allocator, m_device, + swapchain_create_info->minImageCount))) { return VK_ERROR_OUT_OF_HOST_MEMORY; }