Support nonzero present timing queue family

Adds a per-device check for a 'best' queue family to
use for present timing, rather than always using index 0.

wsi_ext_present_timing now groups its resources (such as the
command buffer) by queue family instead of keeping a single set,
although only one queue family is supported at a time for now.
In future, we can hook vkGetDeviceQueue calls to map each VkQueue
to its family index; doing so will transparently extend present
timing support to multiple queue families, as sketched below.
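
As a rough illustration only (not part of this change), such a hook might
look like the sketch below; note_queue_family() is a hypothetical helper,
and this assumes GetDeviceQueue is reachable through the layer's device
dispatch table:

   VWL_VKAPI_CALL(void)
   wsi_layer_vkGetDeviceQueue(VkDevice device, uint32_t queueFamilyIndex, uint32_t queueIndex,
                              VkQueue *pQueue) VWL_API_POST
   {
      auto &device_data = layer::device_private_data::get(device);
      device_data.disp.GetDeviceQueue(device, queueFamilyIndex, queueIndex, pQueue);
      /* Hypothetical bookkeeping: remember which family each VkQueue belongs to so that
       * present timing can later pick the per-family resources for the presenting queue. */
      device_data.note_queue_family(*pQueue, queueFamilyIndex);
   }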

Signed-off-by: Alex Bates <alex.bates@arm.com>
Change-Id: I5becb29dfc4a082e301031e0c693acd23eb95a51
Alex Bates, 2025-10-06 10:27:34 +00:00 (committed by Rosen Zhelev)
parent e1d77594fb
commit ace729f7f8
14 changed files with 367 additions and 111 deletions


@@ -40,6 +40,7 @@
#include "util/extension_list.hpp"
#include "util/custom_allocator.hpp"
#include "wsi/wsi_factory.hpp"
+#include "wsi/extensions/present_timing.hpp"
#include "util/log.hpp"
#include "util/macros.hpp"
#include "util/helpers.hpp"
@@ -558,16 +559,15 @@ wsi_layer_vkGetPhysicalDeviceFeatures2(VkPhysicalDevice physical_device,
      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_TIMING_FEATURES_EXT, pFeatures->pNext);
   if (present_timing_features != nullptr)
   {
-      VkPhysicalDeviceProperties2KHR physical_device_properties{};
-      physical_device_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
-      instance.disp.GetPhysicalDeviceProperties2KHR(physical_device, &physical_device_properties);
-      /* The presentTimingSupported is set based on whether the device can support timestamp queries
-       * and the graphics, compute pipelines can support time stamps. Only the graphics and compute pipelines
-       * are checked here which means queue present if happens on a different queue family,
-       * the time stamps might not be supported. */
-      present_timing_features->presentTiming =
-         ((physical_device_properties.properties.limits.timestampPeriod != 0) &&
-          physical_device_properties.properties.limits.timestampComputeAndGraphics);
+      bool support;
+      if (wsi::wsi_ext_present_timing::physical_device_has_supported_queue_family(physical_device, support) !=
+          VK_SUCCESS)
+      {
+         WSI_LOG_ERROR("Failed to query physical device for present timing support");
+         support = false;
+      }
+
+      present_timing_features->presentTiming = support ? VK_TRUE : VK_FALSE;
      present_timing_features->presentAtAbsoluteTime = VK_TRUE;
      present_timing_features->presentAtRelativeTime = VK_TRUE;
   }


@@ -25,6 +25,7 @@
#include <vulkan/vulkan.h>
#include "private_data.hpp"
+#include "vulkan/vulkan_core.h"
#include "wsi/wsi_factory.hpp"
#include "wsi/surface.hpp"
#include "wsi/unsupported_surfaces.hpp"
@@ -477,10 +478,64 @@ device_private_data::device_private_data(instance_private_data &inst_data, VkPhy
   , present_wait2_enabled { false }
   , present_id2_enabled { false }
   , present_mode_fifo_latest_ready_enabled { false }
+   , best_queue_family_index(instance_data.get_best_queue_family(phys_dev))
   /* clang-format on */
{
}

util::vector<VkQueueFamilyProperties2> instance_private_data::get_queue_family_properties(VkPhysicalDevice phys_dev)
{
uint32_t count = 0;
disp.GetPhysicalDeviceQueueFamilyProperties2KHR(phys_dev, &count, nullptr);
assert(count > 0);
util::vector<VkQueueFamilyProperties2> properties(allocator);
if (!properties.try_resize(count))
{
WSI_LOG_ERROR("Failed to allocate VkQueueFamilyProperties2[%u]", count);
return properties;
}
for (size_t i = 0; i < count; ++i)
{
properties[i].sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2;
properties[i].pNext = nullptr;
}
disp.GetPhysicalDeviceQueueFamilyProperties2KHR(phys_dev, &count, properties.data());
return properties;
}

uint32_t instance_private_data::get_best_queue_family(VkPhysicalDevice phys_dev)
{
const auto families = get_queue_family_properties(phys_dev);
if (families.empty())
{
/* Allocation failed. 0 is a valid return value as there must be at least one queue family. */
return 0;
}
uint32_t best_score = 0;
uint32_t best_timestamp_bits = 0; /* Tiebreaker for same score */
uint32_t best_index = 0;
for (uint32_t i = 0; i < families.size(); ++i)
{
const auto &props = families[i].queueFamilyProperties;
/* Prefer graphics + compute, then graphics, then compute */
VkQueueFlags mask = props.queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT);
uint32_t score = (mask & VK_QUEUE_GRAPHICS_BIT ? 2 : 0) + (mask & VK_QUEUE_COMPUTE_BIT ? 1 : 0);
if (score > best_score || (score == best_score && props.timestampValidBits > best_timestamp_bits))
{
best_score = score;
best_timestamp_bits = props.timestampValidBits;
best_index = i;
}
}
return best_index;
}

VkResult device_private_data::associate(VkDevice dev, instance_private_data &inst_data, VkPhysicalDevice phys_dev,
                                        device_dispatch_table table, PFN_vkSetDeviceLoaderData set_loader_data,
                                        const util::allocator &allocator)
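
For illustration, here is a standalone sketch of the same scoring heuristic used by
get_best_queue_family, run over hardcoded queue flags for a hypothetical device with a
transfer-only, a compute-only and a graphics+compute family. It assumes only the Vulkan
headers; timestampValidBits is left out because it only breaks ties between equal scores.

   #include <vulkan/vulkan.h>
   #include <cstdint>
   #include <cstdio>

   int main()
   {
      /* Hypothetical device: family 0 = transfer only (score 0), family 1 = compute (score 1),
       * family 2 = graphics + compute (score 3). */
      const VkQueueFlags families[] = { VK_QUEUE_TRANSFER_BIT, VK_QUEUE_COMPUTE_BIT,
                                        VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT };
      uint32_t best_index = 0;
      uint32_t best_score = 0;
      for (uint32_t i = 0; i < 3; ++i)
      {
         /* Same weighting as above: prefer graphics + compute, then graphics, then compute. */
         uint32_t score = (families[i] & VK_QUEUE_GRAPHICS_BIT ? 2 : 0) + (families[i] & VK_QUEUE_COMPUTE_BIT ? 1 : 0);
         if (score > best_score)
         {
            best_score = score;
            best_index = i;
         }
      }
      std::printf("best queue family: %u\n", best_index); /* prints 2 */
      return 0;
   }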


@@ -666,12 +666,37 @@ public:
    * @brief Check if a physical device supports controlling image compression.
    *
    * @param phys_dev The physical device to query.
+   *
    * @return Whether image compression control is supported by the ICD.
    */
   bool has_image_compression_support(VkPhysicalDevice phys_dev);

   bool has_frame_boundary_support(VkPhysicalDevice phys_dev);

/**
* @brief Queries the properties of all queue families of a physical device.
*
* @param phys_dev The physical device to query.
*
* @return A vector of VkQueueFamilyProperties2 chains. If it is empty, allocation failed.
*/
util::vector<VkQueueFamilyProperties2> get_queue_family_properties(VkPhysicalDevice phys_dev);
/**
* @brief Gets the index of the 'best' queue family.
*
* Queries queue family properties and returns the index of the family that:
* - Supports graphics and compute; or
* - Supports graphics; or
* - Supports compute
* And, as a tiebreaker, has the largest timestampValidBits.
*
* @param phys_dev The physical device to query.
*
* @return The index of the best queue family.
*/
uint32_t get_best_queue_family(VkPhysicalDevice phys_dev);

   /**
    * @brief Get the instance allocator
    *
@@ -995,6 +1020,14 @@ public:
    */
   bool is_present_wait2_enabled();

/**
* @brief Gets the queue family index used for present timing on this device.
*/
uint32_t get_best_queue_family_index() const
{
return best_queue_family_index;
}

private:
   /* Allow util::allocator to access the private constructor */
   friend util::allocator;
@@ -1082,6 +1115,11 @@ private:
    *
    */
   bool present_mode_fifo_latest_ready_enabled{ false };

/**
* @brief Most suitable queue family for WSI operations.
*/
uint32_t best_queue_family_index;
};

} /* namespace layer */


@@ -156,13 +156,14 @@ wsi_layer_vkGetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice,
                                               VkSurfaceKHR surface, VkBool32 *pSupported) VWL_API_POST
{
   auto &instance = layer::instance_private_data::get(physicalDevice);
-   if (instance.should_layer_handle_surface(physicalDevice, surface))
+   if (!instance.should_layer_handle_surface(physicalDevice, surface))
   {
-      *pSupported = VK_TRUE;
-      return VK_SUCCESS;
+      /* The surface must have been created by a layer below us. */
+      return instance.disp.GetPhysicalDeviceSurfaceSupportKHR(physicalDevice, queueFamilyIndex, surface, pSupported);
   }
-   return instance.disp.GetPhysicalDeviceSurfaceSupportKHR(physicalDevice, queueFamilyIndex, surface, pSupported);
+
+   *pSupported = queueFamilyIndex == instance.get_best_queue_family(physicalDevice);
+   return VK_SUCCESS;
}

VWL_VKAPI_CALL(void)


@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2022, 2024 Arm Limited.
+ * Copyright (c) 2020-2022, 2024-2025 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -302,6 +302,20 @@ public:
   using base = std::vector<T, custom_allocator<T>>;
   using base::base;

/** Must be constructed with a custom_allocator. */
vector() = delete;
/* Support moves despite noncopyable */
vector(vector &&other) noexcept
: base(std::move(other))
{
}
vector &operator=(vector &&other) noexcept
{
base::operator=(std::move(other));
return *this;
}

   /* Delete all methods that can cause allocation failure, i.e. can throw std::bad_alloc.
    *
    * Rationale: we want to force users to use our corresponding try_... method instead:
@@ -393,6 +407,22 @@ public:
         return false;
      }
   }

/**
* @brief noexcept version of std::vector::at.
*
* @param index The index to access.
*
* @return Pointer to the element at the given index. nullptr if the index is out of bounds.
*/
T *try_at(size_t index) noexcept
{
if (index >= base::size())
{
return nullptr;
}
return &base::operator[](index);
}
};

} /* namespace util */


@@ -29,6 +29,7 @@
 */

#include "custom_mutex.hpp"
+#include <cstring>

namespace util
{
@@ -42,6 +43,7 @@ bool mutex::lock() noexcept
   }
   catch (const std::system_error &)
   {
+      WSI_LOG_WARNING("Failed to lock mutex: error %d (%s)", errno, std::strerror(errno));
      return false;
   }
}
@@ -77,6 +79,7 @@ bool recursive_mutex::lock() noexcept
   }
   catch (const std::system_error &)
   {
+      WSI_LOG_WARNING("Failed to lock recursive_mutex: error %d (%s)", errno, std::strerror(errno));
      return false;
   }
}


@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, 2024 Arm Limited.
+ * Copyright (c) 2021-2022, 2024-2025 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -46,6 +46,9 @@ class unordered_map : public std::unordered_map<Key, Value, Hash, Comparator, Al
   using iterator = typename base::iterator;

public:
/** Must be constructed with a custom_allocator. */
unordered_map() = delete;

   /**
    * Delete all member functions that can cause allocation failure by throwing std::bad_alloc.
    */
@@ -88,6 +91,27 @@
      }
   }

/**
* @brief Like std::unordered_map.try_emplace but doesn't throw on out of memory errors.
*
* @param args The arguments to be forwarded to the constructor of the value type.
* @return std::pair<iterator, bool> If successful, the pair will contain
* the same return value as from std::unordered_map.emplace, otherwise
* if out of memory, the function returns { base::end(), false }.
*/
template <class... Args>
std::pair<iterator, bool> try_emplace(Args &&...args)
{
try
{
return base::try_emplace(std::forward<Args>(args)...);
}
catch (std::bad_alloc &e)
{
return { base::end(), false };
}
}

   /**
    * @brief Like std::unordered_map.reserve but doesn't throw on out of memory errors.
    *


@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022, 2024 Arm Limited.
+ * Copyright (c) 2021-2022, 2024-2025 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -47,6 +47,9 @@ class unordered_set : public std::unordered_set<Key, Hash, Comparator, Allocator
   using iterator = typename base::iterator;

public:
/** Must be constructed with a custom_allocator. */
unordered_set() = delete;

   /**
    * Delete all member functions that can cause allocation failure by throwing std::bad_alloc.
    */


@@ -53,16 +53,14 @@ wsi_ext_present_timing::wsi_ext_present_timing(const util::allocator &allocator,
   : m_allocator(allocator)
   , m_time_domains(allocator)
   , m_device(layer::device_private_data::get(device))
-   , m_query_pool(VK_NULL_HANDLE)
-   , m_command_pool(VK_NULL_HANDLE)
-   , m_command_buffer(allocator)
+   , m_queue(allocator)
   , m_device_timestamp_cached(allocator)
   , m_queue_mutex()
-   , m_queue(allocator)
   , m_scheduled_present_targets(allocator)
   , m_num_images(num_images)
   , m_present_semaphore(allocator)
   , m_timestamp_period(0.f)
+   , m_queue_family_resources(allocator, m_device)
{
   if (!m_device.is_present_id_enabled())
   {
@@ -79,22 +77,6 @@ wsi_ext_present_timing::wsi_ext_present_timing(const util::allocator &allocator,

wsi_ext_present_timing::~wsi_ext_present_timing()
{
-   m_device.disp.FreeCommandBuffers(m_device.device, m_command_pool, m_command_buffer.size(), m_command_buffer.data());
-   for (auto &command_buffer : m_command_buffer)
-   {
-      command_buffer = VK_NULL_HANDLE;
-   }
-
-   if (m_command_pool != VK_NULL_HANDLE)
-   {
-      m_device.disp.DestroyCommandPool(m_device.device, m_command_pool, m_allocator.get_original_callbacks());
-      m_command_pool = VK_NULL_HANDLE;
-   }
-
-   if (m_query_pool != VK_NULL_HANDLE)
-   {
-      m_device.disp.DestroyQueryPool(m_device.device, m_query_pool, m_allocator.get_original_callbacks());
-      m_query_pool = VK_NULL_HANDLE;
-   }
-
   for (const auto &semaphore : m_present_semaphore)
   {
      if (semaphore != VK_NULL_HANDLE)
@@ -110,7 +92,6 @@ VkResult wsi_ext_present_timing::init_timing_resources()
   {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }
   if (!m_present_semaphore.try_resize(m_num_images))
   {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }
return VK_ERROR_OUT_OF_HOST_MEMORY; return VK_ERROR_OUT_OF_HOST_MEMORY;
} }
} }
/* Resize the command buffer to the number of images. */ TRY_LOG_CALL(m_queue_family_resources.init(m_device.get_best_queue_family_index(), m_num_images));
if (!m_command_buffer.try_resize(m_num_images))
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
/* Resize cached device timestamp records to the number of images. */
if (!m_device_timestamp_cached.try_resize(m_num_images, 0ULL))
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
for (auto &command_buffer : m_command_buffer)
{
command_buffer = VK_NULL_HANDLE;
}
/* Allocate the command pool and query pool. */
VkQueryPoolCreateInfo query_pool_info = {
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, m_num_images, 0
};
TRY_LOG_CALL(m_device.disp.CreateQueryPool(m_device.device, &query_pool_info, m_allocator.get_original_callbacks(),
&m_query_pool));
VkCommandPoolCreateInfo command_pool_info{ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr,
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, 0 };
TRY_LOG_CALL(m_device.disp.CreateCommandPool(m_device.device, &command_pool_info,
m_allocator.get_original_callbacks(), &m_command_pool));
/* Allocate and write the command buffer. */
VkCommandBufferAllocateInfo command_buffer_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
m_command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, m_num_images };
TRY_LOG_CALL(m_device.disp.AllocateCommandBuffers(m_device.device, &command_buffer_info, m_command_buffer.data()));
VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, 0, nullptr };
for (size_t image_index = 0; image_index < m_num_images; image_index++)
{
TRY_LOG_CALL(m_device.disp.BeginCommandBuffer(m_command_buffer[image_index], &begin_info));
m_device.disp.CmdResetQueryPool(m_command_buffer[image_index], m_query_pool, image_index, 1);
m_device.disp.CmdWriteTimestamp(m_command_buffer[image_index], VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_query_pool,
image_index);
TRY_LOG_CALL(m_device.disp.EndCommandBuffer(m_command_buffer[image_index]));
}
return VK_SUCCESS; return VK_SUCCESS;
} }
@@ -185,12 +130,22 @@ static inline uint64_t ticks_to_ns(uint64_t ticks, const float &timestamp_period
swapchain_presentation_timing *wsi_ext_present_timing::get_pending_stage_timing(uint32_t image_index,
                                                                                 VkPresentStageFlagBitsEXT stage)
+{
+   if (auto *entry = get_pending_stage_entry(image_index, stage))
+   {
+      return &entry->get_stage_timing(stage)->get();
+   }
+   return nullptr;
+}
+
+swapchain_presentation_entry *wsi_ext_present_timing::get_pending_stage_entry(uint32_t image_index,
+                                                                              VkPresentStageFlagBitsEXT stage)
{
   for (auto &entry : m_queue)
   {
      if (entry.m_image_index == image_index && entry.is_pending(stage))
      {
-         return &entry.get_stage_timing(stage)->get();
+         return &entry;
      }
   }
   return nullptr;
@@ -202,9 +157,16 @@ VkResult wsi_ext_present_timing::write_pending_results()
   {
      if (slot.is_pending(VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT))
      {
+         /* Resize cached device timestamp records to the number of images. */
+         if (!m_device_timestamp_cached.try_resize(m_num_images, 0ULL))
+         {
+            return VK_ERROR_OUT_OF_HOST_MEMORY;
+         }
+
         uint64_t timestamp;
-         VkResult res = m_device.disp.GetQueryPoolResults(m_device.device, m_query_pool, slot.m_image_index, 1,
-                                                          sizeof(timestamp), &timestamp, 0, VK_QUERY_RESULT_64_BIT);
+         VkResult res = m_device.disp.GetQueryPoolResults(m_device.device, m_queue_family_resources.m_query_pool,
+                                                          slot.m_image_index, 1, sizeof(timestamp), &timestamp, 0,
+                                                          VK_QUERY_RESULT_64_BIT);
         if (res != VK_SUCCESS && res != VK_NOT_READY)
         {
            return res;
@@ -281,8 +243,8 @@ size_t wsi_ext_present_timing::present_timing_get_num_outstanding_results()
VkResult wsi_ext_present_timing::queue_submit_queue_end_timing(const layer::device_private_data &device, VkQueue queue,
                                                               uint32_t image_index)
{
-   assert(image_index < m_command_buffer.size());
-   command_buffer_data command_buffer_data(&m_command_buffer[image_index], 1);
+   assert(image_index < m_queue_family_resources.m_command_buffer.size());
+   command_buffer_data command_buffer_data(&m_queue_family_resources.m_command_buffer[image_index], 1);
   VkSemaphore present_timing_semaphore = get_image_present_semaphore(image_index);
   queue_submit_semaphores present_timing_semaphores = {
      &present_timing_semaphore,
@@ -310,8 +272,8 @@ VkResult wsi_ext_present_timing::add_presentation_query_entry(VkQueue queue, uin
   {
      return VK_ERROR_PRESENT_TIMING_QUEUE_FULL_EXT;
   }
-
-   wsi::swapchain_presentation_entry presentation_entry(target_time, present_stage_queries, present_id, image_index);
+   wsi::swapchain_presentation_entry presentation_entry(target_time, present_stage_queries, present_id, image_index,
+                                                        m_device.get_best_queue_family_index());
   if (!m_queue.try_push_back(std::move(presentation_entry)))
   {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -495,14 +457,33 @@ bool wsi_ext_present_timing::is_stage_pending_for_image_index(uint32_t image_ind
   return (get_pending_stage_timing(image_index, present_stage) != nullptr);
}

VkResult wsi_ext_present_timing::physical_device_has_supported_queue_family(VkPhysicalDevice physical_device, bool &out)
{
auto &instance = layer::instance_private_data::get(physical_device);
const auto all_props = instance.get_queue_family_properties(physical_device);
if (all_props.empty())
{
out = false;
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
out = std::any_of(all_props.begin(), all_props.end(), [](const VkQueueFamilyProperties2 &props) {
return (props.queueFamilyProperties.queueFlags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)) &&
(props.queueFamilyProperties.timestampValidBits > 0);
});
return VK_SUCCESS;
}

swapchain_presentation_entry::swapchain_presentation_entry(uint64_t target_time,
                                                           VkPresentStageFlagsEXT present_stage_queries,
-                                                           uint64_t present_id, uint32_t image_index)
+                                                           uint64_t present_id, uint32_t image_index,
+                                                           uint32_t queue_family)
   : m_target_time(target_time)
   , m_target_stages(0)
   , m_present_id(present_id)
   , m_image_index(image_index)
   , m_num_present_stages(0)
+   , m_queue_family(queue_family)
{
   if (present_stage_queries & VK_PRESENT_STAGE_QUEUE_OPERATIONS_END_BIT_EXT)
   {


@@ -33,6 +33,7 @@
#include <layer/wsi_layer_experimental.hpp>
#include <layer/private_data.hpp>
+#include <wsi/swapchain_base.hpp>
#include <util/custom_allocator.hpp>
#include <util/custom_mutex.hpp>
#include <util/macros.hpp>
@@ -126,6 +127,18 @@ struct swapchain_presentation_entry
    */
   size_t m_num_present_stages;

/**
* The queue family used to submit synchronization commands for this entry.
*/
uint32_t m_queue_family{ 0 };
/**
* When serving a get past presentation timings request, this field
* keeps the status of whether the slot had already been copied to
* the results.
*/
bool copied;

   /**
    * The variables to keep timing stages.
    */
@@ -135,7 +148,7 @@ struct swapchain_presentation_entry
   std::optional<swapchain_presentation_timing> m_first_pixel_visible_timing;

   swapchain_presentation_entry(uint64_t target_time, VkPresentStageFlagsEXT present_stage_queries, uint64_t present_id,
-                               uint32_t image_index);
+                               uint32_t image_index, uint32_t queue_family);

   swapchain_presentation_entry(swapchain_presentation_entry &&) noexcept = default;
   swapchain_presentation_entry &operator=(swapchain_presentation_entry &&) noexcept = default;
@@ -293,6 +306,97 @@ private:
   util::vector<util::unique_ptr<swapchain_time_domain>> m_time_domains;
};

/**
* @brief Resources specific to a particular queue family index.
*/
class queue_family_resources
{
public:
queue_family_resources(util::allocator allocator, layer::device_private_data &device)
: m_command_pool(VK_NULL_HANDLE)
, m_command_buffer(allocator)
, m_query_pool(VK_NULL_HANDLE)
, m_allocator(allocator)
, m_device(device)
{
}
~queue_family_resources()
{
if (m_command_pool != VK_NULL_HANDLE)
{
m_device.disp.FreeCommandBuffers(m_device.device, m_command_pool, m_command_buffer.size(),
m_command_buffer.data());
m_device.disp.DestroyCommandPool(m_device.device, m_command_pool, m_allocator.get_original_callbacks());
m_command_pool = VK_NULL_HANDLE;
}
if (m_query_pool != VK_NULL_HANDLE)
{
m_device.disp.DestroyQueryPool(m_device.device, m_query_pool, m_allocator.get_original_callbacks());
m_query_pool = VK_NULL_HANDLE;
}
}
VkResult init(uint32_t queue_family_index, uint32_t num_images)
{
/* Resize the command buffer to the number of images. */
if (!m_command_buffer.try_resize(num_images))
{
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
for (auto &command_buffer : m_command_buffer)
{
command_buffer = VK_NULL_HANDLE;
}
/* Allocate the command pool and query pool. */
VkQueryPoolCreateInfo query_pool_info = {
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, nullptr, 0, VK_QUERY_TYPE_TIMESTAMP, num_images, 0
};
TRY_LOG_CALL(m_device.disp.CreateQueryPool(m_device.device, &query_pool_info,
m_allocator.get_original_callbacks(), &m_query_pool));
VkCommandPoolCreateInfo command_pool_info{ VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, nullptr,
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queue_family_index };
TRY_LOG_CALL(m_device.disp.CreateCommandPool(m_device.device, &command_pool_info,
m_allocator.get_original_callbacks(), &m_command_pool));
/* Allocate and write the command buffer. */
VkCommandBufferAllocateInfo command_buffer_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
m_command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, num_images };
TRY_LOG_CALL(
m_device.disp.AllocateCommandBuffers(m_device.device, &command_buffer_info, m_command_buffer.data()));
VkCommandBufferBeginInfo begin_info = { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, nullptr, 0, nullptr };
for (size_t image_index = 0; image_index < num_images; image_index++)
{
TRY_LOG_CALL(m_device.disp.BeginCommandBuffer(m_command_buffer[image_index], &begin_info));
m_device.disp.CmdResetQueryPool(m_command_buffer[image_index], m_query_pool, image_index, 1);
m_device.disp.CmdWriteTimestamp(m_command_buffer[image_index], VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
m_query_pool, image_index);
TRY_LOG_CALL(m_device.disp.EndCommandBuffer(m_command_buffer[image_index]));
}
return VK_SUCCESS;
}
/**
* @brief The command pool for allocating the buffers for the present stage timings.
*/
VkCommandPool m_command_pool;
/**
* @brief The command buffer for the present stage timings.
*/
util::vector<VkCommandBuffer> m_command_buffer;
/**
* @brief Query pool to allocate for present stage timing queries.
*/
VkQueryPool m_query_pool;
private:
util::allocator m_allocator;
layer::device_private_data &m_device;
};

/**
 * @brief Structure describing a scheduled present target.
 */
@@ -350,9 +454,9 @@ public:
   /**
    * @brief Constructor for the wsi_ext_present_timing class.
    *
    * @param allocator Reference to the custom allocator.
    * @param device The device to which the swapchain belongs.
    * @param num_images Number of images in the swapchain.
    *
    */
   wsi_ext_present_timing(const util::allocator &allocator, VkDevice device, uint32_t num_images);
@@ -504,6 +608,17 @@ public:
   virtual VkResult get_pixel_out_timing_to_queue(
      uint32_t image_index, std::optional<std::reference_wrapper<swapchain_presentation_timing>> stage_timing_optional);

/**
* @brief Determines if present_timing is supported by the physical device.
*
* @param physical_device The physical device to check.
* @param out Reference to a boolean that will be set to true if the physical device has a queue family that supports
* presentation and timestamp queries, false otherwise.
*
* @return VK_SUCCESS iff out set.
*/
static VkResult physical_device_has_supported_queue_family(VkPhysicalDevice physical_device, bool &out);

protected:
   /**
    * @brief User provided memory allocation callbacks.
@@ -522,19 +637,9 @@
   layer::device_private_data &m_device;

   /**
-    * @brief Query pool to allocate for present stage timing queries.
+    * @brief The presentation timing queue.
    */
-   VkQueryPool m_query_pool;
-
-   /**
-    * @brief The command pool for allocating the buffers for the present stage timings.
-    */
-   VkCommandPool m_command_pool;
-
-   /**
-    * @brief The command buffer for the present stage timings.
-    */
-   util::vector<VkCommandBuffer> m_command_buffer;
+   util::vector<swapchain_presentation_entry> m_queue;

   /**
    * @brief Stores the device timestamp recorded from the previous
@@ -552,11 +657,6 @@
    */
   util::mutex m_queue_mutex;

-   /**
-    * @brief The presentation timing queue.
-    */
-   util::vector<swapchain_presentation_entry> m_queue;
-
   /**
    * @brief The presentation target entries.
    */
@@ -577,6 +677,11 @@
    */
   float m_timestamp_period;

/**
* @brief Resources associated with the 'best' queue family.
*/
queue_family_resources m_queue_family_resources;

   /**
    * @brief Perform a queue submission for getting the queue end timing.
    *
@@ -584,7 +689,7 @@
    * @param queue The Vulkan queue used to submit synchronization commands.
    * @param image_index The index of the image in the swapchain.
    *
-    * @return VK_SUCCESS when the submission is successfully and error otherwise.
+    * @return VK_SUCCESS when the submission is successful and error otherwise.
    */
   VkResult queue_submit_queue_end_timing(const layer::device_private_data &device, VkQueue queue,
                                          uint32_t image_index);
@@ -616,6 +721,22 @@ private:
    */
   swapchain_presentation_timing *get_pending_stage_timing(uint32_t image_index, VkPresentStageFlagBitsEXT stage);

/**
* @pre Caller must hold m_queue_mutex for the call and lifetime of the returned pointer.
*
* @brief Search for a pending presentation entry.
*
* For an image index, there can only be one entry in the queue with pending stages.
* This does not take a present ID because zero is a valid, nonunique value and thus cannot uniquely identify an
* entry.
*
* @param image_index The index of the image in the present queue.
* @param stage The present stage to get the entry for.
*
* @return Pointer to the entry, or nullptr if it is not found or the stage is not pending.
*/
swapchain_presentation_entry *get_pending_stage_entry(uint32_t image_index, VkPresentStageFlagBitsEXT stage);

   /**
    * @pre Caller must hold m_queue_mutex
    *


@@ -43,9 +43,8 @@ wsi_ext_present_timing_headless::wsi_ext_present_timing_headless(const util::all
}

util::unique_ptr<wsi_ext_present_timing_headless> wsi_ext_present_timing_headless::create(
-   const VkDevice &device, const util::allocator &allocator, uint32_t num_images)
+   const util::allocator &allocator, const VkDevice &device, uint32_t num_images)
{
   auto &dev_data = layer::device_private_data::get(device);

   /*


@@ -32,6 +32,7 @@
#if VULKAN_WSI_LAYER_EXPERIMENTAL

#include <wsi/extensions/present_timing.hpp>
+#include <wsi/swapchain_base.hpp>

/**
 * @brief Present timing extension class
@@ -41,9 +42,8 @@
class wsi_ext_present_timing_headless : public wsi::wsi_ext_present_timing
{
public:
-   static util::unique_ptr<wsi_ext_present_timing_headless> create(const VkDevice &device,
-                                                                    const util::allocator &allocator,
-                                                                    uint32_t num_images);
+   static util::unique_ptr<wsi_ext_present_timing_headless> create(const util::allocator &allocator,
+                                                                    const VkDevice &device, uint32_t num_images);

   VkResult get_swapchain_timing_properties(uint64_t &timing_properties_counter,
                                            VkSwapchainTimingPropertiesEXT &timing_properties) override;


@@ -102,7 +102,7 @@ VkResult swapchain::add_required_extensions(VkDevice device, const VkSwapchainCr
   if (swapchain_support_enabled)
   {
      if (!add_swapchain_extension(
-             wsi_ext_present_timing_headless::create(device, m_allocator, swapchain_create_info->minImageCount)))
+             wsi_ext_present_timing_headless::create(m_allocator, device, swapchain_create_info->minImageCount)))
      {
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }


@@ -135,6 +135,7 @@ enum signal_semaphores_type
#endif
   SIGNAL_SEMAPHORE_MAX_NUM,
};

/**
 * @brief Base swapchain class
 *