mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 05:00:09 +01:00
vulkan/overlay: add pipeline statistic & timestamps support
v2: switch to VkBase{In,Out}Structure
v3: Add timestamps at begin/end of primary command buffers to estimate
gpu time spent per submission (Lionel)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Acked-by: Eric Engestrom <eric.engestrom@intel.com> (v2)
This commit is contained in:
parent
4438188f49
commit
ea7a6fa980
2 changed files with 382 additions and 54 deletions
|
|
@ -34,6 +34,7 @@
|
||||||
|
|
||||||
#include "util/debug.h"
|
#include "util/debug.h"
|
||||||
#include "util/hash_table.h"
|
#include "util/hash_table.h"
|
||||||
|
#include "util/list.h"
|
||||||
#include "util/ralloc.h"
|
#include "util/ralloc.h"
|
||||||
#include "util/os_time.h"
|
#include "util/os_time.h"
|
||||||
#include "util/simple_mtx.h"
|
#include "util/simple_mtx.h"
|
||||||
|
|
@ -47,6 +48,7 @@ struct instance_data {
|
||||||
VkInstance instance;
|
VkInstance instance;
|
||||||
|
|
||||||
struct overlay_params params;
|
struct overlay_params params;
|
||||||
|
bool pipeline_statistics_enabled;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct frame_stat {
|
struct frame_stat {
|
||||||
|
|
@ -83,9 +85,12 @@ struct command_buffer_data {
|
||||||
|
|
||||||
VkCommandBuffer cmd_buffer;
|
VkCommandBuffer cmd_buffer;
|
||||||
VkQueryPool pipeline_query_pool;
|
VkQueryPool pipeline_query_pool;
|
||||||
|
VkQueryPool timestamp_query_pool;
|
||||||
uint32_t query_index;
|
uint32_t query_index;
|
||||||
|
|
||||||
struct frame_stat stats;
|
struct frame_stat stats;
|
||||||
|
|
||||||
|
struct list_head link; /* link into queue_data::running_command_buffer */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Mapped from VkQueue */
|
/* Mapped from VkQueue */
|
||||||
|
|
@ -95,6 +100,11 @@ struct queue_data {
|
||||||
VkQueue queue;
|
VkQueue queue;
|
||||||
VkQueueFlags flags;
|
VkQueueFlags flags;
|
||||||
uint32_t family_index;
|
uint32_t family_index;
|
||||||
|
uint64_t timestamp_mask;
|
||||||
|
|
||||||
|
VkFence queries_fence;
|
||||||
|
|
||||||
|
struct list_head running_command_buffer;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Mapped from VkSwapchainKHR */
|
/* Mapped from VkSwapchainKHR */
|
||||||
|
|
@ -142,7 +152,6 @@ struct swapchain_data {
|
||||||
VkBuffer upload_font_buffer;
|
VkBuffer upload_font_buffer;
|
||||||
VkDeviceMemory upload_font_buffer_mem;
|
VkDeviceMemory upload_font_buffer_mem;
|
||||||
|
|
||||||
VkFence fence;
|
|
||||||
VkSemaphore submission_semaphore;
|
VkSemaphore submission_semaphore;
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
|
|
@ -158,6 +167,7 @@ struct swapchain_data {
|
||||||
double fps;
|
double fps;
|
||||||
|
|
||||||
enum overlay_param_enabled stat_selector;
|
enum overlay_param_enabled stat_selector;
|
||||||
|
double time_dividor;
|
||||||
struct frame_stat stats_min, stats_max;
|
struct frame_stat stats_min, stats_max;
|
||||||
struct frame_stat frames_stats[200];
|
struct frame_stat frames_stats[200];
|
||||||
|
|
||||||
|
|
@ -168,6 +178,20 @@ struct swapchain_data {
|
||||||
struct frame_stat accumulated_stats;
|
struct frame_stat accumulated_stats;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const VkQueryPipelineStatisticFlags overlay_query_flags =
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT |
|
||||||
|
VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
|
||||||
|
#define OVERLAY_QUERY_COUNT (11)
|
||||||
|
|
||||||
static struct hash_table *vk_object_to_data = NULL;
|
static struct hash_table *vk_object_to_data = NULL;
|
||||||
static simple_mtx_t vk_object_to_data_mutex = _SIMPLE_MTX_INITIALIZER_NP;
|
static simple_mtx_t vk_object_to_data_mutex = _SIMPLE_MTX_INITIALIZER_NP;
|
||||||
|
|
||||||
|
|
@ -251,8 +275,46 @@ static VkLayerDeviceCreateInfo *get_device_chain_info(const VkDeviceCreateInfo *
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct VkBaseOutStructure *
|
||||||
|
clone_chain(const struct VkBaseInStructure *chain)
|
||||||
|
{
|
||||||
|
struct VkBaseOutStructure *head = NULL, *tail = NULL;
|
||||||
|
|
||||||
|
vk_foreach_struct_const(item, chain) {
|
||||||
|
size_t item_size = vk_structure_type_size(item);
|
||||||
|
struct VkBaseOutStructure *new_item =
|
||||||
|
(struct VkBaseOutStructure *)malloc(item_size);;
|
||||||
|
|
||||||
|
memcpy(new_item, item, item_size);
|
||||||
|
|
||||||
|
if (!head)
|
||||||
|
head = new_item;
|
||||||
|
if (tail)
|
||||||
|
tail->pNext = new_item;
|
||||||
|
tail = new_item;
|
||||||
|
}
|
||||||
|
|
||||||
|
return head;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
free_chain(struct VkBaseOutStructure *chain)
|
||||||
|
{
|
||||||
|
while (chain) {
|
||||||
|
void *node = chain;
|
||||||
|
chain = chain->pNext;
|
||||||
|
free(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
|
|
||||||
|
static void check_vk_result(VkResult err)
|
||||||
|
{
|
||||||
|
if (err != VK_SUCCESS)
|
||||||
|
printf("ERROR!\n");
|
||||||
|
}
|
||||||
|
|
||||||
static struct instance_data *new_instance_data(VkInstance instance)
|
static struct instance_data *new_instance_data(VkInstance instance)
|
||||||
{
|
{
|
||||||
struct instance_data *data = rzalloc(NULL, struct instance_data);
|
struct instance_data *data = rzalloc(NULL, struct instance_data);
|
||||||
|
|
@ -311,15 +373,35 @@ static struct queue_data *new_queue_data(VkQueue queue,
|
||||||
data->device = device_data;
|
data->device = device_data;
|
||||||
data->queue = queue;
|
data->queue = queue;
|
||||||
data->flags = family_props->queueFlags;
|
data->flags = family_props->queueFlags;
|
||||||
|
data->timestamp_mask = (1ul << family_props->timestampValidBits) - 1;
|
||||||
data->family_index = family_index;
|
data->family_index = family_index;
|
||||||
|
LIST_INITHEAD(&data->running_command_buffer);
|
||||||
map_object(data->queue, data);
|
map_object(data->queue, data);
|
||||||
|
|
||||||
|
/* Fence synchronizing access to queries on that queue. */
|
||||||
|
VkFenceCreateInfo fence_info = {};
|
||||||
|
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||||
|
fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
|
||||||
|
VkResult err = device_data->vtable.CreateFence(device_data->device,
|
||||||
|
&fence_info,
|
||||||
|
NULL,
|
||||||
|
&data->queries_fence);
|
||||||
|
check_vk_result(err);
|
||||||
|
|
||||||
if (data->flags & VK_QUEUE_GRAPHICS_BIT)
|
if (data->flags & VK_QUEUE_GRAPHICS_BIT)
|
||||||
device_data->graphic_queue = data;
|
device_data->graphic_queue = data;
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void destroy_queue(struct queue_data *data)
|
||||||
|
{
|
||||||
|
struct device_data *device_data = data->device;
|
||||||
|
device_data->vtable.DestroyFence(device_data->device, data->queries_fence, NULL);
|
||||||
|
unmap_object(data->queue);
|
||||||
|
ralloc_free(data);
|
||||||
|
}
|
||||||
|
|
||||||
static void device_map_queues(struct device_data *data,
|
static void device_map_queues(struct device_data *data,
|
||||||
const VkDeviceCreateInfo *pCreateInfo)
|
const VkDeviceCreateInfo *pCreateInfo)
|
||||||
{
|
{
|
||||||
|
|
@ -360,7 +442,7 @@ static void device_map_queues(struct device_data *data,
|
||||||
static void device_unmap_queues(struct device_data *data)
|
static void device_unmap_queues(struct device_data *data)
|
||||||
{
|
{
|
||||||
for (uint32_t i = 0; i < data->n_queues; i++)
|
for (uint32_t i = 0; i < data->n_queues; i++)
|
||||||
unmap_object(data->queues[i]->queue);
|
destroy_queue(data->queues[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void destroy_device_data(struct device_data *data)
|
static void destroy_device_data(struct device_data *data)
|
||||||
|
|
@ -372,12 +454,19 @@ static void destroy_device_data(struct device_data *data)
|
||||||
/**/
|
/**/
|
||||||
static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer,
|
static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_buffer,
|
||||||
VkCommandBufferLevel level,
|
VkCommandBufferLevel level,
|
||||||
|
VkQueryPool pipeline_query_pool,
|
||||||
|
VkQueryPool timestamp_query_pool,
|
||||||
|
uint32_t query_index,
|
||||||
struct device_data *device_data)
|
struct device_data *device_data)
|
||||||
{
|
{
|
||||||
struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data);
|
struct command_buffer_data *data = rzalloc(NULL, struct command_buffer_data);
|
||||||
data->device = device_data;
|
data->device = device_data;
|
||||||
data->cmd_buffer = cmd_buffer;
|
data->cmd_buffer = cmd_buffer;
|
||||||
data->level = level;
|
data->level = level;
|
||||||
|
data->pipeline_query_pool = pipeline_query_pool;
|
||||||
|
data->timestamp_query_pool = timestamp_query_pool;
|
||||||
|
data->query_index = query_index;
|
||||||
|
list_inithead(&data->link);
|
||||||
map_object((void *) data->cmd_buffer, data);
|
map_object((void *) data->cmd_buffer, data);
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
@ -385,10 +474,10 @@ static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_b
|
||||||
static void destroy_command_buffer_data(struct command_buffer_data *data)
|
static void destroy_command_buffer_data(struct command_buffer_data *data)
|
||||||
{
|
{
|
||||||
unmap_object((void *) data->cmd_buffer);
|
unmap_object((void *) data->cmd_buffer);
|
||||||
|
list_delinit(&data->link);
|
||||||
ralloc_free(data);
|
ralloc_free(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**/
|
/**/
|
||||||
static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain,
|
static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain,
|
||||||
struct device_data *device_data)
|
struct device_data *device_data)
|
||||||
|
|
@ -461,7 +550,7 @@ static float get_time_stat(void *_data, int _idx)
|
||||||
_idx + data->n_frames;
|
_idx + data->n_frames;
|
||||||
idx %= ARRAY_SIZE(data->frames_stats);
|
idx %= ARRAY_SIZE(data->frames_stats);
|
||||||
/* Time stats are in us. */
|
/* Time stats are in us. */
|
||||||
return data->frames_stats[idx].stats[data->stat_selector] / 1000.0f;
|
return data->frames_stats[idx].stats[data->stat_selector] / data->time_dividor;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float get_stat(void *_data, int _idx)
|
static float get_stat(void *_data, int _idx)
|
||||||
|
|
@ -548,11 +637,15 @@ static void compute_swapchain_display(struct swapchain_data *data)
|
||||||
char hash[40];
|
char hash[40];
|
||||||
snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]);
|
snprintf(hash, sizeof(hash), "##%s", overlay_param_names[s]);
|
||||||
data->stat_selector = (enum overlay_param_enabled) s;
|
data->stat_selector = (enum overlay_param_enabled) s;
|
||||||
|
data->time_dividor = 1000.0f;
|
||||||
|
if (s == OVERLAY_PARAM_ENABLED_gpu_timing)
|
||||||
|
data->time_dividor = 1000000.0f;
|
||||||
|
|
||||||
if (s == OVERLAY_PARAM_ENABLED_frame_timing ||
|
if (s == OVERLAY_PARAM_ENABLED_frame_timing ||
|
||||||
s == OVERLAY_PARAM_ENABLED_acquire_timing) {
|
s == OVERLAY_PARAM_ENABLED_acquire_timing ||
|
||||||
double min_time = data->stats_min.stats[s] / 1000.0f;
|
s == OVERLAY_PARAM_ENABLED_gpu_timing) {
|
||||||
double max_time = data->stats_max.stats[s] / 1000.0f;
|
double min_time = data->stats_min.stats[s] / data->time_dividor;
|
||||||
|
double max_time = data->stats_max.stats[s] / data->time_dividor;
|
||||||
ImGui::PlotHistogram(hash, get_time_stat, data,
|
ImGui::PlotHistogram(hash, get_time_stat, data,
|
||||||
ARRAY_SIZE(data->frames_stats), 0,
|
ARRAY_SIZE(data->frames_stats), 0,
|
||||||
NULL, min_time, max_time,
|
NULL, min_time, max_time,
|
||||||
|
|
@ -730,7 +823,10 @@ static void CreateOrResizeBuffer(struct device_data *data,
|
||||||
*buffer_size = new_size;
|
*buffer_size = new_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void render_swapchain_display(struct swapchain_data *data, unsigned image_index)
|
static void render_swapchain_display(struct swapchain_data *data,
|
||||||
|
const VkSemaphore *wait_semaphores,
|
||||||
|
unsigned n_wait_semaphores,
|
||||||
|
unsigned image_index)
|
||||||
{
|
{
|
||||||
ImDrawData* draw_data = ImGui::GetDrawData();
|
ImDrawData* draw_data = ImGui::GetDrawData();
|
||||||
if (draw_data->TotalVtxCount == 0)
|
if (draw_data->TotalVtxCount == 0)
|
||||||
|
|
@ -922,12 +1018,12 @@ static void render_swapchain_display(struct swapchain_data *data, unsigned image
|
||||||
submit_info.commandBufferCount = 1;
|
submit_info.commandBufferCount = 1;
|
||||||
submit_info.pCommandBuffers = &command_buffer;
|
submit_info.pCommandBuffers = &command_buffer;
|
||||||
submit_info.pWaitDstStageMask = &stage_wait;
|
submit_info.pWaitDstStageMask = &stage_wait;
|
||||||
|
submit_info.waitSemaphoreCount = n_wait_semaphores;
|
||||||
|
submit_info.pWaitSemaphores = wait_semaphores;
|
||||||
submit_info.signalSemaphoreCount = 1;
|
submit_info.signalSemaphoreCount = 1;
|
||||||
submit_info.pSignalSemaphores = &data->submission_semaphore;
|
submit_info.pSignalSemaphores = &data->submission_semaphore;
|
||||||
|
|
||||||
device_data->vtable.WaitForFences(device_data->device, 1, &data->fence, VK_TRUE, UINT64_MAX);
|
device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, VK_NULL_HANDLE);
|
||||||
device_data->vtable.ResetFences(device_data->device, 1, &data->fence);
|
|
||||||
device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, data->fence);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static const uint32_t overlay_vert_spv[] = {
|
static const uint32_t overlay_vert_spv[] = {
|
||||||
|
|
@ -1317,13 +1413,6 @@ static void setup_swapchain_data(struct swapchain_data *data,
|
||||||
|
|
||||||
data->frame_data[i].command_buffer = cmd_bufs[i];
|
data->frame_data[i].command_buffer = cmd_bufs[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Submission fence */
|
|
||||||
VkFenceCreateInfo fence_info = {};
|
|
||||||
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
|
||||||
fence_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
|
|
||||||
VK_CHECK(device_data->vtable.CreateFence(device_data->device, &fence_info,
|
|
||||||
NULL, &data->fence));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void shutdown_swapchain_data(struct swapchain_data *data)
|
static void shutdown_swapchain_data(struct swapchain_data *data)
|
||||||
|
|
@ -1352,7 +1441,6 @@ static void shutdown_swapchain_data(struct swapchain_data *data)
|
||||||
}
|
}
|
||||||
device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL);
|
device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL);
|
||||||
|
|
||||||
device_data->vtable.DestroyFence(device_data->device, data->fence, NULL);
|
|
||||||
if (data->submission_semaphore)
|
if (data->submission_semaphore)
|
||||||
device_data->vtable.DestroySemaphore(device_data->device, data->submission_semaphore, NULL);
|
device_data->vtable.DestroySemaphore(device_data->device, data->submission_semaphore, NULL);
|
||||||
|
|
||||||
|
|
@ -1376,13 +1464,15 @@ static void shutdown_swapchain_data(struct swapchain_data *data)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void before_present(struct swapchain_data *swapchain_data,
|
static void before_present(struct swapchain_data *swapchain_data,
|
||||||
|
const VkSemaphore *wait_semaphores,
|
||||||
|
unsigned n_wait_semaphores,
|
||||||
unsigned imageIndex)
|
unsigned imageIndex)
|
||||||
{
|
{
|
||||||
snapshot_swapchain_frame(swapchain_data);
|
snapshot_swapchain_frame(swapchain_data);
|
||||||
|
|
||||||
if (swapchain_data->n_frames > 0) {
|
if (swapchain_data->n_frames > 0) {
|
||||||
compute_swapchain_display(swapchain_data);
|
compute_swapchain_display(swapchain_data);
|
||||||
render_swapchain_display(swapchain_data, imageIndex);
|
render_swapchain_display(swapchain_data, wait_semaphores, n_wait_semaphores, imageIndex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1419,36 +1509,91 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_QueuePresentKHR(
|
||||||
{
|
{
|
||||||
struct queue_data *queue_data = FIND_QUEUE_DATA(queue);
|
struct queue_data *queue_data = FIND_QUEUE_DATA(queue);
|
||||||
struct device_data *device_data = queue_data->device;
|
struct device_data *device_data = queue_data->device;
|
||||||
|
uint32_t query_results[OVERLAY_QUERY_COUNT];
|
||||||
|
|
||||||
/* If we present on the graphic queue this layer is using to draw an
|
if (list_length(&queue_data->running_command_buffer) > 0) {
|
||||||
* overlay, we don't need more than submitting the overlay draw prior to
|
/* Before getting the query results, make sure the operations have
|
||||||
* present.
|
* completed.
|
||||||
*/
|
*/
|
||||||
if (queue_data == device_data->graphic_queue) {
|
VkResult err = device_data->vtable.ResetFences(device_data->device,
|
||||||
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
|
1, &queue_data->queries_fence);
|
||||||
struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pPresentInfo->pSwapchains[i]);
|
check_vk_result(err);
|
||||||
before_present(swapchain_data, pPresentInfo->pImageIndices[i]);
|
err = device_data->vtable.QueueSubmit(queue, 0, NULL, queue_data->queries_fence);
|
||||||
|
check_vk_result(err);
|
||||||
|
err = device_data->vtable.WaitForFences(device_data->device,
|
||||||
|
1, &queue_data->queries_fence,
|
||||||
|
VK_FALSE, UINT64_MAX);
|
||||||
|
check_vk_result(err);
|
||||||
|
|
||||||
|
/* Now get the results. */
|
||||||
|
list_for_each_entry_safe(struct command_buffer_data, cmd_buffer_data,
|
||||||
|
&queue_data->running_command_buffer, link) {
|
||||||
|
list_delinit(&cmd_buffer_data->link);
|
||||||
|
|
||||||
|
if (cmd_buffer_data->pipeline_query_pool) {
|
||||||
|
memset(query_results, 0, sizeof(query_results));
|
||||||
|
err =
|
||||||
|
device_data->vtable.GetQueryPoolResults(device_data->device,
|
||||||
|
cmd_buffer_data->pipeline_query_pool,
|
||||||
|
cmd_buffer_data->query_index, 1,
|
||||||
|
sizeof(uint32_t) * OVERLAY_QUERY_COUNT,
|
||||||
|
query_results, 0, VK_QUERY_RESULT_WAIT_BIT);
|
||||||
|
check_vk_result(err);
|
||||||
|
|
||||||
|
for (uint32_t i = OVERLAY_PARAM_ENABLED_vertices;
|
||||||
|
i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) {
|
||||||
|
device_data->frame_stats.stats[i] += query_results[i - OVERLAY_PARAM_ENABLED_vertices];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (cmd_buffer_data->timestamp_query_pool) {
|
||||||
|
uint64_t gpu_timestamps[2] = { 0 };
|
||||||
|
err =
|
||||||
|
device_data->vtable.GetQueryPoolResults(device_data->device,
|
||||||
|
cmd_buffer_data->timestamp_query_pool,
|
||||||
|
cmd_buffer_data->query_index * 2, 2,
|
||||||
|
2 * sizeof(uint64_t), gpu_timestamps, sizeof(uint64_t),
|
||||||
|
VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_64_BIT);
|
||||||
|
check_vk_result(err);
|
||||||
|
|
||||||
|
gpu_timestamps[0] &= queue_data->timestamp_mask;
|
||||||
|
gpu_timestamps[1] &= queue_data->timestamp_mask;
|
||||||
|
device_data->frame_stats.stats[OVERLAY_PARAM_ENABLED_gpu_timing] +=
|
||||||
|
(gpu_timestamps[1] - gpu_timestamps[0]) *
|
||||||
|
device_data->properties.limits.timestampPeriod;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return queue_data->device->vtable.QueuePresentKHR(queue, pPresentInfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Otherwise we need to do cross queue synchronization to tie the overlay
|
/* Otherwise we need to add our overlay drawing semaphore to the list of
|
||||||
* draw into the present queue.
|
* semaphores to wait on. If we don't do that the presented picture might
|
||||||
|
* be have incomplete overlay drawings.
|
||||||
*/
|
*/
|
||||||
VkPresentInfoKHR present_info = *pPresentInfo;
|
VkResult result = VK_SUCCESS;
|
||||||
VkSemaphore *semaphores =
|
|
||||||
(VkSemaphore *)malloc(sizeof(VkSemaphore) * (pPresentInfo->waitSemaphoreCount + pPresentInfo->swapchainCount));
|
|
||||||
for (uint32_t i = 0; i < pPresentInfo->waitSemaphoreCount; i++)
|
|
||||||
semaphores[i] = pPresentInfo->pWaitSemaphores[i];
|
|
||||||
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
|
for (uint32_t i = 0; i < pPresentInfo->swapchainCount; i++) {
|
||||||
struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(pPresentInfo->pSwapchains[i]);
|
VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i];
|
||||||
before_present(swapchain_data, pPresentInfo->pImageIndices[i]);
|
struct swapchain_data *swapchain_data = FIND_SWAPCHAIN_DATA(swapchain);
|
||||||
semaphores[pPresentInfo->waitSemaphoreCount + i] = swapchain_data->submission_semaphore;
|
VkPresentInfoKHR present_info = *pPresentInfo;
|
||||||
|
present_info.swapchainCount = 1;
|
||||||
|
present_info.pSwapchains = &swapchain;
|
||||||
|
|
||||||
|
before_present(swapchain_data,
|
||||||
|
pPresentInfo->pWaitSemaphores,
|
||||||
|
pPresentInfo->waitSemaphoreCount,
|
||||||
|
pPresentInfo->pImageIndices[i]);
|
||||||
|
/* Because the submission of the overlay draw waits on the semaphores
|
||||||
|
* handed for present, we don't need to have this present operation wait
|
||||||
|
* on them as well, we can just wait on the overlay submission
|
||||||
|
* semaphore.
|
||||||
|
*/
|
||||||
|
present_info.pWaitSemaphores = &swapchain_data->submission_semaphore;
|
||||||
|
present_info.waitSemaphoreCount = 1;
|
||||||
|
|
||||||
|
VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
|
||||||
|
if (pPresentInfo->pResults)
|
||||||
|
pPresentInfo->pResults[i] = chain_result;
|
||||||
|
if (chain_result != VK_SUCCESS && result == VK_SUCCESS)
|
||||||
|
result = chain_result;
|
||||||
}
|
}
|
||||||
present_info.pWaitSemaphores = semaphores;
|
|
||||||
present_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount + pPresentInfo->swapchainCount;
|
|
||||||
VkResult result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
|
|
||||||
free(semaphores);
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1627,7 +1772,74 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_BeginCommandBuffer(
|
||||||
struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
|
struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
|
||||||
struct device_data *device_data = cmd_buffer_data->device;
|
struct device_data *device_data = cmd_buffer_data->device;
|
||||||
|
|
||||||
return device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
|
/* We don't record any query in secondary command buffers, just make sure
|
||||||
|
* we have the right inheritance.
|
||||||
|
*/
|
||||||
|
if (cmd_buffer_data->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
|
||||||
|
VkCommandBufferBeginInfo *begin_info = (VkCommandBufferBeginInfo *)
|
||||||
|
clone_chain((const struct VkBaseInStructure *)pBeginInfo);
|
||||||
|
VkCommandBufferInheritanceInfo *parent_inhe_info = (VkCommandBufferInheritanceInfo *)
|
||||||
|
vk_find_struct(begin_info, COMMAND_BUFFER_INHERITANCE_INFO);
|
||||||
|
VkCommandBufferInheritanceInfo inhe_info = {
|
||||||
|
VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,
|
||||||
|
NULL,
|
||||||
|
VK_NULL_HANDLE,
|
||||||
|
0,
|
||||||
|
VK_NULL_HANDLE,
|
||||||
|
VK_FALSE,
|
||||||
|
0,
|
||||||
|
overlay_query_flags,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (parent_inhe_info)
|
||||||
|
parent_inhe_info->pipelineStatistics = overlay_query_flags;
|
||||||
|
else {
|
||||||
|
inhe_info.pNext = begin_info->pNext;
|
||||||
|
begin_info->pNext = &inhe_info;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
|
||||||
|
|
||||||
|
if (!parent_inhe_info)
|
||||||
|
begin_info->pNext = inhe_info.pNext;
|
||||||
|
|
||||||
|
free_chain((struct VkBaseOutStructure *)begin_info);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Primary command buffers with no queries. */
|
||||||
|
if (!cmd_buffer_data->pipeline_query_pool && cmd_buffer_data->timestamp_query_pool)
|
||||||
|
return device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
|
||||||
|
|
||||||
|
/* Otherwise record a begin query as first command. */
|
||||||
|
VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
|
||||||
|
|
||||||
|
if (result == VK_SUCCESS) {
|
||||||
|
if (cmd_buffer_data->pipeline_query_pool) {
|
||||||
|
device_data->vtable.CmdResetQueryPool(commandBuffer,
|
||||||
|
cmd_buffer_data->pipeline_query_pool,
|
||||||
|
cmd_buffer_data->query_index, 1);
|
||||||
|
}
|
||||||
|
if (cmd_buffer_data->timestamp_query_pool) {
|
||||||
|
device_data->vtable.CmdResetQueryPool(commandBuffer,
|
||||||
|
cmd_buffer_data->timestamp_query_pool,
|
||||||
|
cmd_buffer_data->query_index * 2, 2);
|
||||||
|
}
|
||||||
|
if (cmd_buffer_data->pipeline_query_pool) {
|
||||||
|
device_data->vtable.CmdBeginQuery(commandBuffer,
|
||||||
|
cmd_buffer_data->pipeline_query_pool,
|
||||||
|
cmd_buffer_data->query_index, 0);
|
||||||
|
}
|
||||||
|
if (cmd_buffer_data->timestamp_query_pool) {
|
||||||
|
device_data->vtable.CmdWriteTimestamp(commandBuffer,
|
||||||
|
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||||
|
cmd_buffer_data->timestamp_query_pool,
|
||||||
|
cmd_buffer_data->query_index * 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
VKAPI_ATTR VkResult VKAPI_CALL overlay_EndCommandBuffer(
|
VKAPI_ATTR VkResult VKAPI_CALL overlay_EndCommandBuffer(
|
||||||
|
|
@ -1636,6 +1848,12 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_EndCommandBuffer(
|
||||||
struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
|
struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(commandBuffer);
|
||||||
struct device_data *device_data = cmd_buffer_data->device;
|
struct device_data *device_data = cmd_buffer_data->device;
|
||||||
|
|
||||||
|
if (cmd_buffer_data->timestamp_query_pool) {
|
||||||
|
device_data->vtable.CmdWriteTimestamp(commandBuffer,
|
||||||
|
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||||
|
cmd_buffer_data->timestamp_query_pool,
|
||||||
|
cmd_buffer_data->query_index * 2 + 1);
|
||||||
|
}
|
||||||
if (cmd_buffer_data->pipeline_query_pool) {
|
if (cmd_buffer_data->pipeline_query_pool) {
|
||||||
device_data->vtable.CmdEndQuery(commandBuffer,
|
device_data->vtable.CmdEndQuery(commandBuffer,
|
||||||
cmd_buffer_data->pipeline_query_pool,
|
cmd_buffer_data->pipeline_query_pool,
|
||||||
|
|
@ -1686,21 +1904,82 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_AllocateCommandBuffers(
|
||||||
device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers);
|
device_data->vtable.AllocateCommandBuffers(device, pAllocateInfo, pCommandBuffers);
|
||||||
if (result != VK_SUCCESS)
|
if (result != VK_SUCCESS)
|
||||||
return result;
|
return result;
|
||||||
for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++)
|
|
||||||
new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level, device_data);
|
VkQueryPool pipeline_query_pool = VK_NULL_HANDLE;
|
||||||
|
VkQueryPool timestamp_query_pool = VK_NULL_HANDLE;
|
||||||
|
if (device_data->instance->pipeline_statistics_enabled &&
|
||||||
|
pAllocateInfo->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
|
||||||
|
VkQueryPoolCreateInfo pool_info = {
|
||||||
|
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
|
||||||
|
NULL,
|
||||||
|
0,
|
||||||
|
VK_QUERY_TYPE_PIPELINE_STATISTICS,
|
||||||
|
pAllocateInfo->commandBufferCount,
|
||||||
|
overlay_query_flags,
|
||||||
|
};
|
||||||
|
VkResult err =
|
||||||
|
device_data->vtable.CreateQueryPool(device_data->device, &pool_info,
|
||||||
|
NULL, &pipeline_query_pool);
|
||||||
|
check_vk_result(err);
|
||||||
|
}
|
||||||
|
if (device_data->instance->params.enabled[OVERLAY_PARAM_ENABLED_gpu_timing]) {
|
||||||
|
VkQueryPoolCreateInfo pool_info = {
|
||||||
|
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
|
||||||
|
NULL,
|
||||||
|
0,
|
||||||
|
VK_QUERY_TYPE_TIMESTAMP,
|
||||||
|
pAllocateInfo->commandBufferCount * 2,
|
||||||
|
0,
|
||||||
|
};
|
||||||
|
VkResult err =
|
||||||
|
device_data->vtable.CreateQueryPool(device_data->device, &pool_info,
|
||||||
|
NULL, ×tamp_query_pool);
|
||||||
|
check_vk_result(err);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < pAllocateInfo->commandBufferCount; i++) {
|
||||||
|
new_command_buffer_data(pCommandBuffers[i], pAllocateInfo->level,
|
||||||
|
pipeline_query_pool, timestamp_query_pool,
|
||||||
|
i, device_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pipeline_query_pool)
|
||||||
|
map_object(pipeline_query_pool, (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
|
||||||
|
if (timestamp_query_pool)
|
||||||
|
map_object(timestamp_query_pool, (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
VKAPI_ATTR void VKAPI_CALL overlay_FreeCommandBuffers(VkDevice device,
|
VKAPI_ATTR void VKAPI_CALL overlay_FreeCommandBuffers(
|
||||||
VkCommandPool commandPool,
|
VkDevice device,
|
||||||
uint32_t commandBufferCount,
|
VkCommandPool commandPool,
|
||||||
const VkCommandBuffer* pCommandBuffers)
|
uint32_t commandBufferCount,
|
||||||
|
const VkCommandBuffer* pCommandBuffers)
|
||||||
{
|
{
|
||||||
struct device_data *device_data = FIND_DEVICE_DATA(device);
|
struct device_data *device_data = FIND_DEVICE_DATA(device);
|
||||||
for (uint32_t i = 0; i < commandBufferCount; i++) {
|
for (uint32_t i = 0; i < commandBufferCount; i++) {
|
||||||
struct command_buffer_data *cmd_buffer_data = FIND_CMD_BUFFER_DATA(pCommandBuffers[i]);
|
struct command_buffer_data *cmd_buffer_data =
|
||||||
|
FIND_CMD_BUFFER_DATA(pCommandBuffers[i]);
|
||||||
|
uint64_t count = (uintptr_t)find_object_data((void *)cmd_buffer_data->pipeline_query_pool);
|
||||||
|
if (count == 1) {
|
||||||
|
unmap_object(cmd_buffer_data->pipeline_query_pool);
|
||||||
|
device_data->vtable.DestroyQueryPool(device_data->device,
|
||||||
|
cmd_buffer_data->pipeline_query_pool, NULL);
|
||||||
|
} else if (count != 0) {
|
||||||
|
map_object(cmd_buffer_data->pipeline_query_pool, (void *)(uintptr_t)(count - 1));
|
||||||
|
}
|
||||||
|
count = (uintptr_t)find_object_data((void *)cmd_buffer_data->timestamp_query_pool);
|
||||||
|
if (count == 1) {
|
||||||
|
unmap_object(cmd_buffer_data->timestamp_query_pool);
|
||||||
|
device_data->vtable.DestroyQueryPool(device_data->device,
|
||||||
|
cmd_buffer_data->timestamp_query_pool, NULL);
|
||||||
|
} else if (count != 0) {
|
||||||
|
map_object(cmd_buffer_data->timestamp_query_pool, (void *)(uintptr_t)(count - 1));
|
||||||
|
}
|
||||||
destroy_command_buffer_data(cmd_buffer_data);
|
destroy_command_buffer_data(cmd_buffer_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
device_data->vtable.FreeCommandBuffers(device, commandPool,
|
device_data->vtable.FreeCommandBuffers(device, commandPool,
|
||||||
commandBufferCount, pCommandBuffers);
|
commandBufferCount, pCommandBuffers);
|
||||||
}
|
}
|
||||||
|
|
@ -1724,6 +2003,21 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_QueueSubmit(
|
||||||
/* Merge the submitted command buffer stats into the device. */
|
/* Merge the submitted command buffer stats into the device. */
|
||||||
for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++)
|
for (uint32_t st = 0; st < OVERLAY_PARAM_ENABLED_MAX; st++)
|
||||||
device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st];
|
device_data->frame_stats.stats[st] += cmd_buffer_data->stats.stats[st];
|
||||||
|
|
||||||
|
/* Attach the command buffer to the queue so we remember to read its
|
||||||
|
* pipeline statistics & timestamps at QueuePresent().
|
||||||
|
*/
|
||||||
|
if (!cmd_buffer_data->pipeline_query_pool &&
|
||||||
|
!cmd_buffer_data->timestamp_query_pool)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (list_empty(&cmd_buffer_data->link)) {
|
||||||
|
list_addtail(&cmd_buffer_data->link,
|
||||||
|
&queue_data->running_command_buffer);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "Command buffer submitted multiple times before present.\n"
|
||||||
|
"This could lead to invalid data.\n");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1751,7 +2045,19 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateDevice(
|
||||||
// Advance the link info for the next element on the chain
|
// Advance the link info for the next element on the chain
|
||||||
chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext;
|
chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext;
|
||||||
|
|
||||||
VkResult result = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice);
|
VkPhysicalDeviceFeatures device_features = {};
|
||||||
|
VkDeviceCreateInfo device_info = *pCreateInfo;
|
||||||
|
|
||||||
|
if (pCreateInfo->pEnabledFeatures)
|
||||||
|
device_features = *(pCreateInfo->pEnabledFeatures);
|
||||||
|
if (instance_data->pipeline_statistics_enabled) {
|
||||||
|
device_features.inheritedQueries = true;
|
||||||
|
device_features.pipelineStatisticsQuery = true;
|
||||||
|
}
|
||||||
|
device_info.pEnabledFeatures = &device_features;
|
||||||
|
|
||||||
|
|
||||||
|
VkResult result = fpCreateDevice(physicalDevice, &device_info, pAllocator, pDevice);
|
||||||
if (result != VK_SUCCESS) return result;
|
if (result != VK_SUCCESS) return result;
|
||||||
|
|
||||||
struct device_data *device_data = new_device_data(*pDevice, instance_data);
|
struct device_data *device_data = new_device_data(*pDevice, instance_data);
|
||||||
|
|
@ -1811,6 +2117,14 @@ VKAPI_ATTR VkResult VKAPI_CALL overlay_CreateInstance(
|
||||||
|
|
||||||
parse_overlay_env(&instance_data->params, getenv("VK_LAYER_MESA_OVERLAY_CONFIG"));
|
parse_overlay_env(&instance_data->params, getenv("VK_LAYER_MESA_OVERLAY_CONFIG"));
|
||||||
|
|
||||||
|
for (int i = OVERLAY_PARAM_ENABLED_vertices;
|
||||||
|
i <= OVERLAY_PARAM_ENABLED_compute_invocations; i++) {
|
||||||
|
if (instance_data->params.enabled[i]) {
|
||||||
|
instance_data->pipeline_statistics_enabled = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1854,10 +2168,12 @@ static const struct {
|
||||||
ADD_HOOK(AcquireNextImage2KHR),
|
ADD_HOOK(AcquireNextImage2KHR),
|
||||||
|
|
||||||
ADD_HOOK(QueueSubmit),
|
ADD_HOOK(QueueSubmit),
|
||||||
ADD_HOOK(CreateInstance),
|
|
||||||
ADD_HOOK(DestroyInstance),
|
|
||||||
ADD_HOOK(CreateDevice),
|
ADD_HOOK(CreateDevice),
|
||||||
ADD_HOOK(DestroyDevice),
|
ADD_HOOK(DestroyDevice),
|
||||||
|
|
||||||
|
ADD_HOOK(CreateInstance),
|
||||||
|
ADD_HOOK(DestroyInstance),
|
||||||
#undef ADD_HOOK
|
#undef ADD_HOOK
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,18 @@ extern "C" {
|
||||||
OVERLAY_PARAM_BOOL(pipeline_raytracing) \
|
OVERLAY_PARAM_BOOL(pipeline_raytracing) \
|
||||||
OVERLAY_PARAM_BOOL(acquire) \
|
OVERLAY_PARAM_BOOL(acquire) \
|
||||||
OVERLAY_PARAM_BOOL(acquire_timing) \
|
OVERLAY_PARAM_BOOL(acquire_timing) \
|
||||||
|
OVERLAY_PARAM_BOOL(vertices) \
|
||||||
|
OVERLAY_PARAM_BOOL(primitives) \
|
||||||
|
OVERLAY_PARAM_BOOL(vert_invocations) \
|
||||||
|
OVERLAY_PARAM_BOOL(geom_invocations) \
|
||||||
|
OVERLAY_PARAM_BOOL(geom_primitives) \
|
||||||
|
OVERLAY_PARAM_BOOL(clip_invocations) \
|
||||||
|
OVERLAY_PARAM_BOOL(clip_primitives) \
|
||||||
|
OVERLAY_PARAM_BOOL(frag_invocations) \
|
||||||
|
OVERLAY_PARAM_BOOL(tess_ctrl_patches) \
|
||||||
|
OVERLAY_PARAM_BOOL(tess_eval_invocations) \
|
||||||
|
OVERLAY_PARAM_BOOL(compute_invocations) \
|
||||||
|
OVERLAY_PARAM_BOOL(gpu_timing) \
|
||||||
OVERLAY_PARAM_CUSTOM(fps_sampling_period) \
|
OVERLAY_PARAM_CUSTOM(fps_sampling_period) \
|
||||||
OVERLAY_PARAM_CUSTOM(output_file) \
|
OVERLAY_PARAM_CUSTOM(output_file) \
|
||||||
OVERLAY_PARAM_CUSTOM(position) \
|
OVERLAY_PARAM_CUSTOM(position) \
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue