mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-16 15:10:31 +01:00
venus: use feedback for vkGetQueryPoolResults
Create a feedback buffer for each query pool and retrieve the query results from the buffer instead of a roundtrip call in vkGetQueryPoolResults. VK_QUERY_RESULT_WAIT_BIT queries will poll until the queries are available in the feedback buffer. Query results in the feedback buffer are always VK_QUERY_RESULT_64_BIT and if needed converted to what the app requests at vkGetQueryPoolResults time. Signed-off-by: Juston Li <justonli@google.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23348>
This commit is contained in:
parent
38b1c39b59
commit
e6cffa1f0e
5 changed files with 153 additions and 6 deletions
|
|
@ -1804,6 +1804,8 @@ vn_CmdEndQuery(VkCommandBuffer commandBuffer,
|
||||||
uint32_t query)
|
uint32_t query)
|
||||||
{
|
{
|
||||||
VN_CMD_ENQUEUE(vkCmdEndQuery, commandBuffer, queryPool, query);
|
VN_CMD_ENQUEUE(vkCmdEndQuery, commandBuffer, queryPool, query);
|
||||||
|
|
||||||
|
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
@ -1814,6 +1816,9 @@ vn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
||||||
{
|
{
|
||||||
VN_CMD_ENQUEUE(vkCmdResetQueryPool, commandBuffer, queryPool, firstQuery,
|
VN_CMD_ENQUEUE(vkCmdResetQueryPool, commandBuffer, queryPool, firstQuery,
|
||||||
queryCount);
|
queryCount);
|
||||||
|
|
||||||
|
vn_feedback_query_reset_cmd_record(commandBuffer, queryPool, firstQuery,
|
||||||
|
queryCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
@ -1824,6 +1829,8 @@ vn_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
|
||||||
{
|
{
|
||||||
VN_CMD_ENQUEUE(vkCmdWriteTimestamp, commandBuffer, pipelineStage,
|
VN_CMD_ENQUEUE(vkCmdWriteTimestamp, commandBuffer, pipelineStage,
|
||||||
queryPool, query);
|
queryPool, query);
|
||||||
|
|
||||||
|
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
@ -1834,6 +1841,8 @@ vn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
|
||||||
{
|
{
|
||||||
VN_CMD_ENQUEUE(vkCmdWriteTimestamp2, commandBuffer, stage, queryPool,
|
VN_CMD_ENQUEUE(vkCmdWriteTimestamp2, commandBuffer, stage, queryPool,
|
||||||
query);
|
query);
|
||||||
|
|
||||||
|
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
@ -2012,6 +2021,8 @@ vn_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
|
||||||
{
|
{
|
||||||
VN_CMD_ENQUEUE(vkCmdEndQueryIndexedEXT, commandBuffer, queryPool, query,
|
VN_CMD_ENQUEUE(vkCmdEndQueryIndexedEXT, commandBuffer, queryPool, query,
|
||||||
index);
|
index);
|
||||||
|
|
||||||
|
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,7 @@ static const struct debug_control vn_perf_options[] = {
|
||||||
{ "no_memory_suballoc", VN_PERF_NO_MEMORY_SUBALLOC },
|
{ "no_memory_suballoc", VN_PERF_NO_MEMORY_SUBALLOC },
|
||||||
{ "no_cmd_batching", VN_PERF_NO_CMD_BATCHING },
|
{ "no_cmd_batching", VN_PERF_NO_CMD_BATCHING },
|
||||||
{ "no_timeline_sem_feedback", VN_PERF_NO_TIMELINE_SEM_FEEDBACK },
|
{ "no_timeline_sem_feedback", VN_PERF_NO_TIMELINE_SEM_FEEDBACK },
|
||||||
|
{ "no_query_feedback", VN_PERF_NO_QUERY_FEEDBACK },
|
||||||
{ NULL, 0 },
|
{ NULL, 0 },
|
||||||
/* clang-format on */
|
/* clang-format on */
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -115,6 +115,7 @@ enum vn_perf {
|
||||||
VN_PERF_NO_MEMORY_SUBALLOC = 1ull << 5,
|
VN_PERF_NO_MEMORY_SUBALLOC = 1ull << 5,
|
||||||
VN_PERF_NO_CMD_BATCHING = 1ull << 6,
|
VN_PERF_NO_CMD_BATCHING = 1ull << 6,
|
||||||
VN_PERF_NO_TIMELINE_SEM_FEEDBACK = 1ull << 7,
|
VN_PERF_NO_TIMELINE_SEM_FEEDBACK = 1ull << 7,
|
||||||
|
VN_PERF_NO_QUERY_FEEDBACK = 1ull << 8,
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef uint64_t vn_object_id;
|
typedef uint64_t vn_object_id;
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@
|
||||||
#include "venus-protocol/vn_protocol_driver_query_pool.h"
|
#include "venus-protocol/vn_protocol_driver_query_pool.h"
|
||||||
|
|
||||||
#include "vn_device.h"
|
#include "vn_device.h"
|
||||||
|
#include "vn_feedback.h"
|
||||||
|
|
||||||
/* query pool commands */
|
/* query pool commands */
|
||||||
|
|
||||||
|
|
@ -85,6 +86,15 @@ vn_CreateQueryPool(VkDevice device,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!VN_PERF(NO_QUERY_FEEDBACK)) {
|
||||||
|
/* Feedback results are always 64 bit and include availability bit
|
||||||
|
* (also 64 bit)
|
||||||
|
*/
|
||||||
|
const uint32_t slot_size = (pool->result_array_size * 8) + 8;
|
||||||
|
vn_feedback_buffer_create(dev, slot_size * pCreateInfo->queryCount,
|
||||||
|
alloc, &pool->feedback);
|
||||||
|
}
|
||||||
|
|
||||||
VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
|
VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
|
||||||
vn_async_vkCreateQueryPool(dev->instance, device, pCreateInfo, NULL,
|
vn_async_vkCreateQueryPool(dev->instance, device, pCreateInfo, NULL,
|
||||||
&pool_handle);
|
&pool_handle);
|
||||||
|
|
@ -109,6 +119,9 @@ vn_DestroyQueryPool(VkDevice device,
|
||||||
|
|
||||||
alloc = pAllocator ? pAllocator : &pool->allocator;
|
alloc = pAllocator ? pAllocator : &pool->allocator;
|
||||||
|
|
||||||
|
if (pool->feedback)
|
||||||
|
vn_feedback_buffer_destroy(dev, pool->feedback, alloc);
|
||||||
|
|
||||||
vn_async_vkDestroyQueryPool(dev->instance, device, queryPool, NULL);
|
vn_async_vkDestroyQueryPool(dev->instance, device, queryPool, NULL);
|
||||||
|
|
||||||
vn_object_base_fini(&pool->base);
|
vn_object_base_fini(&pool->base);
|
||||||
|
|
@ -123,9 +136,118 @@ vn_ResetQueryPool(VkDevice device,
|
||||||
{
|
{
|
||||||
VN_TRACE_FUNC();
|
VN_TRACE_FUNC();
|
||||||
struct vn_device *dev = vn_device_from_handle(device);
|
struct vn_device *dev = vn_device_from_handle(device);
|
||||||
|
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
|
||||||
|
|
||||||
vn_async_vkResetQueryPool(dev->instance, device, queryPool, firstQuery,
|
vn_async_vkResetQueryPool(dev->instance, device, queryPool, firstQuery,
|
||||||
queryCount);
|
queryCount);
|
||||||
|
if (pool->feedback) {
|
||||||
|
/* Feedback results are always 64 bit and include availability bit
|
||||||
|
* (also 64 bit)
|
||||||
|
*/
|
||||||
|
const uint32_t slot_size = (pool->result_array_size * 8) + 8;
|
||||||
|
const uint32_t offset = slot_size * firstQuery;
|
||||||
|
memset(pool->feedback->data + offset, 0, slot_size * queryCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
vn_get_query_pool_feedback(struct vn_query_pool *pool,
|
||||||
|
uint32_t firstQuery,
|
||||||
|
uint32_t queryCount,
|
||||||
|
void *pData,
|
||||||
|
VkDeviceSize stride,
|
||||||
|
VkQueryResultFlags flags)
|
||||||
|
{
|
||||||
|
VkResult result = VK_SUCCESS;
|
||||||
|
/* Feedback results are always 64 bit and include availability bit
|
||||||
|
* (also 64 bit)
|
||||||
|
*/
|
||||||
|
const uint32_t slot_array_size = pool->result_array_size + 1;
|
||||||
|
uint64_t *src = pool->feedback->data;
|
||||||
|
src += slot_array_size * firstQuery;
|
||||||
|
|
||||||
|
uint32_t dst_index = 0;
|
||||||
|
uint32_t src_index = 0;
|
||||||
|
if (flags & VK_QUERY_RESULT_64_BIT) {
|
||||||
|
uint64_t *dst = pData;
|
||||||
|
uint32_t index_stride = stride / sizeof(uint64_t);
|
||||||
|
for (uint32_t i = 0; i < queryCount; i++) {
|
||||||
|
/* Copy the result if its available */
|
||||||
|
const uint64_t avail = src[src_index + pool->result_array_size];
|
||||||
|
if (avail) {
|
||||||
|
memcpy(&dst[dst_index], &src[src_index],
|
||||||
|
pool->result_array_size * sizeof(uint64_t));
|
||||||
|
} else {
|
||||||
|
result = VK_NOT_READY;
|
||||||
|
/* valid to return result of 0 if partial bit is set */
|
||||||
|
if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
|
||||||
|
memset(&dst[dst_index], 0,
|
||||||
|
pool->result_array_size * sizeof(uint64_t));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Set the availability bit if requested */
|
||||||
|
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||||||
|
dst[dst_index + pool->result_array_size] = avail;
|
||||||
|
|
||||||
|
dst_index += index_stride;
|
||||||
|
src_index += slot_array_size;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
uint32_t *dst = pData;
|
||||||
|
uint32_t index_stride = stride / sizeof(uint32_t);
|
||||||
|
for (uint32_t i = 0; i < queryCount; i++) {
|
||||||
|
/* Copy the result if its available, converting down to uint32_t */
|
||||||
|
const uint32_t avail =
|
||||||
|
(uint32_t)src[src_index + pool->result_array_size];
|
||||||
|
if (avail) {
|
||||||
|
for (uint32_t j = 0; j < pool->result_array_size; j++)
|
||||||
|
dst[dst_index + j] = (uint32_t)src[src_index + j];
|
||||||
|
} else {
|
||||||
|
result = VK_NOT_READY;
|
||||||
|
/* valid to return result of 0 if partial bit is set */
|
||||||
|
if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
|
||||||
|
for (uint32_t j = 0; j < pool->result_array_size; j++)
|
||||||
|
dst[dst_index + j] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Set the availability bit if requested */
|
||||||
|
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||||||
|
dst[dst_index + pool->result_array_size] = avail;
|
||||||
|
|
||||||
|
dst_index += index_stride;
|
||||||
|
src_index += slot_array_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
static VkResult
|
||||||
|
vn_query_feedback_wait_ready(struct vn_query_pool *pool,
|
||||||
|
uint32_t firstQuery,
|
||||||
|
uint32_t queryCount)
|
||||||
|
{
|
||||||
|
/* Timeout after 5 seconds */
|
||||||
|
uint64_t timeout = 5000ull * 1000 * 1000;
|
||||||
|
uint64_t abs_timeout_ns = os_time_get_absolute_timeout(timeout);
|
||||||
|
|
||||||
|
/* Feedback results are always 64 bit and include availability bit
|
||||||
|
* (also 64 bit)
|
||||||
|
*/
|
||||||
|
const uint32_t slot_array_size = pool->result_array_size + 1;
|
||||||
|
volatile uint64_t *src = pool->feedback->data;
|
||||||
|
src += (slot_array_size * firstQuery) + pool->result_array_size;
|
||||||
|
|
||||||
|
uint32_t src_index = 0;
|
||||||
|
for (uint32_t i = 0; i < queryCount; i++) {
|
||||||
|
while (!src[src_index]) {
|
||||||
|
if (os_time_get_nano() > abs_timeout_ns)
|
||||||
|
return VK_ERROR_DEVICE_LOST;
|
||||||
|
|
||||||
|
thrd_yield();
|
||||||
|
}
|
||||||
|
src_index += slot_array_size;
|
||||||
|
}
|
||||||
|
return VK_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
VkResult
|
VkResult
|
||||||
|
|
@ -142,12 +264,28 @@ vn_GetQueryPoolResults(VkDevice device,
|
||||||
struct vn_device *dev = vn_device_from_handle(device);
|
struct vn_device *dev = vn_device_from_handle(device);
|
||||||
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
|
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
|
||||||
const VkAllocationCallbacks *alloc = &pool->allocator;
|
const VkAllocationCallbacks *alloc = &pool->allocator;
|
||||||
|
VkResult result;
|
||||||
|
|
||||||
const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
|
const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
|
||||||
const size_t result_size = pool->result_array_size * result_width;
|
const size_t result_size = pool->result_array_size * result_width;
|
||||||
const bool result_always_written =
|
const bool result_always_written =
|
||||||
flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);
|
flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);
|
||||||
|
|
||||||
|
/* Get results from feedback buffers
|
||||||
|
* With VK_QUERY_RESULT_PARTIAL_BIT, unavailable queries are reported as 0
|
||||||
|
*/
|
||||||
|
if (pool->feedback) {
|
||||||
|
/* If the wait bit is set, poll until the queries are ready */
|
||||||
|
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||||
|
result = vn_query_feedback_wait_ready(pool, firstQuery, queryCount);
|
||||||
|
if (result != VK_SUCCESS)
|
||||||
|
return vn_result(dev->instance, result);
|
||||||
|
}
|
||||||
|
result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
|
||||||
|
stride, flags);
|
||||||
|
return vn_result(dev->instance, result);
|
||||||
|
}
|
||||||
|
|
||||||
VkQueryResultFlags packed_flags = flags;
|
VkQueryResultFlags packed_flags = flags;
|
||||||
size_t packed_stride = result_size;
|
size_t packed_stride = result_size;
|
||||||
if (!result_always_written)
|
if (!result_always_written)
|
||||||
|
|
@ -165,12 +303,7 @@ vn_GetQueryPoolResults(VkDevice device,
|
||||||
if (!packed_data)
|
if (!packed_data)
|
||||||
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||||
}
|
}
|
||||||
|
result = vn_call_vkGetQueryPoolResults(
|
||||||
/* TODO the renderer should transparently vkCmdCopyQueryPoolResults to a
|
|
||||||
* coherent memory such that we can memcpy from the coherent memory to
|
|
||||||
* avoid this serialized round trip.
|
|
||||||
*/
|
|
||||||
VkResult result = vn_call_vkGetQueryPoolResults(
|
|
||||||
dev->instance, device, queryPool, firstQuery, queryCount, packed_size,
|
dev->instance, device, queryPool, firstQuery, queryCount, packed_size,
|
||||||
packed_data, packed_stride, packed_flags);
|
packed_data, packed_stride, packed_flags);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ struct vn_query_pool {
|
||||||
|
|
||||||
VkAllocationCallbacks allocator;
|
VkAllocationCallbacks allocator;
|
||||||
|
|
||||||
|
/* non-NULL if VN_PERF_NO_QUERY_FEEDBACK is disabled */
|
||||||
struct vn_feedback_buffer *feedback;
|
struct vn_feedback_buffer *feedback;
|
||||||
uint32_t result_array_size;
|
uint32_t result_array_size;
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue