venus: use feedback for vkGetQueryPoolResults

Create a feedback buffer for each query pool and retrieve the query
results from the buffer instead of a roundtrip call in
vkGetQueryPoolResults.

VK_QUERY_RESULT_WAIT_BIT queries will poll until the queries are
available in the feedback buffer.

Query results in the feedback buffer are always stored as 64-bit values
(VK_QUERY_RESULT_64_BIT) and, if needed, are converted to the width the
app requests at vkGetQueryPoolResults time.

Signed-off-by: Juston Li <justonli@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23348>
This commit is contained in:
Juston Li 2023-05-31 16:22:06 -07:00 committed by Marge Bot
parent 38b1c39b59
commit e6cffa1f0e
5 changed files with 153 additions and 6 deletions

View file

@ -1804,6 +1804,8 @@ vn_CmdEndQuery(VkCommandBuffer commandBuffer,
uint32_t query)
{
VN_CMD_ENQUEUE(vkCmdEndQuery, commandBuffer, queryPool, query);
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
}
void
@ -1814,6 +1816,9 @@ vn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
{
VN_CMD_ENQUEUE(vkCmdResetQueryPool, commandBuffer, queryPool, firstQuery,
queryCount);
vn_feedback_query_reset_cmd_record(commandBuffer, queryPool, firstQuery,
queryCount);
}
void
@ -1824,6 +1829,8 @@ vn_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
{
VN_CMD_ENQUEUE(vkCmdWriteTimestamp, commandBuffer, pipelineStage,
queryPool, query);
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
}
void
@ -1834,6 +1841,8 @@ vn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
{
VN_CMD_ENQUEUE(vkCmdWriteTimestamp2, commandBuffer, stage, queryPool,
query);
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
}
void
@ -2012,6 +2021,8 @@ vn_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
{
VN_CMD_ENQUEUE(vkCmdEndQueryIndexedEXT, commandBuffer, queryPool, query,
index);
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
}
void

View file

@ -46,6 +46,7 @@ static const struct debug_control vn_perf_options[] = {
{ "no_memory_suballoc", VN_PERF_NO_MEMORY_SUBALLOC },
{ "no_cmd_batching", VN_PERF_NO_CMD_BATCHING },
{ "no_timeline_sem_feedback", VN_PERF_NO_TIMELINE_SEM_FEEDBACK },
{ "no_query_feedback", VN_PERF_NO_QUERY_FEEDBACK },
{ NULL, 0 },
/* clang-format on */
};

View file

@ -115,6 +115,7 @@ enum vn_perf {
VN_PERF_NO_MEMORY_SUBALLOC = 1ull << 5,
VN_PERF_NO_CMD_BATCHING = 1ull << 6,
VN_PERF_NO_TIMELINE_SEM_FEEDBACK = 1ull << 7,
VN_PERF_NO_QUERY_FEEDBACK = 1ull << 8,
};
typedef uint64_t vn_object_id;

View file

@ -13,6 +13,7 @@
#include "venus-protocol/vn_protocol_driver_query_pool.h"
#include "vn_device.h"
#include "vn_feedback.h"
/* query pool commands */
@ -85,6 +86,15 @@ vn_CreateQueryPool(VkDevice device,
break;
}
if (!VN_PERF(NO_QUERY_FEEDBACK)) {
/* Feedback results are always 64 bit and include availability bit
* (also 64 bit)
*/
const uint32_t slot_size = (pool->result_array_size * 8) + 8;
vn_feedback_buffer_create(dev, slot_size * pCreateInfo->queryCount,
alloc, &pool->feedback);
}
VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
vn_async_vkCreateQueryPool(dev->instance, device, pCreateInfo, NULL,
&pool_handle);
@ -109,6 +119,9 @@ vn_DestroyQueryPool(VkDevice device,
alloc = pAllocator ? pAllocator : &pool->allocator;
if (pool->feedback)
vn_feedback_buffer_destroy(dev, pool->feedback, alloc);
vn_async_vkDestroyQueryPool(dev->instance, device, queryPool, NULL);
vn_object_base_fini(&pool->base);
@ -123,9 +136,118 @@ vn_ResetQueryPool(VkDevice device,
{
VN_TRACE_FUNC();
struct vn_device *dev = vn_device_from_handle(device);
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
vn_async_vkResetQueryPool(dev->instance, device, queryPool, firstQuery,
queryCount);
if (pool->feedback) {
/* Feedback results are always 64 bit and include availability bit
* (also 64 bit)
*/
const uint32_t slot_size = (pool->result_array_size * 8) + 8;
const uint32_t offset = slot_size * firstQuery;
memset(pool->feedback->data + offset, 0, slot_size * queryCount);
}
}
/*
 * Copy query results for [firstQuery, firstQuery + queryCount) out of the
 * pool's feedback buffer into pData.
 *
 * Every feedback slot holds pool->result_array_size 64-bit values followed by
 * one 64-bit availability word. Values are copied as-is when
 * VK_QUERY_RESULT_64_BIT is requested and narrowed to 32 bits otherwise.
 * stride is the byte distance between consecutive queries in pData.
 *
 * An unavailable query leaves its destination values untouched unless
 * VK_QUERY_RESULT_PARTIAL_BIT is set, in which case they are zero-filled.
 * The availability word is appended after the values when
 * VK_QUERY_RESULT_WITH_AVAILABILITY_BIT is set.
 *
 * Returns VK_SUCCESS when every query was available, VK_NOT_READY otherwise.
 *
 * NOTE(review): VK_NOT_READY is also returned when the partial bit caused a
 * zero-fill -- confirm this is the intended return code for partial results.
 */
static VkResult
vn_get_query_pool_feedback(struct vn_query_pool *pool,
                           uint32_t firstQuery,
                           uint32_t queryCount,
                           void *pData,
                           VkDeviceSize stride,
                           VkQueryResultFlags flags)
{
   /* Slot layout: value_count results + 1 availability word, all 64 bit */
   const uint32_t value_count = pool->result_array_size;
   const uint32_t slot_len = value_count + 1;

   const bool zero_fill = flags & VK_QUERY_RESULT_PARTIAL_BIT;
   const bool with_avail = flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT;

   const uint64_t *slots = pool->feedback->data;
   slots += slot_len * firstQuery;

   VkResult status = VK_SUCCESS;

   if (!(flags & VK_QUERY_RESULT_64_BIT)) {
      /* 32-bit output: narrow each 64-bit feedback word */
      uint32_t *out = pData;
      const uint32_t out_step = stride / sizeof(uint32_t);

      for (uint32_t q = 0; q < queryCount; q++) {
         const uint32_t src_at = q * slot_len;
         const uint32_t dst_at = q * out_step;
         const uint32_t ready = (uint32_t)slots[src_at + value_count];

         if (!ready)
            status = VK_NOT_READY;

         if (ready) {
            for (uint32_t v = 0; v < value_count; v++)
               out[dst_at + v] = (uint32_t)slots[src_at + v];
         } else if (zero_fill) {
            /* a partial result of 0 is acceptable for unavailable queries */
            for (uint32_t v = 0; v < value_count; v++)
               out[dst_at + v] = 0;
         }

         if (with_avail)
            out[dst_at + value_count] = ready;
      }

      return status;
   }

   /* 64-bit output: feedback words can be copied verbatim */
   uint64_t *out = pData;
   const uint32_t out_step = stride / sizeof(uint64_t);

   for (uint32_t q = 0; q < queryCount; q++) {
      const uint32_t src_at = q * slot_len;
      const uint32_t dst_at = q * out_step;
      const uint64_t ready = slots[src_at + value_count];

      if (!ready)
         status = VK_NOT_READY;

      if (ready) {
         memcpy(&out[dst_at], &slots[src_at], value_count * sizeof(uint64_t));
      } else if (zero_fill) {
         /* a partial result of 0 is acceptable for unavailable queries */
         memset(&out[dst_at], 0, value_count * sizeof(uint64_t));
      }

      if (with_avail)
         out[dst_at + value_count] = ready;
   }

   return status;
}
/*
 * Poll the pool's feedback buffer until every query in
 * [firstQuery, firstQuery + queryCount) has a non-zero availability word.
 *
 * Queries are checked in order, yielding the thread between polls. A single
 * 5 second deadline, computed on entry, covers the whole range.
 *
 * Returns VK_SUCCESS once all queries are available, or VK_ERROR_DEVICE_LOST
 * if the deadline passes first.
 */
static VkResult
vn_query_feedback_wait_ready(struct vn_query_pool *pool,
                             uint32_t firstQuery,
                             uint32_t queryCount)
{
   /* Give up after 5 seconds (expressed in nanoseconds) */
   const uint64_t timeout_ns = 5000ull * 1000 * 1000;
   const uint64_t deadline_ns = os_time_get_absolute_timeout(timeout_ns);

   /* Each slot is result_array_size 64-bit values plus one 64-bit
    * availability word; point at the first query's availability word.
    */
   const uint32_t slot_len = pool->result_array_size + 1;
   volatile uint64_t *avail = pool->feedback->data;
   avail += (slot_len * firstQuery) + pool->result_array_size;

   for (uint32_t q = 0; q < queryCount; q++) {
      const uint32_t at = q * slot_len;

      for (;;) {
         if (avail[at])
            break;
         if (os_time_get_nano() > deadline_ns)
            return VK_ERROR_DEVICE_LOST;
         thrd_yield();
      }
   }

   return VK_SUCCESS;
}
VkResult
@ -142,12 +264,28 @@ vn_GetQueryPoolResults(VkDevice device,
struct vn_device *dev = vn_device_from_handle(device);
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
const VkAllocationCallbacks *alloc = &pool->allocator;
VkResult result;
const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
const size_t result_size = pool->result_array_size * result_width;
const bool result_always_written =
flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);
/* Get results from the feedback buffer.
 * Real intermediate values are not possible for
 * VK_QUERY_RESULT_PARTIAL_BIT; unavailable queries report 0 instead.
 */
if (pool->feedback) {
/* If wait bit is set, wait poll until query is ready */
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
result = vn_query_feedback_wait_ready(pool, firstQuery, queryCount);
if (result != VK_SUCCESS)
return vn_result(dev->instance, result);
}
result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
stride, flags);
return vn_result(dev->instance, result);
}
VkQueryResultFlags packed_flags = flags;
size_t packed_stride = result_size;
if (!result_always_written)
@ -165,12 +303,7 @@ vn_GetQueryPoolResults(VkDevice device,
if (!packed_data)
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}
/* TODO the renderer should transparently vkCmdCopyQueryPoolResults to a
* coherent memory such that we can memcpy from the coherent memory to
* avoid this serialized round trip.
*/
VkResult result = vn_call_vkGetQueryPoolResults(
result = vn_call_vkGetQueryPoolResults(
dev->instance, device, queryPool, firstQuery, queryCount, packed_size,
packed_data, packed_stride, packed_flags);

View file

@ -20,6 +20,7 @@ struct vn_query_pool {
VkAllocationCallbacks allocator;
/* non-NULL if VN_PERF_NO_QUERY_FEEDBACK is disabled */
struct vn_feedback_buffer *feedback;
uint32_t result_array_size;
};