mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 06:30:10 +01:00
venus: use feedback for vkGetQueryPoolResults
Create a feedback buffer for each query pool and retrieve query results from that buffer instead of making a roundtrip call in vkGetQueryPoolResults. Queries made with VK_QUERY_RESULT_WAIT_BIT poll until the results are available in the feedback buffer. Query results in the feedback buffer are always stored as VK_QUERY_RESULT_64_BIT and, if needed, are converted to the format the app requests at vkGetQueryPoolResults time. Signed-off-by: Juston Li <justonli@google.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23348>
This commit is contained in:
parent
38b1c39b59
commit
e6cffa1f0e
5 changed files with 153 additions and 6 deletions
|
|
@ -1804,6 +1804,8 @@ vn_CmdEndQuery(VkCommandBuffer commandBuffer,
|
|||
uint32_t query)
|
||||
{
|
||||
VN_CMD_ENQUEUE(vkCmdEndQuery, commandBuffer, queryPool, query);
|
||||
|
||||
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1814,6 +1816,9 @@ vn_CmdResetQueryPool(VkCommandBuffer commandBuffer,
|
|||
{
|
||||
VN_CMD_ENQUEUE(vkCmdResetQueryPool, commandBuffer, queryPool, firstQuery,
|
||||
queryCount);
|
||||
|
||||
vn_feedback_query_reset_cmd_record(commandBuffer, queryPool, firstQuery,
|
||||
queryCount);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1824,6 +1829,8 @@ vn_CmdWriteTimestamp(VkCommandBuffer commandBuffer,
|
|||
{
|
||||
VN_CMD_ENQUEUE(vkCmdWriteTimestamp, commandBuffer, pipelineStage,
|
||||
queryPool, query);
|
||||
|
||||
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -1834,6 +1841,8 @@ vn_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
|
|||
{
|
||||
VN_CMD_ENQUEUE(vkCmdWriteTimestamp2, commandBuffer, stage, queryPool,
|
||||
query);
|
||||
|
||||
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -2012,6 +2021,8 @@ vn_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer,
|
|||
{
|
||||
VN_CMD_ENQUEUE(vkCmdEndQueryIndexedEXT, commandBuffer, queryPool, query,
|
||||
index);
|
||||
|
||||
vn_cmd_add_query_feedback(commandBuffer, queryPool, query, 1);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ static const struct debug_control vn_perf_options[] = {
|
|||
{ "no_memory_suballoc", VN_PERF_NO_MEMORY_SUBALLOC },
|
||||
{ "no_cmd_batching", VN_PERF_NO_CMD_BATCHING },
|
||||
{ "no_timeline_sem_feedback", VN_PERF_NO_TIMELINE_SEM_FEEDBACK },
|
||||
{ "no_query_feedback", VN_PERF_NO_QUERY_FEEDBACK },
|
||||
{ NULL, 0 },
|
||||
/* clang-format on */
|
||||
};
|
||||
|
|
|
|||
|
|
@ -115,6 +115,7 @@ enum vn_perf {
|
|||
VN_PERF_NO_MEMORY_SUBALLOC = 1ull << 5,
|
||||
VN_PERF_NO_CMD_BATCHING = 1ull << 6,
|
||||
VN_PERF_NO_TIMELINE_SEM_FEEDBACK = 1ull << 7,
|
||||
VN_PERF_NO_QUERY_FEEDBACK = 1ull << 8,
|
||||
};
|
||||
|
||||
typedef uint64_t vn_object_id;
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
#include "venus-protocol/vn_protocol_driver_query_pool.h"
|
||||
|
||||
#include "vn_device.h"
|
||||
#include "vn_feedback.h"
|
||||
|
||||
/* query pool commands */
|
||||
|
||||
|
|
@ -85,6 +86,15 @@ vn_CreateQueryPool(VkDevice device,
|
|||
break;
|
||||
}
|
||||
|
||||
if (!VN_PERF(NO_QUERY_FEEDBACK)) {
|
||||
/* Feedback results are always 64 bit and include availability bit
|
||||
* (also 64 bit)
|
||||
*/
|
||||
const uint32_t slot_size = (pool->result_array_size * 8) + 8;
|
||||
vn_feedback_buffer_create(dev, slot_size * pCreateInfo->queryCount,
|
||||
alloc, &pool->feedback);
|
||||
}
|
||||
|
||||
VkQueryPool pool_handle = vn_query_pool_to_handle(pool);
|
||||
vn_async_vkCreateQueryPool(dev->instance, device, pCreateInfo, NULL,
|
||||
&pool_handle);
|
||||
|
|
@ -109,6 +119,9 @@ vn_DestroyQueryPool(VkDevice device,
|
|||
|
||||
alloc = pAllocator ? pAllocator : &pool->allocator;
|
||||
|
||||
if (pool->feedback)
|
||||
vn_feedback_buffer_destroy(dev, pool->feedback, alloc);
|
||||
|
||||
vn_async_vkDestroyQueryPool(dev->instance, device, queryPool, NULL);
|
||||
|
||||
vn_object_base_fini(&pool->base);
|
||||
|
|
@ -123,9 +136,118 @@ vn_ResetQueryPool(VkDevice device,
|
|||
{
|
||||
VN_TRACE_FUNC();
|
||||
struct vn_device *dev = vn_device_from_handle(device);
|
||||
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
|
||||
|
||||
vn_async_vkResetQueryPool(dev->instance, device, queryPool, firstQuery,
|
||||
queryCount);
|
||||
if (pool->feedback) {
|
||||
/* Feedback results are always 64 bit and include availability bit
|
||||
* (also 64 bit)
|
||||
*/
|
||||
const uint32_t slot_size = (pool->result_array_size * 8) + 8;
|
||||
const uint32_t offset = slot_size * firstQuery;
|
||||
memset(pool->feedback->data + offset, 0, slot_size * queryCount);
|
||||
}
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vn_get_query_pool_feedback(struct vn_query_pool *pool,
|
||||
uint32_t firstQuery,
|
||||
uint32_t queryCount,
|
||||
void *pData,
|
||||
VkDeviceSize stride,
|
||||
VkQueryResultFlags flags)
|
||||
{
|
||||
VkResult result = VK_SUCCESS;
|
||||
/* Feedback results are always 64 bit and include availability bit
|
||||
* (also 64 bit)
|
||||
*/
|
||||
const uint32_t slot_array_size = pool->result_array_size + 1;
|
||||
uint64_t *src = pool->feedback->data;
|
||||
src += slot_array_size * firstQuery;
|
||||
|
||||
uint32_t dst_index = 0;
|
||||
uint32_t src_index = 0;
|
||||
if (flags & VK_QUERY_RESULT_64_BIT) {
|
||||
uint64_t *dst = pData;
|
||||
uint32_t index_stride = stride / sizeof(uint64_t);
|
||||
for (uint32_t i = 0; i < queryCount; i++) {
|
||||
/* Copy the result if its available */
|
||||
const uint64_t avail = src[src_index + pool->result_array_size];
|
||||
if (avail) {
|
||||
memcpy(&dst[dst_index], &src[src_index],
|
||||
pool->result_array_size * sizeof(uint64_t));
|
||||
} else {
|
||||
result = VK_NOT_READY;
|
||||
/* valid to return result of 0 if partial bit is set */
|
||||
if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
|
||||
memset(&dst[dst_index], 0,
|
||||
pool->result_array_size * sizeof(uint64_t));
|
||||
}
|
||||
}
|
||||
/* Set the availability bit if requested */
|
||||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||||
dst[dst_index + pool->result_array_size] = avail;
|
||||
|
||||
dst_index += index_stride;
|
||||
src_index += slot_array_size;
|
||||
}
|
||||
} else {
|
||||
uint32_t *dst = pData;
|
||||
uint32_t index_stride = stride / sizeof(uint32_t);
|
||||
for (uint32_t i = 0; i < queryCount; i++) {
|
||||
/* Copy the result if its available, converting down to uint32_t */
|
||||
const uint32_t avail =
|
||||
(uint32_t)src[src_index + pool->result_array_size];
|
||||
if (avail) {
|
||||
for (uint32_t j = 0; j < pool->result_array_size; j++)
|
||||
dst[dst_index + j] = (uint32_t)src[src_index + j];
|
||||
} else {
|
||||
result = VK_NOT_READY;
|
||||
/* valid to return result of 0 if partial bit is set */
|
||||
if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
|
||||
for (uint32_t j = 0; j < pool->result_array_size; j++)
|
||||
dst[dst_index + j] = 0;
|
||||
}
|
||||
}
|
||||
/* Set the availability bit if requested */
|
||||
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)
|
||||
dst[dst_index + pool->result_array_size] = avail;
|
||||
|
||||
dst_index += index_stride;
|
||||
src_index += slot_array_size;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vn_query_feedback_wait_ready(struct vn_query_pool *pool,
|
||||
uint32_t firstQuery,
|
||||
uint32_t queryCount)
|
||||
{
|
||||
/* Timeout after 5 seconds */
|
||||
uint64_t timeout = 5000ull * 1000 * 1000;
|
||||
uint64_t abs_timeout_ns = os_time_get_absolute_timeout(timeout);
|
||||
|
||||
/* Feedback results are always 64 bit and include availability bit
|
||||
* (also 64 bit)
|
||||
*/
|
||||
const uint32_t slot_array_size = pool->result_array_size + 1;
|
||||
volatile uint64_t *src = pool->feedback->data;
|
||||
src += (slot_array_size * firstQuery) + pool->result_array_size;
|
||||
|
||||
uint32_t src_index = 0;
|
||||
for (uint32_t i = 0; i < queryCount; i++) {
|
||||
while (!src[src_index]) {
|
||||
if (os_time_get_nano() > abs_timeout_ns)
|
||||
return VK_ERROR_DEVICE_LOST;
|
||||
|
||||
thrd_yield();
|
||||
}
|
||||
src_index += slot_array_size;
|
||||
}
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
|
@ -142,12 +264,28 @@ vn_GetQueryPoolResults(VkDevice device,
|
|||
struct vn_device *dev = vn_device_from_handle(device);
|
||||
struct vn_query_pool *pool = vn_query_pool_from_handle(queryPool);
|
||||
const VkAllocationCallbacks *alloc = &pool->allocator;
|
||||
VkResult result;
|
||||
|
||||
const size_t result_width = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
|
||||
const size_t result_size = pool->result_array_size * result_width;
|
||||
const bool result_always_written =
|
||||
flags & (VK_QUERY_RESULT_WAIT_BIT | VK_QUERY_RESULT_PARTIAL_BIT);
|
||||
|
||||
/* Get results from feedback buffers
|
||||
* Not possible for VK_QUERY_RESULT_PARTIAL_BIT
|
||||
*/
|
||||
if (pool->feedback) {
|
||||
/* If wait bit is set, wait poll until query is ready */
|
||||
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
|
||||
result = vn_query_feedback_wait_ready(pool, firstQuery, queryCount);
|
||||
if (result != VK_SUCCESS)
|
||||
return vn_result(dev->instance, result);
|
||||
}
|
||||
result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
|
||||
stride, flags);
|
||||
return vn_result(dev->instance, result);
|
||||
}
|
||||
|
||||
VkQueryResultFlags packed_flags = flags;
|
||||
size_t packed_stride = result_size;
|
||||
if (!result_always_written)
|
||||
|
|
@ -165,12 +303,7 @@ vn_GetQueryPoolResults(VkDevice device,
|
|||
if (!packed_data)
|
||||
return vn_error(dev->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
/* TODO the renderer should transparently vkCmdCopyQueryPoolResults to a
|
||||
* coherent memory such that we can memcpy from the coherent memory to
|
||||
* avoid this serialized round trip.
|
||||
*/
|
||||
VkResult result = vn_call_vkGetQueryPoolResults(
|
||||
result = vn_call_vkGetQueryPoolResults(
|
||||
dev->instance, device, queryPool, firstQuery, queryCount, packed_size,
|
||||
packed_data, packed_stride, packed_flags);
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ struct vn_query_pool {
|
|||
|
||||
VkAllocationCallbacks allocator;
|
||||
|
||||
/* non-NULL if VN_PERF_NO_QUERY_FEEDBACK is disabled */
|
||||
struct vn_feedback_buffer *feedback;
|
||||
uint32_t result_array_size;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue