venus: avoid constant busy-waiting when waiting for query results

With the commits in this MR up to and including this one, the GFXBench
Manhattan scores improve by 10-15% with ANGLE-on-Venus on some AMD platforms.

Signed-off-by: Yiwei Zhang <zzyiwei@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28287>
This commit is contained in:
Yiwei Zhang 2024-03-18 20:48:02 -07:00 committed by Marge Bot
parent 88b64d14d8
commit 1e47ec2321
3 changed files with 21 additions and 24 deletions

View file

@ -184,6 +184,8 @@ vn_relax_reason_string(enum vn_relax_reason reason)
return "fence";
case VN_RELAX_REASON_SEMAPHORE:
return "semaphore";
case VN_RELAX_REASON_QUERY:
return "query";
}
return "";
}

View file

@ -212,6 +212,7 @@ enum vn_relax_reason {
VN_RELAX_REASON_RING_SPACE,
VN_RELAX_REASON_FENCE,
VN_RELAX_REASON_SEMAPHORE,
VN_RELAX_REASON_QUERY,
};
struct vn_relax_state {

View file

@ -250,33 +250,29 @@ vn_get_query_pool_feedback(struct vn_query_pool *pool,
return result;
}
static VkResult
vn_query_feedback_wait_ready(struct vn_query_pool *pool,
uint32_t firstQuery,
uint32_t queryCount)
static void
vn_query_feedback_wait_ready(struct vn_device *dev,
struct vn_query_pool *pool,
uint32_t first_query,
uint32_t query_count)
{
/* Timeout after 5 seconds */
uint64_t timeout = 5000ull * 1000 * 1000;
uint64_t abs_timeout_ns = os_time_get_absolute_timeout(timeout);
VN_TRACE_FUNC();
/* Feedback results are always 64 bit and include availability bit
* (also 64 bit)
*/
const uint32_t slot_array_size = pool->result_array_size + 1;
volatile uint64_t *src = pool->fb_buf->data;
src += (slot_array_size * firstQuery) + pool->result_array_size;
const uint32_t step = pool->result_array_size + 1;
const uint64_t *avail = (uint64_t *)pool->fb_buf->data +
first_query * step + pool->result_array_size;
uint32_t src_index = 0;
for (uint32_t i = 0; i < queryCount; i++) {
while (!src[src_index]) {
if (os_time_get_nano() > abs_timeout_ns)
return VK_ERROR_DEVICE_LOST;
thrd_yield();
struct vn_relax_state relax_state =
vn_relax_init(dev->instance, VN_RELAX_REASON_QUERY);
for (uint32_t i = 0, j = 0; i < query_count; i++, j += step) {
while (!avail[j]) {
vn_relax(&relax_state);
}
src_index += slot_array_size;
}
return VK_SUCCESS;
vn_relax_fini(&relax_state);
}
VkResult
@ -304,11 +300,9 @@ vn_GetQueryPoolResults(VkDevice device,
*/
if (pool->fb_buf) {
/* If the wait bit is set, poll until the query is ready */
if (flags & VK_QUERY_RESULT_WAIT_BIT) {
result = vn_query_feedback_wait_ready(pool, firstQuery, queryCount);
if (result != VK_SUCCESS)
return vn_result(dev->instance, result);
}
if (flags & VK_QUERY_RESULT_WAIT_BIT)
vn_query_feedback_wait_ready(dev, pool, firstQuery, queryCount);
result = vn_get_query_pool_feedback(pool, firstQuery, queryCount, pData,
stride, flags);
return vn_result(dev->instance, result);