venus: add vn_relax_init/_fini()

Use a new calling contract so we can do pre/post-work around every ring-waiting
iteration. All looping uses of `vn_relax()` must now call `vn_relax_init()` and
`vn_relax_fini()` before/after their loop bodies.

Signed-off-by: Ryan Neph <ryanneph@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22036>
This commit is contained in:
Ryan Neph <ryanneph@google.com> authored on 2023-03-22 12:12:21 -07:00; committed by Marge Bot
parent 0f14724039
commit bb4c10f85e
6 changed files with 73 additions and 25 deletions

View file

@ -123,9 +123,23 @@ vn_extension_get_spec_version(const char *name)
return index >= 0 ? vn_info_extension_get(index)->spec_version : 0;
}
void
vn_relax(const struct vn_ring *ring, uint32_t *iter, const char *reason)
struct vn_relax_state
vn_relax_init(struct vn_ring *ring, const char *reason)
{
return (struct vn_relax_state){
.ring = ring,
.iter = 0,
.reason = reason,
};
}
void
vn_relax(struct vn_relax_state *state)
{
struct vn_ring *ring = state->ring;
uint32_t *iter = &state->iter;
const char *reason = state->reason;
/* Yield for the first 2^busy_wait_order times and then sleep for
* base_sleep_us microseconds for the same number of times. After that,
* keep doubling both sleep length and count.

View file

@ -156,6 +156,12 @@ struct vn_env {
};
extern struct vn_env vn_env;
/* Per-loop state for the vn_relax() ring-wait back-off.
 * Created by vn_relax_init(), advanced by each vn_relax() call, and
 * released with vn_relax_fini() when the wait is over. */
struct vn_relax_state {
struct vn_ring *ring; /* ring being waited on (borrowed, not owned) */
uint32_t iter; /* iteration count; vn_relax() scales yield/sleep back-off with it */
const char *reason; /* short wait label, e.g. "roundtrip" -- presumably for logging; confirm in vn_relax() */
};
void
vn_env_init(void);
@ -223,8 +229,16 @@ vn_refcount_dec(struct vn_refcount *ref)
uint32_t
vn_extension_get_spec_version(const char *name);
struct vn_relax_state
vn_relax_init(struct vn_ring *ring, const char *reason);
void
vn_relax(const struct vn_ring *ring, uint32_t *iter, const char *reason);
vn_relax(struct vn_relax_state *state);
/* End a relax loop started with vn_relax_init().
 *
 * Currently a no-op: the state owns no resources, so there is nothing to
 * release.  The function exists so every relax loop is bracketed, allowing
 * post-wait work to be added here later without touching call sites.
 */
static inline void
vn_relax_fini(struct vn_relax_state *state)
{
   /* suppress -Wunused-parameter until this gains real work */
   (void)state;
}
static_assert(sizeof(vn_object_id) >= sizeof(uintptr_t), "");

View file

@ -321,15 +321,17 @@ vn_instance_wait_roundtrip(struct vn_instance *instance,
return;
}
const struct vn_ring *ring = &instance->ring.ring;
struct vn_ring *ring = &instance->ring.ring;
const volatile atomic_uint *ptr = ring->shared.extra;
uint32_t iter = 0;
struct vn_relax_state relax_state = vn_relax_init(ring, "roundtrip");
do {
const uint32_t cur = atomic_load_explicit(ptr, memory_order_acquire);
/* clamp to 32bit for legacy ring extra based roundtrip waiting */
if (roundtrip_seqno_ge(cur, roundtrip_seqno))
if (roundtrip_seqno_ge(cur, roundtrip_seqno)) {
vn_relax_fini(&relax_state);
break;
vn_relax(ring, &iter, "roundtrip");
}
vn_relax(&relax_state);
} while (true);
}

View file

@ -1286,7 +1286,7 @@ static VkResult
vn_update_sync_result(struct vn_device *dev,
VkResult result,
int64_t abs_timeout,
uint32_t *iter)
struct vn_relax_state *relax_state)
{
switch (result) {
case VK_NOT_READY:
@ -1294,7 +1294,7 @@ vn_update_sync_result(struct vn_device *dev,
os_time_get_nano() >= abs_timeout)
result = VK_TIMEOUT;
else
vn_relax(&dev->instance->ring.ring, iter, "client");
vn_relax(relax_state);
break;
default:
assert(result == VK_SUCCESS || result < 0);
@ -1317,7 +1317,6 @@ vn_WaitForFences(VkDevice device,
const int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
VkResult result = VK_NOT_READY;
uint32_t iter = 0;
if (fenceCount > 1 && waitAll) {
VkFence local_fences[8];
VkFence *fences = local_fences;
@ -1330,18 +1329,26 @@ vn_WaitForFences(VkDevice device,
}
memcpy(fences, pFences, sizeof(*fences) * fenceCount);
struct vn_relax_state relax_state =
vn_relax_init(&dev->instance->ring.ring, "client");
while (result == VK_NOT_READY) {
result = vn_remove_signaled_fences(device, fences, &fenceCount);
result = vn_update_sync_result(dev, result, abs_timeout, &iter);
result =
vn_update_sync_result(dev, result, abs_timeout, &relax_state);
}
vn_relax_fini(&relax_state);
if (fences != local_fences)
vk_free(alloc, fences);
} else {
struct vn_relax_state relax_state =
vn_relax_init(&dev->instance->ring.ring, "client");
while (result == VK_NOT_READY) {
result = vn_find_first_signaled_fence(device, pFences, fenceCount);
result = vn_update_sync_result(dev, result, abs_timeout, &iter);
result =
vn_update_sync_result(dev, result, abs_timeout, &relax_state);
}
vn_relax_fini(&relax_state);
}
return vn_result(dev->instance, result);
@ -1812,7 +1819,6 @@ vn_WaitSemaphores(VkDevice device,
const int64_t abs_timeout = os_time_get_absolute_timeout(timeout);
VkResult result = VK_NOT_READY;
uint32_t iter = 0;
if (pWaitInfo->semaphoreCount > 1 &&
!(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT)) {
uint32_t semaphore_count = pWaitInfo->semaphoreCount;
@ -1833,21 +1839,29 @@ vn_WaitSemaphores(VkDevice device,
sizeof(*semaphores) * semaphore_count);
memcpy(values, pWaitInfo->pValues, sizeof(*values) * semaphore_count);
struct vn_relax_state relax_state =
vn_relax_init(&dev->instance->ring.ring, "client");
while (result == VK_NOT_READY) {
result = vn_remove_signaled_semaphores(device, semaphores, values,
&semaphore_count);
result = vn_update_sync_result(dev, result, abs_timeout, &iter);
result =
vn_update_sync_result(dev, result, abs_timeout, &relax_state);
}
vn_relax_fini(&relax_state);
if (semaphores != local_semaphores)
vk_free(alloc, semaphores);
} else {
struct vn_relax_state relax_state =
vn_relax_init(&dev->instance->ring.ring, "client");
while (result == VK_NOT_READY) {
result = vn_find_first_signaled_semaphore(
device, pWaitInfo->pSemaphores, pWaitInfo->pValues,
pWaitInfo->semaphoreCount);
result = vn_update_sync_result(dev, result, abs_timeout, &iter);
result =
vn_update_sync_result(dev, result, abs_timeout, &relax_state);
}
vn_relax_fini(&relax_state);
}
return vn_result(dev->instance, result);

View file

@ -89,17 +89,19 @@ vn_ring_retire_submits(struct vn_ring *ring, uint32_t seqno)
}
static uint32_t
vn_ring_wait_seqno(const struct vn_ring *ring, uint32_t seqno)
vn_ring_wait_seqno(struct vn_ring *ring, uint32_t seqno)
{
/* A renderer wait incurs several hops and the renderer might poll
* repeatedly anyway. Let's just poll here.
*/
uint32_t iter = 0;
struct vn_relax_state relax_state = vn_relax_init(ring, "ring seqno");
do {
const uint32_t head = vn_ring_load_head(ring);
if (vn_ring_ge_seqno(ring, head, seqno))
if (vn_ring_ge_seqno(ring, head, seqno)) {
vn_relax_fini(&relax_state);
return head;
vn_relax(ring, &iter, "ring seqno");
}
vn_relax(&relax_state);
} while (true);
}
@ -118,7 +120,7 @@ vn_ring_has_space(const struct vn_ring *ring,
}
static uint32_t
vn_ring_wait_space(const struct vn_ring *ring, uint32_t size)
vn_ring_wait_space(struct vn_ring *ring, uint32_t size)
{
assert(size <= ring->buffer_size);
@ -130,11 +132,13 @@ vn_ring_wait_space(const struct vn_ring *ring, uint32_t size)
VN_TRACE_FUNC();
/* see the reasoning in vn_ring_wait_seqno */
uint32_t iter = 0;
struct vn_relax_state relax_state = vn_relax_init(ring, "ring space");
do {
vn_relax(ring, &iter, "ring space");
if (vn_ring_has_space(ring, size, &head))
vn_relax(&relax_state);
if (vn_ring_has_space(ring, size, &head)) {
vn_relax_fini(&relax_state);
return head;
}
} while (true);
}
}
@ -263,7 +267,7 @@ vn_ring_submit(struct vn_ring *ring,
* This is thread-safe.
*/
void
vn_ring_wait(const struct vn_ring *ring, uint32_t seqno)
vn_ring_wait(struct vn_ring *ring, uint32_t seqno)
{
vn_ring_wait_seqno(ring, seqno);
}

View file

@ -96,7 +96,7 @@ vn_ring_submit(struct vn_ring *ring,
uint32_t *seqno);
void
vn_ring_wait(const struct vn_ring *ring, uint32_t seqno);
vn_ring_wait(struct vn_ring *ring, uint32_t seqno);
bool
vn_ring_fatal(const struct vn_ring *ring);