mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 22:49:13 +02:00
vulkan: don't destroy vk_sync_timeline if a point is still pending
Currently, vk_sync_timeline_wait() may report idle while we still have
time points outstanding in vk_queue. This race between vk_sync_timeline
and vk_queue is problematic as vk_queue_submit_cleanup() can end-up
trying to unref a timeline point that was already destroyed in
vk_sync_timeline_finish(), leading to an use-after-free.
Address this race-condition by introducing a reference counter to the
timeline. Each timeline point takes a reference to the timeline when
it's checked out and drops the reference when it's freed. Now, the
timeline is only destroyed when all the references had been dropped.
To guarantee that all references will be dropped and the timeline will
be destroyed, vk_sync_timeline_finish() waits for all pending points
to be checked out.
This commit fixes several Mesa CI flakes in v3dv related to timeline
semaphores.
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12648
Signed-off-by: Maíra Canal <mcanal@igalia.com>
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35921>
(cherry picked from commit ab3aaad957)
This commit is contained in:
parent
27b2041013
commit
57cc38d1fe
3 changed files with 114 additions and 45 deletions
|
|
@ -1974,7 +1974,7 @@
|
|||
"description": "vulkan: don't destroy vk_sync_timeline if a point is still pending",
|
||||
"nominated": true,
|
||||
"nomination_type": 1,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null,
|
||||
"notes": null
|
||||
|
|
|
|||
|
|
@ -60,6 +60,38 @@ vk_sync_timeline_type_validate(const struct vk_sync_timeline_type *ttype)
|
|||
assert(!(req_features & ~ttype->point_sync_type->features));
|
||||
}
|
||||
|
||||
static void
|
||||
vk_sync_timeline_state_ref(struct vk_sync_timeline_state *state)
|
||||
{
|
||||
p_atomic_inc(&state->refcount);
|
||||
}
|
||||
|
||||
static void
|
||||
vk_sync_timeline_state_unref(struct vk_device *device,
|
||||
struct vk_sync_timeline_state *state)
|
||||
{
|
||||
if (p_atomic_dec_return(&state->refcount))
|
||||
return;
|
||||
|
||||
list_for_each_entry_safe(struct vk_sync_timeline_point, point,
|
||||
&state->free_points, link) {
|
||||
list_del(&point->link);
|
||||
vk_sync_finish(device, &point->sync);
|
||||
vk_free(&device->alloc, point);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(struct vk_sync_timeline_point, point,
|
||||
&state->pending_points, link) {
|
||||
list_del(&point->link);
|
||||
vk_sync_finish(device, &point->sync);
|
||||
vk_free(&device->alloc, point);
|
||||
}
|
||||
|
||||
u_cnd_monotonic_destroy(&state->cond);
|
||||
mtx_destroy(&state->mutex);
|
||||
vk_free(&device->alloc, state);
|
||||
}
|
||||
|
||||
VkResult
|
||||
vk_sync_timeline_init(struct vk_device *device,
|
||||
struct vk_sync *sync,
|
||||
|
|
@ -95,33 +127,37 @@ vk_sync_timeline_init(struct vk_device *device,
|
|||
list_inithead(&state->pending_points);
|
||||
list_inithead(&state->free_points);
|
||||
|
||||
p_atomic_set(&state->refcount, 1);
|
||||
|
||||
timeline->state = state;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vk_sync_timeline_gc_locked(struct vk_device *device,
|
||||
struct vk_sync_timeline_state *state,
|
||||
bool drain);
|
||||
|
||||
static void
|
||||
vk_sync_timeline_finish(struct vk_device *device,
|
||||
struct vk_sync *sync)
|
||||
{
|
||||
struct vk_sync_timeline_state *state = to_vk_sync_timeline_state(sync);
|
||||
|
||||
list_for_each_entry_safe(struct vk_sync_timeline_point, point,
|
||||
&state->free_points, link) {
|
||||
list_del(&point->link);
|
||||
vk_sync_finish(device, &point->sync);
|
||||
vk_free(&device->alloc, point);
|
||||
}
|
||||
list_for_each_entry_safe(struct vk_sync_timeline_point, point,
|
||||
&state->pending_points, link) {
|
||||
list_del(&point->link);
|
||||
vk_sync_finish(device, &point->sync);
|
||||
vk_free(&device->alloc, point);
|
||||
}
|
||||
/* We need to garbage collect to get rid of any pending points so that the
|
||||
* vk_sync_timeline_state_unref() at the end drops the final reference
|
||||
* held by the vk_sync_timeline. It's up to the client to ensure that
|
||||
* there are no vk_sync in-flight when this is called so this should get
|
||||
* rid of all pending time points. The only time point references left are
|
||||
* those held by waits or vk_queue_submit().
|
||||
*/
|
||||
mtx_lock(&state->mutex);
|
||||
vk_sync_timeline_gc_locked(device, state, true);
|
||||
assert(list_is_empty(&state->pending_points));
|
||||
mtx_unlock(&state->mutex);
|
||||
|
||||
u_cnd_monotonic_destroy(&state->cond);
|
||||
mtx_destroy(&state->mutex);
|
||||
vk_free(&device->alloc, state);
|
||||
vk_sync_timeline_state_unref(device, state);
|
||||
}
|
||||
|
||||
static struct vk_sync_timeline_point *
|
||||
|
|
@ -137,11 +173,6 @@ vk_sync_timeline_first_point(struct vk_sync_timeline_state *state)
|
|||
return point;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vk_sync_timeline_gc_locked(struct vk_device *device,
|
||||
struct vk_sync_timeline_state *state,
|
||||
bool drain);
|
||||
|
||||
static VkResult
|
||||
vk_sync_timeline_alloc_point_locked(struct vk_device *device,
|
||||
struct vk_sync_timeline *timeline,
|
||||
|
|
@ -197,6 +228,8 @@ vk_sync_timeline_alloc_point_locked(struct vk_device *device,
|
|||
|
||||
*point_out = point;
|
||||
|
||||
vk_sync_timeline_state_ref(state);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -221,21 +254,66 @@ vk_sync_timeline_ref_point_locked(struct vk_sync_timeline_point *point)
|
|||
point->refcount++;
|
||||
}
|
||||
|
||||
static void
|
||||
vk_sync_timeline_unref_point_locked(struct vk_sync_timeline_state *state,
|
||||
struct vk_sync_timeline_point *point)
|
||||
/* Returns true if this was the last reference to point.
|
||||
*
|
||||
* DO NOT call this helper directly. You should call vk_sync_timeline_unref_point_locked()
|
||||
* or vk_sync_timeline_point_unref() instead.
|
||||
*/
|
||||
static bool
|
||||
vk_sync_timeline_unref_point_no_unref_state_locked(struct vk_sync_timeline_point *point)
|
||||
{
|
||||
struct vk_sync_timeline_state *state = point->timeline_state;
|
||||
|
||||
assert(point->refcount > 0);
|
||||
point->refcount--;
|
||||
if (point->refcount == 0) {
|
||||
/* The pending list also takes a reference so this can't be pending */
|
||||
assert(!point->pending);
|
||||
list_add(&point->link, &state->free_points);
|
||||
}
|
||||
|
||||
if (point->refcount > 0)
|
||||
return false;
|
||||
|
||||
assert(point->refcount == 0);
|
||||
|
||||
/* The pending list also takes a reference so this can't be pending */
|
||||
assert(!point->pending);
|
||||
list_add(&point->link, &state->free_points);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
vk_sync_timeline_complete_point_locked(struct vk_sync_timeline_state *state,
|
||||
vk_sync_timeline_unref_point_locked(struct vk_device *device,
|
||||
struct vk_sync_timeline_state *state,
|
||||
struct vk_sync_timeline_point *point)
|
||||
{
|
||||
/* The caller needs to have its own reference to the state, not just the
|
||||
* one implicit in point, because it's also holding the lock.
|
||||
*/
|
||||
assert(p_atomic_read(&state->refcount) > 1);
|
||||
|
||||
if (vk_sync_timeline_unref_point_no_unref_state_locked(point))
|
||||
vk_sync_timeline_state_unref(device, state);
|
||||
}
|
||||
|
||||
void
|
||||
vk_sync_timeline_point_unref(struct vk_device *device,
|
||||
struct vk_sync_timeline_point *point)
|
||||
{
|
||||
struct vk_sync_timeline_state *state = point->timeline_state;
|
||||
|
||||
mtx_lock(&state->mutex);
|
||||
const bool last_ref =
|
||||
vk_sync_timeline_unref_point_no_unref_state_locked(point);
|
||||
mtx_unlock(&state->mutex);
|
||||
|
||||
/* Drop the state reference outside the mutex so we don't free the state
|
||||
* and then try to unlock the mutex.
|
||||
*/
|
||||
if (last_ref)
|
||||
vk_sync_timeline_state_unref(device, state);
|
||||
}
|
||||
|
||||
static void
|
||||
vk_sync_timeline_complete_point_locked(struct vk_device *device,
|
||||
struct vk_sync_timeline_state *state,
|
||||
struct vk_sync_timeline_point *point)
|
||||
{
|
||||
if (!point->pending)
|
||||
|
|
@ -248,7 +326,7 @@ vk_sync_timeline_complete_point_locked(struct vk_sync_timeline_state *state,
|
|||
list_del(&point->link);
|
||||
|
||||
/* Drop the pending reference */
|
||||
vk_sync_timeline_unref_point_locked(state, point);
|
||||
vk_sync_timeline_unref_point_locked(device, state, point);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
|
|
@ -290,7 +368,7 @@ vk_sync_timeline_gc_locked(struct vk_device *device,
|
|||
return result;
|
||||
}
|
||||
|
||||
vk_sync_timeline_complete_point_locked(state, point);
|
||||
vk_sync_timeline_complete_point_locked(device, state, point);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
@ -364,17 +442,6 @@ vk_sync_timeline_get_point(struct vk_device *device,
|
|||
return result;
|
||||
}
|
||||
|
||||
void
|
||||
vk_sync_timeline_point_unref(struct vk_device *device,
|
||||
struct vk_sync_timeline_point *point)
|
||||
{
|
||||
struct vk_sync_timeline_state *state = point->timeline_state;
|
||||
|
||||
mtx_lock(&state->mutex);
|
||||
vk_sync_timeline_unref_point_locked(state, point);
|
||||
mtx_unlock(&state->mutex);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
vk_sync_timeline_signal_locked(struct vk_device *device,
|
||||
struct vk_sync_timeline_state *state,
|
||||
|
|
@ -476,13 +543,13 @@ vk_sync_timeline_wait_locked(struct vk_device *device,
|
|||
|
||||
/* Pick the mutex back up */
|
||||
mtx_lock(&state->mutex);
|
||||
vk_sync_timeline_unref_point_locked(state, point);
|
||||
vk_sync_timeline_unref_point_locked(device, state, point);
|
||||
|
||||
/* This covers both VK_TIMEOUT and VK_ERROR_DEVICE_LOST */
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
vk_sync_timeline_complete_point_locked(state, point);
|
||||
vk_sync_timeline_complete_point_locked(device, state, point);
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
|
|
|||
|
|
@ -69,6 +69,8 @@ struct vk_sync_timeline_state {
|
|||
uint64_t highest_past;
|
||||
uint64_t highest_pending;
|
||||
|
||||
uint32_t refcount;
|
||||
|
||||
struct list_head pending_points;
|
||||
struct list_head free_points;
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue