winsys/amdgpu: handle cs_add_fence_dependency for deferred/unsubmitted fences
The idea is to fix the following interleaving of operations
that can arise from deferred fences:
  Thread 1 / Context 1          Thread 2 / Context 2
  --------------------          --------------------
  f = deferred flush
            <------- application-side synchronization ------->
                                fence_server_sync(f)
                                ...
                                flush()
  flush()
We will now stall in fence_server_sync until the flush of context 1
has completed.
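At the winsys level, the fix amounts to giving each fence a util_queue_fence that is signalled only once its IB has actually been submitted, and making cs_add_fence_dependency block on it. The following is a minimal sketch of that flow using the util_queue_fence helpers the patch below switches to; the example_* names and the simplified struct are illustrative, not the real driver code:

   #include <stdint.h>
   #include "util/u_queue.h"  /* util_queue_fence helpers used by the patch */

   struct example_fence {
      struct util_queue_fence submitted; /* signalled once the IB is submitted */
      uint64_t seq_no;                   /* only valid after submission */
   };

   /* Context 1: create a deferred fence; it has no sequence number yet. */
   static void example_fence_create_deferred(struct example_fence *f)
   {
      util_queue_fence_init(&f->submitted);
      util_queue_fence_reset(&f->submitted); /* mark as "not submitted yet" */
   }

   /* Context 2: record the fence as a CS dependency. This now blocks until
    * context 1's flush has really submitted the IB, instead of reading an
    * uninitialized sequence number. */
   static void example_cs_add_fence_dependency(struct example_fence *f)
   {
      util_queue_fence_wait(&f->submitted);
      /* f->seq_no is valid here and can be turned into a real dependency. */
   }

   /* Submit thread of context 1: runs once the deferred flush is executed. */
   static void example_fence_submitted(struct example_fence *f, uint64_t seq_no)
   {
      f->seq_no = seq_no;
      util_queue_fence_signal(&f->submitted); /* unblock any waiters */
   }

With this structure, Thread 2's fence_server_sync simply sleeps until Thread 1's flush lands, which is the stall described above.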
This scenario was unlikely to occur previously, because applications
seem to be doing
  Thread 1 / Context 1          Thread 2 / Context 2
  --------------------          --------------------
  f = glFenceSync()
  glFlush()
            <------- application-side synchronization ------->
                                glWaitSync(f)
... and indeed they probably *have* to use this ordering to avoid
deadlocks in the GLX model, where all GL operations conceptually
go through a single connection to the X server. However, it's less
clear whether applications have to do this with other WSI (e.g. EGL).
Besides, even this sequence of GL commands can be translated into
the Gallium-level sequence outlined above when Gallium threading
and asynchronous flushes are used. So it makes sense to be more
robust.
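For reference, the GL-level calls behind the diagram above map onto that Gallium sequence roughly as follows. This is a fragment rather than a complete program (context creation, context sharing and the cross-thread handoff of f are omitted), and the translation of glWaitSync to fence_server_sync is the usual state-tracker behaviour assumed here, with a threaded Gallium driver whose glFlush can become an asynchronous, deferred flush:

   /* Thread 1 / Context 1 */
   GLsync f = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
   glFlush(); /* with threaded Gallium this may only queue a deferred flush */

   /* ... application-side synchronization (mutex, pipe, ...) ... */

   /* Thread 2 / Context 2 (shares objects with context 1) */
   glWaitSync(f, 0, GL_TIMEOUT_IGNORED); /* becomes fence_server_sync(f) */
   /* subsequent rendering, followed by an implicit or explicit flush */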
As a side effect, we no longer busy-wait on submission_in_progress.
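Concretely, the waiting pattern changes from polling a volatile flag to blocking on the fence's util_queue_fence. A simplified before/after sketch; the wait_for_submission_* helpers are illustrative, the real call sites are in the diff below:

   #include "util/u_atomic.h"  /* p_atomic_read */
   #include "util/u_queue.h"   /* util_queue_fence_wait */

   /* Old pattern (removed by this patch): poll a volatile int until the
    * submit thread clears it; os_wait_until_zero() did essentially this. */
   static void wait_for_submission_busy(volatile int *submission_in_progress)
   {
      while (p_atomic_read(submission_in_progress))
         ; /* spin */
   }

   /* New pattern: block on a util_queue_fence that the submit thread signals
    * once the IB has been handed off; no CPU time is burned while waiting. */
   static void wait_for_submission_blocking(struct util_queue_fence *submitted)
   {
      util_queue_fence_wait(submitted);
   }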
We won't enable asynchronous flushes on radeon, but add a
cs_add_fence_dependency stub anyway to document the potential
issue.
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
parent 1e5c9cf590
commit e6dbc804a8
4 changed files with 41 additions and 12 deletions
@@ -543,7 +543,9 @@ struct radeon_winsys {
    /**
     * Create a fence before the CS is flushed.
     * The user must flush manually to complete the initializaton of the fence.
-    * The fence must not be used before the flush.
+    *
+    * The fence must not be used for anything except \ref cs_add_fence_dependency
+    * before the flush.
     */
    struct pipe_fence_handle *(*cs_get_next_fence)(struct radeon_winsys_cs *cs);

@@ -50,7 +50,8 @@ amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
    fence->fence.ip_type = ip_type;
    fence->fence.ip_instance = ip_instance;
    fence->fence.ring = ring;
-   fence->submission_in_progress = true;
+   util_queue_fence_init(&fence->submitted);
+   util_queue_fence_reset(&fence->submitted);
    p_atomic_inc(&ctx->refcount);
    return (struct pipe_fence_handle *)fence;
 }

@@ -81,6 +82,9 @@ amdgpu_fence_import_sync_file(struct radeon_winsys *rws, int fd)
       FREE(fence);
       return NULL;
    }
+
+   util_queue_fence_init(&fence->submitted);
+
    return (struct pipe_fence_handle*)fence;
 }

@@ -98,7 +102,7 @@ static int amdgpu_fence_export_sync_file(struct radeon_winsys *rws,
       return r ? -1 : fd;
    }

-   os_wait_until_zero(&fence->submission_in_progress, PIPE_TIMEOUT_INFINITE);
+   util_queue_fence_wait(&fence->submitted);

    /* Convert the amdgpu fence into a fence FD. */
    int fd;

@@ -118,7 +122,7 @@ static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,

    rfence->fence.fence = seq_no;
    rfence->user_fence_cpu_address = user_fence_cpu_address;
-   rfence->submission_in_progress = false;
+   util_queue_fence_signal(&rfence->submitted);
 }

 static void amdgpu_fence_signalled(struct pipe_fence_handle *fence)

@@ -126,7 +130,7 @@ static void amdgpu_fence_signalled(struct pipe_fence_handle *fence)
    struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;

    rfence->signalled = true;
-   rfence->submission_in_progress = false;
+   util_queue_fence_signal(&rfence->submitted);
 }

 bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,

@@ -164,8 +168,7 @@ bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
    /* The fence might not have a number assigned if its IB is being
     * submitted in the other thread right now. Wait until the submission
     * is done. */
-   if (!os_wait_until_zero_abs_timeout(&rfence->submission_in_progress,
-                                       abs_timeout))
+   if (!util_queue_fence_wait_timeout(&rfence->submitted, abs_timeout))
       return false;

    user_fence_cpu = rfence->user_fence_cpu_address;

@@ -1029,6 +1032,8 @@ static void amdgpu_cs_add_fence_dependency(struct radeon_winsys_cs *rws,
    struct amdgpu_cs_context *cs = acs->csc;
    struct amdgpu_fence *fence = (struct amdgpu_fence*)pfence;

+   util_queue_fence_wait(&fence->submitted);
+
    if (is_noop_fence_dependency(acs, fence))
       return;

@@ -1304,7 +1309,7 @@ bo_list_error:
         continue;
      }

-     assert(!fence->submission_in_progress);
+     assert(util_queue_fence_is_signalled(&fence->submitted));
      amdgpu_cs_chunk_fence_to_dep(&fence->fence, &dep_chunk[num++]);
   }

@@ -1327,7 +1332,7 @@ bo_list_error:
        if (!amdgpu_fence_is_syncobj(fence))
           continue;

-       assert(!fence->submission_in_progress);
+       assert(util_queue_fence_is_signalled(&fence->submitted));
        sem_chunk[num++].handle = fence->syncobj;
     }

@@ -144,9 +144,11 @@ struct amdgpu_fence {
    struct amdgpu_cs_fence fence;
    uint64_t *user_fence_cpu_address;

-   /* If the fence is unknown due to an IB still being submitted
-    * in the other thread. */
-   volatile int submission_in_progress; /* bool (int for atomicity) */
+   /* If the fence has been submitted. This is unsignalled for deferred fences
+    * (cs->next_fence) and while an IB is still being submitted in the submit
+    * thread. */
+   struct util_queue_fence submitted;
+
    volatile int signalled; /* bool (int for atomicity) */
 };

@@ -178,6 +180,7 @@ static inline void amdgpu_fence_reference(struct pipe_fence_handle **dst,
       else
          amdgpu_ctx_unref(fence->ctx);

+      util_queue_fence_destroy(&fence->submitted);
       FREE(fence);
    }
    *rdst = rsrc;

@@ -786,6 +786,24 @@ radeon_drm_cs_get_next_fence(struct radeon_winsys_cs *rcs)
    return fence;
 }

+static void
+radeon_drm_cs_add_fence_dependency(struct radeon_winsys_cs *cs,
+                                   struct pipe_fence_handle *fence)
+{
+   /* TODO: Handle the following unlikely multi-threaded scenario:
+    *
+    *  Thread 1 / Context 1          Thread 2 / Context 2
+    *  --------------------          --------------------
+    *  f = cs_get_next_fence()
+    *                                cs_add_fence_dependency(f)
+    *                                cs_flush()
+    *  cs_flush()
+    *
+    * We currently assume that this does not happen because we don't support
+    * asynchronous flushes on Radeon.
+    */
+}
+
 void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
 {
    ws->base.ctx_create = radeon_drm_ctx_create;

@@ -801,6 +819,7 @@ void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws)
    ws->base.cs_get_next_fence = radeon_drm_cs_get_next_fence;
    ws->base.cs_is_buffer_referenced = radeon_bo_is_referenced;
    ws->base.cs_sync_flush = radeon_drm_cs_sync_flush;
+   ws->base.cs_add_fence_dependency = radeon_drm_cs_add_fence_dependency;
    ws->base.fence_wait = radeon_fence_wait;
    ws->base.fence_reference = radeon_fence_reference;
 }