freedreno/drm: Async submit support
Move the submit ioctl to its own thread, to unblock the driver thread and
let it move on to the next frame.

Note that I did experiment with doing the append_bo() parts synchronously,
on the theory that we should be more likely to hit the fast path if we did
that part of submit merging before the bo was potentially re-used in the
next batch/submit.  It helped some things by a couple percent, but hurt
more things.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10444>
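The shape of the change is a classic producer/consumer split: the driver thread builds up submits and enqueues them, and a single queue thread performs the actual SUBMIT ioctl. Below is a minimal sketch of that pattern using Mesa's util/u_queue.h API exactly as it appears in this diff (util_queue_init, util_queue_add_job, util_queue_fence_*); the struct and function names (example_job, example_execute, ...) are illustrative, not the driver's own, and the job here is just a placeholder for the real fd_submit.

#include "util/u_queue.h"

/* Stand-in for a batched-up submit; in the real code the job is the
 * fd_submit itself.
 */
struct example_job {
   int id;
   struct util_queue_fence ready;   /* signaled once execute() has run */
};

/* Runs on the queue thread: this is where the (potentially slow) submit
 * ioctl would happen, off the driver thread.
 */
static void
example_execute(void *job, int thread_index)
{
   struct example_job *j = job;
   (void)j;   /* ... perform the SUBMIT ioctl for this job ... */
}

/* Also runs on the queue thread, after execute(): drop any references
 * owned by the job (the real code drops its fd_submit reference here).
 */
static void
example_cleanup(void *job, int thread_index)
{
   (void)job;
}

static void
example(void)
{
   struct util_queue q;
   struct example_job job = { .id = 1 };

   /* One thread, up to 8 queued jobs, matching the "sq" queue below: */
   util_queue_init(&q, "sq", 8, 1, 0);

   util_queue_fence_init(&job.ready);
   util_queue_add_job(&q, &job, &job.ready,
                      example_execute, example_cleanup, 0);

   /* The producer only blocks if/when it actually needs the result: */
   util_queue_fence_wait(&job.ready);

   util_queue_destroy(&q);
}

A single queue thread is deliberate: submits must reach the kernel in fence-seqno order, and one consumer thread keeps them serialized without any extra ordering machinery.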
This commit is contained in:
parent 2c9e8db28d
commit e9a9ac6f77

7 changed files with 169 additions and 15 deletions
@@ -33,8 +33,7 @@
 #include "freedreno_drmif.h"
 #include "freedreno_priv.h"
 
 struct fd_device *kgsl_device_new(int fd);
-struct fd_device *msm_device_new(int fd);
+struct fd_device *msm_device_new(int fd, drmVersionPtr version);
 
 struct fd_device *
 fd_device_new(int fd)
@@ -58,7 +57,7 @@ fd_device_new(int fd)
          goto out;
       }
 
-      dev = msm_device_new(fd);
+      dev = msm_device_new(fd, version);
       dev->version = version->version_minor;
 #if HAVE_FREEDRENO_KGSL
    } else if (!strcmp(version->name, "kgsl")) {
@@ -30,7 +30,7 @@
 #include <stdio.h>
 
 #include "util/u_atomic.h"
 #include "util/u_debug.h"
 #include "util/u_dynarray.h"
+#include "util/u_queue.h"
 
 #include "adreno_common.xml.h"
 #include "adreno_pm4.xml.h"
@@ -98,6 +98,13 @@ struct fd_ringbuffer *fd_submit_new_ringbuffer(struct fd_submit *submit,
  * out-fence-fd
  */
 struct fd_submit_fence {
+   /**
+    * The ready fence is signaled once the submit is actually flushed down
+    * to the kernel, and fence/fence_fd are populated.  You must wait for
+    * this fence to be signaled before reading fence/fence_fd.
+    */
+   struct util_queue_fence ready;
+
    struct fd_fence fence;
 
    /**
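The comment added above is the key contract of the new async path: the submit may still be sitting in the queue when the caller gets its fd_submit_fence back, so the fence/fence_fd fields are only valid after the ready fence signals. A short sketch of the consumer side, mirroring how the gallium fence code later in this diff uses it (the function name and the direct fence_fd return are illustrative only):

static int
example_wait_for_out_fence_fd(struct fd_submit_fence *submit_fence)
{
   /* Block until the queue thread has actually done the submit ioctl: */
   util_queue_fence_wait(&submit_fence->ready);

   /* Only now are fence / fence_fd populated and safe to read: */
   return submit_fence->fence_fd;
}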
@@ -34,6 +34,9 @@ static void
 msm_device_destroy(struct fd_device *dev)
 {
    struct msm_device *msm_dev = to_msm_device(dev);
+   if (util_queue_is_initialized(&msm_dev->submit_queue)) {
+      util_queue_destroy(&msm_dev->submit_queue);
+   }
    free(msm_dev);
 }
 
@@ -45,7 +48,7 @@ static const struct fd_device_funcs funcs = {
 };
 
 struct fd_device *
-msm_device_new(int fd)
+msm_device_new(int fd, drmVersionPtr version)
 {
    struct msm_device *msm_dev;
    struct fd_device *dev;
@@ -61,6 +64,15 @@ msm_device_new(int fd)
    dev = &msm_dev->base;
    dev->funcs = &funcs;
 
+   /* async submit_queue currently only used for msm_submit_sp: */
+   if (version->version_minor >= FD_VERSION_SOFTPIN) {
+      /* Note the name is intentionally short to avoid the queue
+       * thread's comm truncating the interesting part of the
+       * process name.
+       */
+      util_queue_init(&msm_dev->submit_queue, "sq", 8, 1, 0);
+   }
+
    dev->bo_size = sizeof(struct msm_bo);
 
    return dev;
@@ -40,10 +40,11 @@
 struct msm_device {
    struct fd_device base;
    struct fd_bo_cache ring_cache;
+   struct util_queue submit_queue;
 };
 FD_DEFINE_CAST(fd_device, msm_device);
 
-struct fd_device *msm_device_new(int fd);
+struct fd_device *msm_device_new(int fd, drmVersionPtr version);
 
 struct msm_pipe {
    struct fd_pipe base;
@@ -54,6 +55,14 @@ struct msm_pipe {
    uint32_t chip_id;
    uint32_t queue_id;
    struct slab_parent_pool ring_pool;
+
+   /**
+    * The last fence seqno that was flushed to kernel (doesn't mean that it
+    * is complete, just that the kernel knows about it)
+    */
+   uint32_t last_submit_fence;
+
+   uint32_t last_enqueue_fence;   /* just for debugging */
 };
 FD_DEFINE_CAST(fd_pipe, msm_pipe);
@@ -26,6 +26,7 @@
 
 #include <assert.h>
 #include <inttypes.h>
+#include <pthread.h>
 
 #include "util/hash_table.h"
 #include "util/os_file.h"
@@ -41,6 +42,14 @@
 
 #define INIT_SIZE 0x1000
 
+/* In the pipe->flush() path, we don't have a util_queue_fence we can wait on,
+ * instead use a condition-variable.  Note that pipe->flush() is not expected
+ * to be a common/hot path.
+ */
+static pthread_cond_t flush_cnd = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t flush_mtx = PTHREAD_MUTEX_INITIALIZER;
+
+
 struct msm_submit_sp {
    struct fd_submit base;
 
@@ -64,6 +73,13 @@ struct msm_submit_sp {
    */
   int in_fence_fd;
   struct fd_submit_fence *out_fence;
+
+   /* State for enqueued submits:
+    */
+   struct list_head submit_list;   /* includes this submit as last element */
+
+   /* Used in case out_fence==NULL: */
+   struct util_queue_fence fence;
 };
 FD_DEFINE_CAST(fd_submit, msm_submit_sp);
@@ -369,14 +385,65 @@ flush_submit_list(struct list_head *submit_list)
    if (!bos_on_stack)
       free(submit_bos);
 
+   pthread_mutex_lock(&flush_mtx);
+   assert(fd_fence_before(msm_pipe->last_submit_fence, msm_submit->base.fence));
+   msm_pipe->last_submit_fence = msm_submit->base.fence;
+   pthread_cond_broadcast(&flush_cnd);
+   pthread_mutex_unlock(&flush_mtx);
+
    if (msm_submit->in_fence_fd != -1)
       close(msm_submit->in_fence_fd);
 
    fd_submit_del(&msm_submit->base);
 
    return ret;
 }
 
+static void
+msm_submit_sp_flush_execute(void *job, int thread_index)
+{
+   struct fd_submit *submit = job;
+   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
+
+   flush_submit_list(&msm_submit->submit_list);
+
+   DEBUG_MSG("finish: %u", submit->fence);
+}
+
+static void
+msm_submit_sp_flush_cleanup(void *job, int thread_index)
+{
+   struct fd_submit *submit = job;
+   fd_submit_del(submit);
+}
+
+static int
+enqueue_submit_list(struct list_head *submit_list)
+{
+   struct fd_submit *submit = last_submit(submit_list);
+   struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
+   struct msm_device *msm_dev = to_msm_device(submit->pipe->dev);
+
+   list_replace(submit_list, &msm_submit->submit_list);
+   list_inithead(submit_list);
+
+   struct util_queue_fence *fence;
+   if (msm_submit->out_fence) {
+      fence = &msm_submit->out_fence->ready;
+   } else {
+      util_queue_fence_init(&msm_submit->fence);
+      fence = &msm_submit->fence;
+   }
+
+   DEBUG_MSG("enqueue: %u", submit->fence);
+
+   util_queue_add_job(&msm_dev->submit_queue,
+                      submit, fence,
+                      msm_submit_sp_flush_execute,
+                      msm_submit_sp_flush_cleanup,
+                      0);
+
+   return 0;
+}
+
 static bool
 should_defer(struct fd_submit *submit)
 {
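One detail worth noting in enqueue_submit_list() above is the list_replace()/list_inithead() pair: ownership of the locally built submit list is handed over to the job (anchored at msm_submit->submit_list), so the caller's list head can be reused immediately while the queue thread walks the elements later. A tiny sketch of that util/list.h idiom, with hypothetical names (example_transfer, staging, owned):

#include "util/list.h"

/* Hand ownership of a locally-built list to a longer-lived head, as
 * enqueue_submit_list() does with msm_submit->submit_list.  'staging'
 * can safely be reused as soon as this returns.
 */
static void
example_transfer(struct list_head *staging, struct list_head *owned)
{
   /* 'owned' takes over the elements that were anchored at 'staging'... */
   list_replace(staging, owned);
   /* ...and 'staging' is reset to an empty list. */
   list_inithead(staging);
}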
@@ -402,6 +469,7 @@ msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
                     struct fd_submit_fence *out_fence)
 {
    struct fd_device *dev = submit->pipe->dev;
+   struct msm_pipe *msm_pipe = to_msm_pipe(submit->pipe);
 
    /* Acquire lock before flush_prep() because it is possible to race between
     * this and pipe->flush():
@@ -420,15 +488,16 @@ msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
       list_inithead(&dev->deferred_submits);
       dev->deferred_cmds = 0;
 
-      simple_mtx_unlock(&dev->submit_lock);
-      flush_submit_list(&submit_list);
-      simple_mtx_lock(&dev->submit_lock);
+      enqueue_submit_list(&submit_list);
    }
 
    list_addtail(&fd_submit_ref(submit)->node, &dev->deferred_submits);
 
    bool has_shared = msm_submit_sp_flush_prep(submit, in_fence_fd, out_fence);
 
+   assert(fd_fence_before(msm_pipe->last_enqueue_fence, submit->fence));
+   msm_pipe->last_enqueue_fence = submit->fence;
+
    /* If we don't need an out-fence, we can defer the submit.
    *
    * TODO we could defer submits with in-fence as well.. if we took our own
@@ -436,6 +505,7 @@ msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
    * deferred submits
    */
   if ((in_fence_fd == -1) && !out_fence && !has_shared && should_defer(submit)) {
+      DEBUG_MSG("defer: %u", submit->fence);
      dev->deferred_cmds += fd_ringbuffer_cmd_count(submit->primary);
      assert(dev->deferred_cmds == fd_dev_count_deferred_cmds(dev));
      simple_mtx_unlock(&dev->submit_lock);
@@ -451,19 +521,24 @@ msm_submit_sp_flush(struct fd_submit *submit, int in_fence_fd,
 
    simple_mtx_unlock(&dev->submit_lock);
 
-   return flush_submit_list(&submit_list);
+   return enqueue_submit_list(&submit_list);
 }
 
 void
 msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
 {
+   struct msm_pipe *msm_pipe = to_msm_pipe(pipe);
    struct fd_device *dev = pipe->dev;
    struct list_head submit_list;
 
+   DEBUG_MSG("flush: %u", fence);
+
    list_inithead(&submit_list);
 
    simple_mtx_lock(&dev->submit_lock);
 
+   assert(!fd_fence_after(fence, msm_pipe->last_enqueue_fence));
+
    foreach_submit_safe (deferred_submit, &dev->deferred_submits) {
      /* We should never have submits from multiple pipes in the deferred
       * list.  If we did, we couldn't compare their fence to our fence,
@@ -485,9 +560,20 @@ msm_pipe_sp_flush(struct fd_pipe *pipe, uint32_t fence)
    simple_mtx_unlock(&dev->submit_lock);
 
    if (list_is_empty(&submit_list))
-      return;
+      goto flush_sync;
 
-   flush_submit_list(&submit_list);
+   enqueue_submit_list(&submit_list);
 
+flush_sync:
+   /* Once we are sure that we've enqueued at least up to the requested
+    * submit, we need to be sure that submitq has caught up and flushed
+    * them to the kernel
+    */
+   pthread_mutex_lock(&flush_mtx);
+   while (fd_fence_before(msm_pipe->last_submit_fence, fence)) {
+      pthread_cond_wait(&flush_cnd, &flush_mtx);
+   }
+   pthread_mutex_unlock(&flush_mtx);
 }
 
 static void
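The flush_sync block above is the pipe->flush() synchronization point: it cannot wait on a util_queue_fence (the submit it needs may have been enqueued by some earlier flush), so it instead waits on a condition variable until the queue thread has advanced last_submit_fence past the requested seqno. The following is a self-contained sketch of that pattern; the seqno comparison helper seqno_before is an assumption written to behave like the driver's fd_fence_before() (wraparound-safe), and all names here are illustrative.

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>

static pthread_mutex_t example_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  example_cnd = PTHREAD_COND_INITIALIZER;
static uint32_t last_submitted;   /* written only by the queue thread */

/* Wraparound-safe "a comes before b" compare for 32-bit seqnos: */
static bool
seqno_before(uint32_t a, uint32_t b)
{
   return (int32_t)(a - b) < 0;
}

/* Queue thread: after the ioctl for seqno 'fence' lands, publish it: */
static void
example_publish(uint32_t fence)
{
   pthread_mutex_lock(&example_mtx);
   last_submitted = fence;
   pthread_cond_broadcast(&example_cnd);
   pthread_mutex_unlock(&example_mtx);
}

/* Driver thread: block until at least 'fence' has reached the kernel: */
static void
example_wait(uint32_t fence)
{
   pthread_mutex_lock(&example_mtx);
   while (seqno_before(last_submitted, fence))
      pthread_cond_wait(&example_cnd, &example_mtx);
   pthread_mutex_unlock(&example_mtx);
}

The while-loop re-checks the predicate after every wakeup, which is what makes spurious wakeups and broadcasts for unrelated seqnos harmless.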
@@ -59,6 +59,16 @@ fence_flush(struct pipe_context *pctx, struct pipe_fence_handle *fence,
       }
    }
 
+   /* If after the pre-created unflushed fence is flushed, we end up
+    * re-populated to a previous last_fence, then *that* is the one
+    * whose submit_fence.ready we want to wait on:
+    */
+   if (fence->last_fence) {
+      return fence_flush(pctx, fence->last_fence, timeout);
+   }
+
+   util_queue_fence_wait(&fence->submit_fence.ready);
+
    /* We've already waited for batch to be flushed and fence->batch
     * to be cleared:
     */
@@ -69,6 +79,8 @@ fence_flush(struct pipe_context *pctx, struct pipe_fence_handle *fence,
    if (fence->batch)
       fd_batch_flush(fence->batch);
 
+   util_queue_fence_wait(&fence->submit_fence.ready);
+
    debug_assert(!fence->batch);
 
    return true;
@@ -81,19 +93,37 @@ fd_fence_repopulate(struct pipe_fence_handle *fence, struct pipe_fence_handle *last_fence)
    * might have been)
    */
   assert(!fence->submit_fence.use_fence_fd);
+  assert(!last_fence->batch);
 
   fence->submit_fence.fence = last_fence->submit_fence.fence;
 
+   fd_fence_ref(&fence->last_fence, last_fence);
+
+   /* We have nothing to flush, so nothing will clear the batch reference
+    * (which is normally done when the batch is flushed), so do it now:
+    */
+   fd_fence_set_batch(fence, NULL);
 }
 
 static void
 fd_fence_destroy(struct pipe_fence_handle *fence)
 {
+   fd_fence_ref(&fence->last_fence, NULL);
+
    tc_unflushed_batch_token_reference(&fence->tc_token, NULL);
    if (fence->submit_fence.use_fence_fd)
       close(fence->submit_fence.fence_fd);
    if (fence->syncobj)
       drmSyncobjDestroy(fd_device_fd(fence->screen->dev), fence->syncobj);
    fd_pipe_del(fence->pipe);
 
+   /* TODO might be worth trying harder to avoid a potential stall here,
+    * but that would require the submit somehow holding a reference to
+    * the pipe_fence_handle.. and I'm not sure if it is a thing that is
+    * likely to matter much.
+    */
+   util_queue_fence_wait(&fence->submit_fence.ready);
+
    FREE(fence);
 }
 
@@ -113,6 +143,9 @@ fd_fence_finish(struct pipe_screen *pscreen, struct pipe_context *pctx,
    if (!fence_flush(pctx, fence, timeout))
       return false;
 
+   if (fence->last_fence)
+      fence = fence->last_fence;
+
    if (fence->submit_fence.use_fence_fd) {
       int ret = sync_wait(fence->submit_fence.fence_fd, timeout / 1000000);
       return ret == 0;
@@ -136,9 +169,10 @@ fence_create(struct fd_context *ctx, struct fd_batch *batch, int fence_fd,
 
    pipe_reference_init(&fence->reference, 1);
-   util_queue_fence_init(&fence->ready);
+   util_queue_fence_init(&fence->submit_fence.ready);
 
    fence->ctx = ctx;
-   fence->batch = batch;
+   fd_fence_set_batch(fence, batch);
    fence->pipe = fd_pipe_ref(ctx->pipe);
    fence->screen = ctx->screen;
    fence->submit_fence.fence_fd = fence_fd;
@@ -237,6 +271,7 @@ fd_fence_set_batch(struct pipe_fence_handle *fence, struct fd_batch *batch)
    if (batch) {
       assert(!fence->batch);
       fence->batch = batch;
+      batch->needs_flush = true;
    } else {
      fence->batch = NULL;
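The fence_flush() changes above exist because a fence created with PIPE_FLUSH_DEFERRED may still hold an unflushed batch, and now additionally an unsignaled submit_fence.ready: waiting on it must first flush the batch, then wait for the queue thread to push the submit to the kernel, and only then wait on the kernel fence. From the state-tracker side the flow looks roughly like the sketch below, written against the generic gallium interfaces rather than anything freedreno-specific (the function name and timeout handling are illustrative):

#include <stdbool.h>
#include <stdint.h>

#include "pipe/p_context.h"
#include "pipe/p_defines.h"
#include "pipe/p_screen.h"

/* Rough shape of a deferred-fence wait: the fence is created without
 * flushing, and fence_finish() is what eventually forces the batch flush,
 * waits for the async submit to reach the kernel, and then waits on the
 * kernel fence itself.
 */
static bool
example_deferred_wait(struct pipe_context *pctx, struct pipe_screen *pscreen,
                      uint64_t timeout_ns)
{
   struct pipe_fence_handle *fence = NULL;

   pctx->flush(pctx, &fence, PIPE_FLUSH_DEFERRED);

   bool done = pscreen->fence_finish(pscreen, pctx, fence, timeout_ns);

   pscreen->fence_reference(pscreen, &fence, NULL);
   return done;
}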
@@ -35,6 +35,12 @@
 struct pipe_fence_handle {
    struct pipe_reference reference;
 
+   /* When a pre-created unflushed fence has no actual rendering to flush, and
+    * the last_fence optimization is used, this will be a reference to the
+    * *actualy* fence which needs to be flushed before waiting.
+    */
+   struct pipe_fence_handle *last_fence;
+
    /* fence holds a weak reference to the batch until the batch is flushed, to
    * accommodate PIPE_FLUSH_DEFERRED.  When the batch is actually flushed, it
    * is cleared (before the batch reference is dropped).  If we need to wait