From 23af6d3d35ee9f98031075bcd918c384b368efe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 30 Sep 2023 20:08:23 -0400 Subject: [PATCH] radeonsi: add another aux context for uploading shaders When the first auxiliary context is locked and wants to compile and upload a shader asynchronously, we need to use another auxiliary context in the compiler thread because the first one is locked at that point. This adds an array of auxiliary contexts into si_screen and changes how aux contexts are accessed. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- .../drivers/radeonsi/si_compute_blit.c | 4 +- src/gallium/drivers/radeonsi/si_debug.c | 2 +- src/gallium/drivers/radeonsi/si_pipe.c | 108 ++++++++++-------- src/gallium/drivers/radeonsi/si_pipe.h | 26 ++++- src/gallium/drivers/radeonsi/si_texture.c | 14 +-- 5 files changed, 93 insertions(+), 61 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index f1e56d6a047..5c5885e4409 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -497,10 +497,10 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst, void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst, uint64_t offset, uint64_t size, unsigned value, unsigned flags) { - struct si_context *ctx = si_get_aux_context(sscreen); + struct si_context *ctx = si_get_aux_context(&sscreen->aux_context.general); si_clear_buffer(ctx, dst, offset, size, &value, 4, flags, SI_COHERENCY_SHADER, SI_AUTO_SELECT_CLEAR_METHOD); - si_put_aux_context_flush(sscreen); + si_put_aux_context_flush(&sscreen->aux_context.general); } static void si_pipe_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c index e6d071a5061..98ea24673c2 100644 --- a/src/gallium/drivers/radeonsi/si_debug.c +++ b/src/gallium/drivers/radeonsi/si_debug.c @@ -456,7 +456,7 @@ void si_log_hw_flush(struct si_context *sctx) si_log_cs(sctx, sctx->log, true); - if (&sctx->b == sctx->screen->aux_context) { + if (sctx->context_flags & SI_CONTEXT_FLAG_AUX) { /* The aux context isn't captured by the ddebug wrapper, * so we dump it on a flush-by-flush basis here. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 6afa0dde8b0..cb7d2934be1 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -812,29 +812,38 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, unsign p_atomic_inc(&screen->num_contexts); /* Check if the aux_context needs to be recreated */ - struct si_context *saux = si_get_aux_context(sscreen); + for (unsigned i = 0; i < ARRAY_SIZE(sscreen->aux_contexts); i++) { + struct si_context *saux = si_get_aux_context(&sscreen->aux_contexts[i]); + enum pipe_reset_status status = + sctx->ws->ctx_query_reset_status(saux->ctx, true, NULL, NULL); - enum pipe_reset_status status = sctx->ws->ctx_query_reset_status( - saux->ctx, true, NULL, NULL); - if (status != PIPE_NO_RESET) { - /* We lost the aux_context, create a new one */ - struct u_log_context *aux_log = (saux)->log; - saux->b.set_log_context(&saux->b, NULL); - saux->b.destroy(&saux->b); + if (status != PIPE_NO_RESET) { + /* We lost the aux_context, create a new one */ + struct u_log_context *aux_log = saux->log; + saux->b.set_log_context(&saux->b, NULL); + saux->b.destroy(&saux->b); - saux = (struct si_context *)si_create_context( - &sscreen->b, SI_CONTEXT_FLAG_AUX | - (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) | - (sscreen->info.has_graphics ? 0 : PIPE_CONTEXT_COMPUTE_ONLY)); - saux->b.set_log_context(&saux->b, aux_log); - sscreen->aux_context = saux; + saux = (struct si_context *)si_create_context( + &sscreen->b, SI_CONTEXT_FLAG_AUX | + (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) | + (sscreen->info.has_graphics ? 0 : PIPE_CONTEXT_COMPUTE_ONLY)); + saux->b.set_log_context(&saux->b, aux_log); + + sscreen->aux_contexts[i].ctx = &saux->b; + } + si_put_aux_context_flush(&sscreen->aux_contexts[i]); } - si_put_aux_context_flush(sscreen); simple_mtx_lock(&sscreen->async_compute_context_lock); - if (status != PIPE_NO_RESET && sscreen->async_compute_context) { - sscreen->async_compute_context->destroy(sscreen->async_compute_context); - sscreen->async_compute_context = NULL; + if (sscreen->async_compute_context) { + struct si_context *compute_ctx = (struct si_context*)sscreen->async_compute_context; + enum pipe_reset_status status = + sctx->ws->ctx_query_reset_status(compute_ctx->ctx, true, NULL, NULL); + + if (status != PIPE_NO_RESET) { + sscreen->async_compute_context->destroy(sscreen->async_compute_context); + sscreen->async_compute_context = NULL; + } } simple_mtx_unlock(&sscreen->async_compute_context_lock); } @@ -948,8 +957,11 @@ static void si_destroy_screen(struct pipe_screen *pscreen) si_resource_reference(&sscreen->attribute_ring, NULL); - if (sscreen->aux_context) { - struct si_context *saux = si_get_aux_context(sscreen); + for (unsigned i = 0; i < ARRAY_SIZE(sscreen->aux_contexts); i++) { + if (!sscreen->aux_contexts[i].ctx) + continue; + + struct si_context *saux = si_get_aux_context(&sscreen->aux_contexts[i]); struct u_log_context *aux_log = saux->log; if (aux_log) { saux->b.set_log_context(&saux->b, NULL); @@ -958,9 +970,9 @@ static void si_destroy_screen(struct pipe_screen *pscreen) } saux->b.destroy(&saux->b); - mtx_unlock(&sscreen->aux_context_lock); + mtx_unlock(&sscreen->aux_contexts[i].lock); + mtx_destroy(&sscreen->aux_contexts[i].lock); } - mtx_destroy(&sscreen->aux_context_lock); simple_mtx_destroy(&sscreen->async_compute_context_lock); if (sscreen->async_compute_context) { @@ -1027,7 +1039,7 @@ static void si_init_gs_info(struct si_screen *sscreen) static void si_test_vmfault(struct si_screen *sscreen, uint64_t test_flags) { - struct pipe_context *ctx = sscreen->aux_context; + struct pipe_context *ctx = sscreen->aux_context.general.ctx; struct si_context *sctx = (struct si_context *)ctx; struct pipe_resource *buf = pipe_buffer_create_const0(&sscreen->b, 0, PIPE_USAGE_DEFAULT, 64); @@ -1231,7 +1243,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, 1 << util_logbase2(sscreen->force_aniso)); } - (void)mtx_init(&sscreen->aux_context_lock, mtx_plain | mtx_recursive); (void)simple_mtx_init(&sscreen->async_compute_context_lock, mtx_plain); (void)simple_mtx_init(&sscreen->gpu_load_mutex, mtx_plain); (void)simple_mtx_init(&sscreen->gds_mutex, mtx_plain); @@ -1433,17 +1444,23 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, } /* Create the auxiliary context. This must be done last. */ - sscreen->aux_context = si_create_context( - &sscreen->b, - SI_CONTEXT_FLAG_AUX | PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET | - (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) | - (sscreen->info.has_graphics ? 0 : PIPE_CONTEXT_COMPUTE_ONLY)); + for (unsigned i = 0; i < ARRAY_SIZE(sscreen->aux_contexts); i++) { + (void)mtx_init(&sscreen->aux_contexts[i].lock, mtx_plain | mtx_recursive); - if (sscreen->options.aux_debug) { - struct u_log_context *log = CALLOC_STRUCT(u_log_context); - u_log_context_init(log); - si_get_aux_context(sscreen)->b.set_log_context(sscreen->aux_context, log); - si_put_aux_context_flush(sscreen); + sscreen->aux_contexts[i].ctx = + si_create_context(&sscreen->b, + SI_CONTEXT_FLAG_AUX | PIPE_CONTEXT_LOSE_CONTEXT_ON_RESET | + (sscreen->options.aux_debug ? PIPE_CONTEXT_DEBUG : 0) | + (sscreen->info.has_graphics ? 0 : PIPE_CONTEXT_COMPUTE_ONLY)); + + if (sscreen->options.aux_debug) { + struct u_log_context *log = CALLOC_STRUCT(u_log_context); + u_log_context_init(log); + + struct si_context *sctx = si_get_aux_context(&sscreen->aux_context.general); + sctx->b.set_log_context(&sctx->b, log); + si_put_aux_context_flush(&sscreen->aux_context.general); + } } if (test_flags & DBG(TEST_IMAGE_COPY)) @@ -1460,15 +1477,15 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, si_test_vmfault(sscreen, test_flags); if (test_flags & DBG(TEST_GDS)) - si_test_gds((struct si_context *)sscreen->aux_context); + si_test_gds((struct si_context *)sscreen->aux_context.general.ctx); if (test_flags & DBG(TEST_GDS_MM)) { - si_test_gds_memory_management((struct si_context *)sscreen->aux_context, 32 * 1024, 4, - RADEON_DOMAIN_GDS); + si_test_gds_memory_management((struct si_context *)sscreen->aux_context.general.ctx, + 32 * 1024, 4, RADEON_DOMAIN_GDS); } if (test_flags & DBG(TEST_GDS_OA_MM)) { - si_test_gds_memory_management((struct si_context *)sscreen->aux_context, 4, 1, - RADEON_DOMAIN_OA); + si_test_gds_memory_management((struct si_context *)sscreen->aux_context.general.ctx, + 4, 1, RADEON_DOMAIN_OA); } ac_print_nonshadowed_regs(sscreen->info.gfx_level, sscreen->info.family); @@ -1509,15 +1526,14 @@ struct pipe_screen *radeonsi_screen_create(int fd, const struct pipe_screen_conf return rw ? rw->screen : NULL; } -struct si_context* si_get_aux_context(struct si_screen *sscreen) +struct si_context *si_get_aux_context(struct si_aux_context *ctx) { - mtx_lock(&sscreen->aux_context_lock); - return (struct si_context*)sscreen->aux_context; + mtx_lock(&ctx->lock); + return (struct si_context*)ctx->ctx; } -void si_put_aux_context_flush(struct si_screen *sscreen) +void si_put_aux_context_flush(struct si_aux_context *ctx) { - struct pipe_context *c = &((struct si_context*)sscreen->aux_context)->b; - c->flush(c, NULL, 0); - mtx_unlock(&sscreen->aux_context_lock); + ctx->ctx->flush(ctx->ctx, NULL, 0); + mtx_unlock(&ctx->lock); } diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index e89f025a4bc..43438591299 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -540,6 +540,11 @@ struct radeon_saved_cs { unsigned bo_count; }; +struct si_aux_context { + struct pipe_context *ctx; + mtx_t lock; +}; + struct si_screen { struct pipe_screen b; struct radeon_winsys *ws; @@ -591,9 +596,19 @@ struct si_screen { unsigned max_texel_buffer_elements; - /* Auxiliary context. Mainly used to initialize resources. */ - void *aux_context; - mtx_t aux_context_lock; + /* Auxiliary context. Used to initialize resources and upload shaders. */ + union { + struct { + struct si_aux_context general; + + /* Second auxiliary context for uploading shaders. When the first auxiliary context is + * locked and wants to compile and upload shaders, we need to use a second auxiliary + * context because the first one is locked. + */ + struct si_aux_context shader_upload; + } aux_context; + struct si_aux_context aux_contexts[2]; + }; /* Async compute context for DRI_PRIME copies. */ struct pipe_context *async_compute_context; @@ -1559,8 +1574,9 @@ void si_init_compute_functions(struct si_context *sctx); /* si_pipe.c */ bool si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler); void si_init_aux_async_compute_ctx(struct si_screen *sscreen); -struct si_context* si_get_aux_context(struct si_screen *sscreen); -void si_put_aux_context_flush(struct si_screen *sscreen); +struct si_context *si_get_aux_context(struct si_aux_context *ctx); +void si_put_aux_context_flush(struct si_aux_context *ctx); +void si_put_aux_shader_upload_context_flush(struct si_screen *sscreen); /* si_perfcounters.c */ void si_init_perfcounters(struct si_screen *screen); diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c index 7097594edbd..47a1ffdfb3e 100644 --- a/src/gallium/drivers/radeonsi/si_texture.c +++ b/src/gallium/drivers/radeonsi/si_texture.c @@ -700,7 +700,7 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex bool flush = false; ctx = threaded_context_unwrap_sync(ctx); - sctx = ctx ? (struct si_context *)ctx : si_get_aux_context(sscreen); + sctx = ctx ? (struct si_context *)ctx : si_get_aux_context(&sscreen->aux_context.general); if (resource->target != PIPE_BUFFER) { unsigned plane = whandle->plane; @@ -719,7 +719,7 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex */ if (resource->nr_samples > 1 || tex->is_depth) { if (!ctx) - si_put_aux_context_flush(sscreen); + si_put_aux_context_flush(&sscreen->aux_context.general); return false; } @@ -727,7 +727,7 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex if (plane) { if (!ctx) - si_put_aux_context_flush(sscreen); + si_put_aux_context_flush(&sscreen->aux_context.general); whandle->offset = ac_surface_get_plane_offset(sscreen->info.gfx_level, &tex->surface, plane, 0); whandle->stride = ac_surface_get_plane_stride(sscreen->info.gfx_level, @@ -809,7 +809,7 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex struct pipe_resource *newb = screen->resource_create(screen, &templ); if (!newb) { if (!ctx) - si_put_aux_context_flush(sscreen); + si_put_aux_context_flush(&sscreen->aux_context.general); return false; } @@ -847,7 +847,7 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex if (flush && ctx) sctx->b.flush(&sctx->b, NULL, 0); if (!ctx) - si_put_aux_context_flush(sscreen); + si_put_aux_context_flush(&sscreen->aux_context.general); whandle->stride = stride; whandle->offset = offset + slice_size * whandle->layer; @@ -1191,8 +1191,8 @@ static struct si_texture *si_texture_create_object(struct pipe_screen *screen, /* Execute the clears. */ if (num_clears) { - si_execute_clears(si_get_aux_context(sscreen), clears, num_clears, 0); - si_put_aux_context_flush(sscreen); + si_execute_clears(si_get_aux_context(&sscreen->aux_context.general), clears, num_clears, 0); + si_put_aux_context_flush(&sscreen->aux_context.general); } /* Initialize the CMASK base register value. */