radeonsi: always use async compiles when creating shader/compute states

With Gallium threaded contexts, creating shader/compute states is effectively a screen operation, so we should not use context state. In particular, this allows us to avoid using the context's LLVM TargetMachine.

Using context state here isn't an issue yet because u_threaded_context filters out non-async debug callbacks, and we disable threaded contexts for debug contexts. However, we may want to change that in the future.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
2026-02-26 22:10:37 +01:00 · 2017-10-22 17:38:43 +02:00 · 2017-10-22 17:38:43 +02:00 · f76a6cb337
commit f76a6cb337
parent b650fc09c3
2 changed files with 50 additions and 34 deletions
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@ -23,6 +23,7 @@
 */

 #include "tgsi/tgsi_parse.h"
+#include "util/u_async_debug.h"
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"

@ -84,14 +85,10 @@ static void si_create_compute_state_async(void *job, int thread_index)
 	LLVMTargetMachineRef tm;
 	struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;

-	if (thread_index >= 0) {
-		assert(thread_index < ARRAY_SIZE(program->screen->tm));
-		tm = program->screen->tm[thread_index];
-		if (!debug->async)
-			debug = NULL;
-	} else {
-		tm = program->compiler_ctx_state.tm;
-	}
+	assert(!debug->debug_message || debug->async);
+	assert(thread_index >= 0);
+	assert(thread_index < ARRAY_SIZE(program->screen->tm));
+	tm = program->screen->tm[thread_index];

 	memset(&sel, 0, sizeof(sel));

@ -167,20 +164,31 @@ static void *si_create_compute_state(
 			return NULL;
 		}

-		program->compiler_ctx_state.tm = sctx->tm;
 		program->compiler_ctx_state.debug = sctx->debug;
 		program->compiler_ctx_state.is_debug_context = sctx->is_debug;
 		p_atomic_inc(&sscreen->b.num_shaders_created);
 		util_queue_fence_init(&program->ready);

-		if ((sctx->debug.debug_message && !sctx->debug.async) ||
-		    sctx->is_debug ||
-		    si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE))
-			si_create_compute_state_async(program, -1);
-		else
-			util_queue_add_job(&sscreen->shader_compiler_queue,
-					   program, &program->ready,
-					   si_create_compute_state_async, NULL);
+		struct util_async_debug_callback async_debug;
+		bool wait =
+			(sctx->debug.debug_message && !sctx->debug.async) ||
+			sctx->is_debug ||
+			si_can_dump_shader(&sscreen->b, PIPE_SHADER_COMPUTE);
+
+		if (wait) {
+			u_async_debug_init(&async_debug);
+			program->compiler_ctx_state.debug = async_debug.base;
+		}
+
+		util_queue_add_job(&sscreen->shader_compiler_queue,
+				   program, &program->ready,
+				   si_create_compute_state_async, NULL);
+
+		if (wait) {
+			util_queue_fence_wait(&program->ready);
+			u_async_debug_drain(&async_debug, &sctx->debug);
+			u_async_debug_cleanup(&async_debug);
+		}
 	} else {
 		const struct pipe_llvm_program_header *header;
 		const char *code;
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@ -30,6 +30,7 @@
 #include "tgsi/tgsi_ureg.h"
 #include "util/hash_table.h"
 #include "util/crc32.h"
+#include "util/u_async_debug.h"
 #include "util/u_memory.h"
 #include "util/u_prim.h"

@ -1840,14 +1841,10 @@ static void si_init_shader_selector_async(void *job, int thread_index)
 	struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
 	unsigned i;

-	if (thread_index >= 0) {
-		assert(thread_index < ARRAY_SIZE(sscreen->tm));
-		tm = sscreen->tm[thread_index];
-		if (!debug->async)
-			debug = NULL;
-	} else {
-		tm = sel->compiler_ctx_state.tm;
-	}
+	assert(!debug->debug_message || debug->async);
+	assert(thread_index >= 0);
+	assert(thread_index < ARRAY_SIZE(sscreen->tm));
+	tm = sscreen->tm[thread_index];

 	/* Compile the main shader part for use with a prolog and/or epilog.
 	 * If this fails, the driver will try to compile a monolithic shader
@ -2042,7 +2039,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,

 	pipe_reference_init(&sel->reference, 1);
 	sel->screen = sscreen;
-	sel->compiler_ctx_state.tm = sctx->tm;
 	sel->compiler_ctx_state.debug = sctx->debug;
 	sel->compiler_ctx_state.is_debug_context = sctx->is_debug;

@ -2272,14 +2268,26 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 	(void) mtx_init(&sel->mutex, mtx_plain);
 	util_queue_fence_init(&sel->ready);

-	if ((sctx->debug.debug_message && !sctx->debug.async) ||
-	    sctx->is_debug ||
-	    si_can_dump_shader(&sscreen->b, sel->info.processor))
-		si_init_shader_selector_async(sel, -1);
-	else
-		util_queue_add_job(&sscreen->shader_compiler_queue, sel,
-                                   &sel->ready, si_init_shader_selector_async,
-                                   NULL);
+	struct util_async_debug_callback async_debug;
+	bool wait =
+		(sctx->debug.debug_message && !sctx->debug.async) ||
+		sctx->is_debug ||
+		si_can_dump_shader(&sscreen->b, sel->info.processor);
+
+	if (wait) {
+		u_async_debug_init(&async_debug);
+		sel->compiler_ctx_state.debug = async_debug.base;
+	}
+
+	util_queue_add_job(&sscreen->shader_compiler_queue, sel,
+			   &sel->ready, si_init_shader_selector_async,
+			   NULL);
+
+	if (wait) {
+		util_queue_fence_wait(&sel->ready);
+		u_async_debug_drain(&async_debug, &sctx->debug);
+		u_async_debug_cleanup(&async_debug);
+	}

 	return sel;
 }