From cc62a75a17c51ce6b505bcfbd367ec9c8e293f59 Mon Sep 17 00:00:00 2001
From: Qiang Yu
Date: Fri, 7 Feb 2025 09:29:12 +0800
Subject: [PATCH] radeonsi,util: add more usage for AMD_FORCE_SHADER_USE_ACO
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allow forcing a whole group of shaders to use ACO: besides a single
shader blake3, AMD_FORCE_SHADER_USE_ACO now also accepts a shader type
or a file listing shader blake3s, one per line. This makes it quick to
find the problematic shader with ACO when there are too many shaders to
try one by one.

Tested-by: Mike Lothian
Reviewed-by: Marek Olšák
Part-of:
---
Review notes:
 - check the MALLOC/REALLOC results and grow the array through a temporary
 - strip LF/CRLF with strcspn() instead of reading line[BLAKE3_PRINTED_LEN],
   which is not written by fgets() for short lines
 - update the stale si_screen comment and document BLAKE3_PRINTED_LEN

 src/gallium/drivers/radeonsi/si_pipe.c        | 114 +++++++++++++++++--
 src/gallium/drivers/radeonsi/si_pipe.h        |   7 +-
 src/gallium/drivers/radeonsi/si_shader_info.c |   9 +-
 src/util/mesa-blake3.c                        |   2 +-
 src/util/mesa-blake3.h                        |   2 +
 5 files changed, 116 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 73aaf1bac95..a0055b1eed4 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1075,6 +1075,7 @@ void si_destroy_screen(struct pipe_screen *pscreen)
    util_vertex_state_cache_deinit(&sscreen->vertex_state_cache);
 
    sscreen->ws->destroy(sscreen->ws);
+   FREE(sscreen->use_aco_shader_blakes);
    FREE(sscreen->nir_options);
    FREE(sscreen);
 }
@@ -1159,6 +1160,108 @@ static bool si_is_parallel_shader_compilation_finished(struct pipe_screen *scree
    return util_queue_fence_is_signalled(&sel->ready);
 }
 
+/**
+ * Parse AMD_FORCE_SHADER_USE_ACO and record which shaders must be compiled
+ * with ACO although ACO is not enabled for the whole screen.
+ *
+ * Usage (the value is tried in this order):
+ *   1. shader type: vs|tcs|tes|gs|ps|cs, specify a class of shaders to use aco
+ *   2. shader blake: specify a single shader blake directly to use aco
+ *   3. filename: specify a file which contains shader blakes in lines
+ *
+ * The selection is stored in sscreen->use_aco_shader_type and in the
+ * sscreen->use_aco_shader_blakes array (num_use_aco_shader_blakes entries).
+ * Nothing is selected when sscreen->use_aco is set or support_aco is false.
+ */
+static void si_setup_force_shader_use_aco(struct si_screen *sscreen, bool support_aco)
+{
+   sscreen->use_aco_shader_type = MESA_SHADER_NONE;
+
+   if (sscreen->use_aco || !support_aco)
+      return;
+
+   const char *option = debug_get_option("AMD_FORCE_SHADER_USE_ACO", NULL);
+   if (!option)
+      return;
+
+   if (!strcmp("vs", option)) {
+      sscreen->use_aco_shader_type = MESA_SHADER_VERTEX;
+      return;
+   } else if (!strcmp("tcs", option)) {
+      sscreen->use_aco_shader_type = MESA_SHADER_TESS_CTRL;
+      return;
+   } else if (!strcmp("tes", option)) {
+      sscreen->use_aco_shader_type = MESA_SHADER_TESS_EVAL;
+      return;
+   } else if (!strcmp("gs", option)) {
+      sscreen->use_aco_shader_type = MESA_SHADER_GEOMETRY;
+      return;
+   } else if (!strcmp("ps", option)) {
+      sscreen->use_aco_shader_type = MESA_SHADER_FRAGMENT;
+      return;
+   } else if (!strcmp("cs", option)) {
+      sscreen->use_aco_shader_type = MESA_SHADER_COMPUTE;
+      return;
+   }
+
+   blake3_hash blake;
+   if (_mesa_blake3_from_printed_string(blake, option)) {
+      sscreen->use_aco_shader_blakes = MALLOC(sizeof(blake));
+      if (!sscreen->use_aco_shader_blakes) {
+         fprintf(stderr, "radeonsi: out of memory while parsing AMD_FORCE_SHADER_USE_ACO\n");
+         return;
+      }
+
+      memcpy(sscreen->use_aco_shader_blakes[0], blake, sizeof(blake));
+      sscreen->num_use_aco_shader_blakes = 1;
+      return;
+   }
+
+   FILE *f = fopen(option, "r");
+   if (!f) {
+      fprintf(stderr, "radeonsi: invalid AMD_FORCE_SHADER_USE_ACO value\n");
+      return;
+   }
+
+   unsigned max_size = 16 * sizeof(blake3_hash);
+   sscreen->use_aco_shader_blakes = MALLOC(max_size);
+   if (!sscreen->use_aco_shader_blakes) {
+      fprintf(stderr, "radeonsi: out of memory while parsing AMD_FORCE_SHADER_USE_ACO\n");
+      fclose(f);
+      return;
+   }
+
+   char line[1024];
+   while (fgets(line, sizeof(line), f)) {
+      if (sscreen->num_use_aco_shader_blakes * sizeof(blake3_hash) >= max_size) {
+         /* Grow through a temporary so that a failed REALLOC neither leaks
+          * the array nor leaves a NULL pointer behind a non-zero count.
+          */
+         blake3_hash *blakes =
+            REALLOC(sscreen->use_aco_shader_blakes, max_size, max_size * 2);
+         if (!blakes) {
+            fprintf(stderr, "radeonsi: out of memory while parsing AMD_FORCE_SHADER_USE_ACO\n");
+            break;
+         }
+
+         sscreen->use_aco_shader_blakes = blakes;
+         max_size *= 2;
+      }
+
+      /* Cut the line at its line ending (LF or CRLF). Unlike peeking at
+       * line[BLAKE3_PRINTED_LEN], this never reads bytes that fgets did
+       * not write when the line is shorter than a printed blake3.
+       */
+      line[strcspn(line, "\r\n")] = 0;
+
+      if (_mesa_blake3_from_printed_string(
+             sscreen->use_aco_shader_blakes[sscreen->num_use_aco_shader_blakes], line))
+         sscreen->num_use_aco_shader_blakes++;
+   }
+
+   fclose(f);
+}
+
 static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
                                                        const struct pipe_screen_config *config)
 {
@@ -1226,16 +1329,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
       return NULL;
    }
 
-   if (!sscreen->use_aco && support_aco) {
-      const char *shader_blake = debug_get_option("AMD_FORCE_SHADER_USE_ACO", NULL);
-      if (shader_blake) {
-         sscreen->force_shader_use_aco =
-            _mesa_blake3_from_printed_string(sscreen->use_aco_shader_blake, shader_blake);
-
-         if (!sscreen->force_shader_use_aco)
-            fprintf(stderr, "radeonsi: invalid AMD_SHADER_FORCE_ACO value\n");
-      }
-   }
+   si_setup_force_shader_use_aco(sscreen, support_aco);
 
    if ((sscreen->debug_flags & DBG(TMZ)) &&
        !sscreen->info.has_tmz_support) {
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e4c4aa2367c..967e207ea6d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -534,8 +534,9 @@ struct si_screen {
    bool use_aco;
 
-   /* Force a single shader to use ACO, debug usage. */
-   bool force_shader_use_aco;
-   blake3_hash use_aco_shader_blake;
+   /* Force selected shaders (by stage or by source blake3) to use ACO, debug usage. */
+   blake3_hash *use_aco_shader_blakes;
+   unsigned num_use_aco_shader_blakes;
+   enum pipe_shader_type use_aco_shader_type;
 
    struct {
 #define OPT_BOOL(name, dflt, description) bool name : 1;
diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c
index 78b0c50cc96..770d4aeb713 100644
--- a/src/gallium/drivers/radeonsi/si_shader_info.c
+++ b/src/gallium/drivers/radeonsi/si_shader_info.c
@@ -483,11 +483,12 @@ static void scan_instruction(const struct nir_shader *nir, struct si_shader_info
 void si_nir_scan_shader(struct si_screen *sscreen, struct nir_shader *nir,
                         struct si_shader_info *info, bool colors_lowered)
 {
-   bool force_use_aco = false;
-   if (sscreen->force_shader_use_aco) {
-      if (!memcmp(sscreen->use_aco_shader_blake, nir->info.source_blake3,
-                  sizeof(sscreen->use_aco_shader_blake))) {
+   bool force_use_aco = sscreen->use_aco_shader_type == nir->info.stage;
+   for (unsigned i = 0; i < sscreen->num_use_aco_shader_blakes; i++) {
+      if (!memcmp(sscreen->use_aco_shader_blakes[i], nir->info.source_blake3,
+                  sizeof(blake3_hash))) {
          force_use_aco = true;
+         break;
       }
    }
 
diff --git a/src/util/mesa-blake3.c b/src/util/mesa-blake3.c
index 0f89f2da8a8..b5f34d83ae7 100644
--- a/src/util/mesa-blake3.c
+++ b/src/util/mesa-blake3.c
@@ -73,7 +73,7 @@ _mesa_blake3_print(FILE *f, const blake3_hash blake3)
 bool
 _mesa_blake3_from_printed_string(blake3_hash blake3, const char *printed)
 {
-   unsigned expected_len = BLAKE3_OUT_LEN32 * 12 - 2;
+   unsigned expected_len = BLAKE3_PRINTED_LEN;
    if (strlen(printed) != expected_len)
       return false;
 
diff --git a/src/util/mesa-blake3.h b/src/util/mesa-blake3.h
index 504cad8911f..8c2ae78911f 100644
--- a/src/util/mesa-blake3.h
+++ b/src/util/mesa-blake3.h
@@ -35,6 +35,8 @@ extern "C" {
 #define mesa_blake3 blake3_hasher
 #define BLAKE3_OUT_LEN32 (BLAKE3_OUT_LEN / 4)
 #define BLAKE3_HEX_LEN (2 * BLAKE3_OUT_LEN + 1)
+/* Exact strlen() that _mesa_blake3_from_printed_string() accepts. */
+#define BLAKE3_PRINTED_LEN (BLAKE3_OUT_LEN32 * 12 - 2)
 
 typedef uint8_t blake3_hash[BLAKE3_OUT_LEN];
 