From 676d4ddcf83a62973aad8062a34c7c838bfc8a4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 18 Nov 2021 18:36:37 -0500 Subject: [PATCH] radeonsi: centralize wave size computation in si_get_shader_wave_size The big comment was not really true. The other debug options are unused right now, but will be used again in the future. Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_pipe.c | 28 ------------------- src/gallium/drivers/radeonsi/si_pipe.h | 38 ++++++++++---------------- 2 files changed, 14 insertions(+), 52 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index f24c635808b..bd7d3f27c74 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1374,34 +1374,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, } sscreen->ngg_subgroup_size = 128; - sscreen->ge_wave_size = 64; - sscreen->ps_wave_size = 64; - sscreen->compute_wave_size = 64; - - if (sscreen->info.chip_class >= GFX10) { - /* Pixel shaders: Wave64 is always fastest. - * Vertex shaders: Wave64 is probably better, because: - * - greater chance of L0 cache hits, because more threads are assigned - * to the same CU - * - scalar instructions are only executed once for 64 threads instead of twice - * - VGPR allocation granularity is half of Wave32, so 1 Wave64 can - * sometimes use fewer VGPRs than 2 Wave32 - * - TessMark X64 with NGG culling is faster with Wave64 - */ - if (sscreen->debug_flags & DBG(W32_GE)) - sscreen->ge_wave_size = 32; - if (sscreen->debug_flags & DBG(W32_PS)) - sscreen->ps_wave_size = 32; - if (sscreen->debug_flags & DBG(W32_CS)) - sscreen->compute_wave_size = 32; - - if (sscreen->debug_flags & DBG(W64_GE)) - sscreen->ge_wave_size = 64; - if (sscreen->debug_flags & DBG(W64_PS)) - sscreen->ps_wave_size = 64; - if (sscreen->debug_flags & DBG(W64_CS)) - sscreen->compute_wave_size = 64; - } /* Create the auxiliary context. This must be done last. */ sscreen->aux_context = si_create_context( diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index f58a48d4b48..a7fdfccc086 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -686,9 +686,6 @@ struct si_screen { * We want to minimize the impact on multithreaded Mesa. */ struct ac_llvm_compiler compiler_lowp[10]; - unsigned compute_wave_size; - unsigned ps_wave_size; - unsigned ge_wave_size; unsigned ngg_subgroup_size; struct util_idalloc_mt buffer_ids; @@ -1980,35 +1977,28 @@ static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sc radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, bo, usage); } -static inline unsigned si_get_wave_size(struct si_screen *sscreen, - gl_shader_stage stage, bool ngg, bool es) -{ - if (stage == MESA_SHADER_COMPUTE) - return sscreen->compute_wave_size; - else if (stage == MESA_SHADER_FRAGMENT) - return sscreen->ps_wave_size; - else if ((stage == MESA_SHADER_VERTEX && es && !ngg) || - (stage == MESA_SHADER_TESS_EVAL && es && !ngg) || - (stage == MESA_SHADER_GEOMETRY && !ngg)) /* legacy GS only supports Wave64 */ - return 64; - else - return sscreen->ge_wave_size; -} - static inline unsigned si_get_shader_wave_size(struct si_screen *sscreen, struct si_shader *shader) { /* There are a few uses that pass shader=NULL here, expecting the default compute wave size. */ struct si_shader_info *info = shader ? &shader->selector->info : NULL; gl_shader_stage stage = info ? info->stage : MESA_SHADER_COMPUTE; - if (shader && shader->is_gs_copy_shader) - return shader->selector->screen->ge_wave_size; + if (sscreen->info.chip_class < GFX10) + return 64; - if (stage <= MESA_SHADER_GEOMETRY) { - return si_get_wave_size(sscreen, stage, shader->key.ge.as_ngg, shader->key.ge.as_es); - } + /* Legacy GS only supports Wave64. */ + if ((stage == MESA_SHADER_VERTEX && shader->key.ge.as_es && !shader->key.ge.as_ngg) || + (stage == MESA_SHADER_TESS_EVAL && shader->key.ge.as_es && !shader->key.ge.as_ngg) || + (stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg)) + return 64; - return si_get_wave_size(sscreen, stage, false, false); + if (stage == MESA_SHADER_COMPUTE) + return sscreen->debug_flags & DBG(W32_CS) ? 32 : 64; + + if (stage == MESA_SHADER_FRAGMENT) + return sscreen->debug_flags & DBG(W32_PS) ? 32 : 64; + + return sscreen->debug_flags & DBG(W32_GE) ? 32 : 64; } static inline void si_select_draw_vbo(struct si_context *sctx)