radeonsi: centralize wave size computation in si_get_shader_wave_size

The big comment was not really true.

The other debug options are unused right now, but will be used again
in the future.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13878>
This commit is contained in:
Marek Olšák 2021-11-18 18:36:37 -05:00 committed by Marge Bot
parent b5665bd46c
commit 676d4ddcf8
2 changed files with 14 additions and 52 deletions

View file

@ -1374,34 +1374,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
}
sscreen->ngg_subgroup_size = 128;
sscreen->ge_wave_size = 64;
sscreen->ps_wave_size = 64;
sscreen->compute_wave_size = 64;
if (sscreen->info.chip_class >= GFX10) {
/* Pixel shaders: Wave64 is always fastest.
* Vertex shaders: Wave64 is probably better, because:
* - greater chance of L0 cache hits, because more threads are assigned
* to the same CU
* - scalar instructions are only executed once for 64 threads instead of twice
* - VGPR allocation granularity is half of Wave32, so 1 Wave64 can
* sometimes use fewer VGPRs than 2 Wave32
* - TessMark X64 with NGG culling is faster with Wave64
*/
if (sscreen->debug_flags & DBG(W32_GE))
sscreen->ge_wave_size = 32;
if (sscreen->debug_flags & DBG(W32_PS))
sscreen->ps_wave_size = 32;
if (sscreen->debug_flags & DBG(W32_CS))
sscreen->compute_wave_size = 32;
if (sscreen->debug_flags & DBG(W64_GE))
sscreen->ge_wave_size = 64;
if (sscreen->debug_flags & DBG(W64_PS))
sscreen->ps_wave_size = 64;
if (sscreen->debug_flags & DBG(W64_CS))
sscreen->compute_wave_size = 64;
}
/* Create the auxiliary context. This must be done last. */
sscreen->aux_context = si_create_context(

View file

@ -686,9 +686,6 @@ struct si_screen {
* We want to minimize the impact on multithreaded Mesa. */
struct ac_llvm_compiler compiler_lowp[10];
unsigned compute_wave_size;
unsigned ps_wave_size;
unsigned ge_wave_size;
unsigned ngg_subgroup_size;
struct util_idalloc_mt buffer_ids;
@ -1980,35 +1977,28 @@ static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sc
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, bo, usage);
}
static inline unsigned si_get_wave_size(struct si_screen *sscreen,
gl_shader_stage stage, bool ngg, bool es)
{
if (stage == MESA_SHADER_COMPUTE)
return sscreen->compute_wave_size;
else if (stage == MESA_SHADER_FRAGMENT)
return sscreen->ps_wave_size;
else if ((stage == MESA_SHADER_VERTEX && es && !ngg) ||
(stage == MESA_SHADER_TESS_EVAL && es && !ngg) ||
(stage == MESA_SHADER_GEOMETRY && !ngg)) /* legacy GS only supports Wave64 */
return 64;
else
return sscreen->ge_wave_size;
}
static inline unsigned si_get_shader_wave_size(struct si_screen *sscreen, struct si_shader *shader)
{
/* There are a few uses that pass shader=NULL here, expecting the default compute wave size. */
struct si_shader_info *info = shader ? &shader->selector->info : NULL;
gl_shader_stage stage = info ? info->stage : MESA_SHADER_COMPUTE;
if (shader && shader->is_gs_copy_shader)
return shader->selector->screen->ge_wave_size;
if (sscreen->info.chip_class < GFX10)
return 64;
if (stage <= MESA_SHADER_GEOMETRY) {
return si_get_wave_size(sscreen, stage, shader->key.ge.as_ngg, shader->key.ge.as_es);
}
/* Legacy GS only supports Wave64. */
if ((stage == MESA_SHADER_VERTEX && shader->key.ge.as_es && !shader->key.ge.as_ngg) ||
(stage == MESA_SHADER_TESS_EVAL && shader->key.ge.as_es && !shader->key.ge.as_ngg) ||
(stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg))
return 64;
return si_get_wave_size(sscreen, stage, false, false);
if (stage == MESA_SHADER_COMPUTE)
return sscreen->debug_flags & DBG(W32_CS) ? 32 : 64;
if (stage == MESA_SHADER_FRAGMENT)
return sscreen->debug_flags & DBG(W32_PS) ? 32 : 64;
return sscreen->debug_flags & DBG(W32_GE) ? 32 : 64;
}
static inline void si_select_draw_vbo(struct si_context *sctx)