mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-02 10:08:08 +02:00
radeonsi: increase NGG workgroup size to 256 for VS/TES with streamout and GS
NGG streamout performance is limited by the workgroup size, so make it as large as possible. Since this uses si_get_max_workgroup_size() to set the NGG workgroup size, the side effect is that all GS is also getting an increase to 256, which is OK. Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21403>
This commit is contained in:
parent
43fd552872
commit
461ccb00e1
5 changed files with 10 additions and 9 deletions
|
|
@ -117,8 +117,9 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
|
|||
gs_sel->screen->info.gfx_level >= GFX11 ? 3 : /* gfx11 requires at least 1 primitive per TG */
|
||||
gs_sel->screen->info.gfx_level >= GFX10_3 ? 29 : (24 - 1 + max_verts_per_prim);
|
||||
bool max_vert_out_per_gs_instance = false;
|
||||
unsigned max_gsprims_base = gs_sel->screen->ngg_subgroup_size; /* default prim group size clamp */
|
||||
unsigned max_esverts_base = gs_sel->screen->ngg_subgroup_size;
|
||||
unsigned max_gsprims_base, max_esverts_base;
|
||||
|
||||
max_gsprims_base = max_esverts_base = si_get_max_workgroup_size(shader);
|
||||
|
||||
if (gs_stage == MESA_SHADER_GEOMETRY) {
|
||||
bool force_multi_cycling = false;
|
||||
|
|
|
|||
|
|
@ -1413,8 +1413,6 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||
}
|
||||
}
|
||||
|
||||
sscreen->ngg_subgroup_size = 128;
|
||||
|
||||
if (sscreen->info.gfx_level >= GFX11) {
|
||||
unsigned attr_ring_size = sscreen->info.attribute_ring_size_per_se * sscreen->info.max_se;
|
||||
sscreen->attribute_ring = si_aligned_buffer_create(&sscreen->b,
|
||||
|
|
|
|||
|
|
@ -694,8 +694,6 @@ struct si_screen {
|
|||
* We want to minimize the impact on multithreaded Mesa. */
|
||||
struct ac_llvm_compiler compiler_lowp[10];
|
||||
|
||||
unsigned ngg_subgroup_size;
|
||||
|
||||
struct util_idalloc_mt buffer_ids;
|
||||
struct util_vertex_state_cache vertex_state_cache;
|
||||
|
||||
|
|
|
|||
|
|
@ -206,7 +206,11 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader)
|
|||
switch (shader->selector->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
return shader->key.ge.as_ngg ? shader->selector->screen->ngg_subgroup_size : 0;
|
||||
/* Use the largest workgroup size for streamout */
|
||||
if (shader->key.ge.as_ngg)
|
||||
return si_shader_uses_streamout(shader) ? 256 : 128;
|
||||
else
|
||||
return 0;
|
||||
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
/* Return this so that LLVM doesn't remove s_barrier
|
||||
|
|
@ -214,7 +218,7 @@ unsigned si_get_max_workgroup_size(const struct si_shader *shader)
|
|||
return shader->selector->screen->info.gfx_level >= GFX7 ? 128 : 0;
|
||||
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
/* ngg_subgroup_size is only the input size. GS can always generate up to 256 vertices. */
|
||||
/* GS can always generate up to 256 vertices. */
|
||||
return shader->selector->screen->info.gfx_level >= GFX9 ? 256 : 0;
|
||||
|
||||
case MESA_SHADER_COMPUTE:
|
||||
|
|
|
|||
|
|
@ -1058,7 +1058,7 @@ static inline bool gfx10_ngg_writes_user_edgeflags(struct si_shader *shader)
|
|||
shader->selector->info.writes_edgeflag;
|
||||
}
|
||||
|
||||
static inline bool si_shader_uses_streamout(struct si_shader *shader)
|
||||
static inline bool si_shader_uses_streamout(const struct si_shader *shader)
|
||||
{
|
||||
return shader->selector->stage <= MESA_SHADER_GEOMETRY &&
|
||||
shader->selector->info.enabled_streamout_buffer_mask &&
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue