radeonsi: allow using 64K LDS for NGG to allow larger workgroups

This should help NGG streamout performance, which is limited by the
workgroup size (the workgroup should be as large as possible).

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21403>
This commit is contained in:
Marek Olšák 2023-02-25 17:41:39 -05:00 committed by Marge Bot
parent e01d505291
commit 43fd552872

View file

@@ -106,10 +106,8 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
const unsigned max_verts_per_prim = u_vertices_per_prim(input_prim);
const unsigned min_verts_per_prim = gs_stage == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
/* All these are in dwords: */
/* GE can only use 8K dwords (32KB) of LDS per workgroup.
*/
const unsigned max_lds_size = 8 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
/* All these are in dwords. The maximum is 16K dwords (64KB) of LDS per workgroup. */
const unsigned max_lds_size = 16 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
const unsigned target_lds_size = max_lds_size;
unsigned esvert_lds_size = 0;
unsigned gsprim_lds_size = 0;