diff --git a/.pick_status.json b/.pick_status.json index f26e0ca48e0..33d415ef8b6 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -3739,7 +3739,7 @@ "description": "radeonsi: fix applying the NGG minimum vertex count requirement", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": "4ecc39e1aa1568f19ebf54a99ffe14643bac7d15" }, diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index c9fdceef605..2eb278fbb79 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -2003,6 +2003,8 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader) max_esverts = MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size); max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim); + /* Hardware restriction: minimum value of max_esverts */ + max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim); max_gsprims = align(max_gsprims, wavesize); max_gsprims = MIN2(max_gsprims, max_gsprims_base); @@ -2012,10 +2014,13 @@ void gfx10_ngg_calculate_subgroup_info(struct si_shader *shader) clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, use_adjacency); assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1); } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims); - } - /* Hardware restriction: minimum value of max_esverts */ - max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim); + /* Verify the restriction. */ + assert(max_esverts >= 23 + max_verts_per_prim); + } else { + /* Hardware restriction: minimum value of max_esverts */ + max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim); + } unsigned max_out_vertices = max_vert_out_per_gs_instance