aco/ngg: Incorporate GS invocations into workgroup size calculation.

If the workgroup_size variable is lower than the actual workgroup size,
that means it's possible that ACO won't emit some s_barrier instructions
when in fact it should. This can possibly cause a GPU hang.

This is just for the sake of general correctness. Currently this
can't cause a real problem, because the maximum vertex count is always
greater than (or equal to) the primitive count in GS, and already
takes into account the number of GS invocations.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7232>
This commit is contained in:
Timur Kristóf 2020-10-15 10:33:18 +02:00
parent 09b9e52c0d
commit f74ef15879

View file

@@ -1129,15 +1129,16 @@ setup_isel_context(Program* program,
program->workgroup_size = ctx.tcs_num_patches * MAX2(shaders[1]->info.tess.tcs_vertices_out, ctx.args->options->key.tcs.input_vertices);
} else if (program->stage.hw == HWStage::NGG) {
gfx10_ngg_info &ngg_info = args->shader_info->ngg_info;
unsigned num_gs_invocations = (program->stage.has(SWStage::GS)) ? MAX2(shaders[1]->info.gs.invocations, 1) : 1;
/* Max ES (SW VS) threads */
/* Max ES (SW VS/TES) threads */
uint32_t max_esverts = ngg_info.hw_max_esverts;
/* Max GS input primitives = max GS threads */
uint32_t max_gs_input_prims = ngg_info.max_gsprims;
uint32_t max_gs_input_prims = ngg_info.max_gsprims * num_gs_invocations;
/* Maximum output vertices -- each thread can export only 1 vertex */
uint32_t max_out_vtx = ngg_info.max_out_verts;
/* Maximum output primitives -- each thread can export only 1 or 0 primitive */
uint32_t max_out_prm = ngg_info.max_gsprims * ngg_info.prim_amp_factor;
uint32_t max_out_prm = ngg_info.max_gsprims * num_gs_invocations * ngg_info.prim_amp_factor;
program->workgroup_size = MAX4(max_esverts, max_gs_input_prims, max_out_vtx, max_out_prm);
} else {