diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp index f218d278fe2..0ce0324865e 100644 --- a/src/amd/compiler/aco_instruction_selection_setup.cpp +++ b/src/amd/compiler/aco_instruction_selection_setup.cpp @@ -1129,15 +1129,16 @@ setup_isel_context(Program* program, program->workgroup_size = ctx.tcs_num_patches * MAX2(shaders[1]->info.tess.tcs_vertices_out, ctx.args->options->key.tcs.input_vertices); } else if (program->stage.hw == HWStage::NGG) { gfx10_ngg_info &ngg_info = args->shader_info->ngg_info; + unsigned num_gs_invocations = (program->stage.has(SWStage::GS)) ? MAX2(shaders[1]->info.gs.invocations, 1) : 1; - /* Max ES (SW VS) threads */ + /* Max ES (SW VS/TES) threads */ uint32_t max_esverts = ngg_info.hw_max_esverts; /* Max GS input primitives = max GS threads */ - uint32_t max_gs_input_prims = ngg_info.max_gsprims; + uint32_t max_gs_input_prims = ngg_info.max_gsprims * num_gs_invocations; /* Maximum output vertices -- each thread can export only 1 vertex */ uint32_t max_out_vtx = ngg_info.max_out_verts; /* Maximum output primitives -- each thread can export only 1 or 0 primitive */ - uint32_t max_out_prm = ngg_info.max_gsprims * ngg_info.prim_amp_factor; + uint32_t max_out_prm = ngg_info.max_gsprims * num_gs_invocations * ngg_info.prim_amp_factor; program->workgroup_size = MAX4(max_esverts, max_gs_input_prims, max_out_vtx, max_out_prm); } else {