diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index 09590878e5e..ca88581215b 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -1113,9 +1113,15 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out } ac_build_endif(&ctx->ac, 16009); + /* If all vertices are culled, set the primitive count to 0, so that all waves are culled here. */ + LLVMValueRef num_primitives = ngg_get_prim_cnt(ctx); + num_primitives = LLVMBuildSelect(builder, + LLVMBuildICmp(builder, LLVMIntEQ, new_num_es_threads, + ctx->ac.i32_0, ""), + ctx->ac.i32_0, num_primitives, ""); /* Kill waves that have inactive threads. */ kill_wave = LLVMBuildICmp(builder, LLVMIntULE, - ac_build_imax(&ctx->ac, new_num_es_threads, ngg_get_prim_cnt(ctx)), + ac_build_imax(&ctx->ac, new_num_es_threads, num_primitives), LLVMBuildMul(builder, get_wave_id_in_tg(ctx), LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, 0), ""), "");