diff --git a/.pick_status.json b/.pick_status.json index 3732985f2a8..9a41e89c8ad 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -31,7 +31,7 @@ "description": "aco: Use Navi 10 empty NGG output workaround on NGG culling shaders.", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index d5a48264ff8..fa0bf6b65e2 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11553,8 +11553,11 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt) Builder bld(ctx->program, ctx->block); Temp prm_cnt_0; - if (ctx->program->chip_class == GFX10 && ctx->stage.has(SWStage::GS)) { - /* Navi 1x workaround: make sure to always export at least 1 vertex and triangle */ + if (ctx->program->chip_class == GFX10 && + (ctx->stage.has(SWStage::GS) || ctx->program->info->has_ngg_culling)) { + /* Navi 1x workaround: check whether the workgroup has no output. + * If so, change the number of exported vertices and primitives to 1. + */ prm_cnt_0 = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), prm_cnt, Operand::zero()); prm_cnt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(1u), prm_cnt, bld.scc(prm_cnt_0)); @@ -11568,11 +11571,12 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt) tmp = bld.sop2(aco_opcode::s_or_b32, bld.m0(bld.def(s1)), bld.def(s1, scc), tmp, vtx_cnt); /* Request the SPI to allocate space for the primitives and vertices - * that will be exported by the threadgroup. */ + * that will be exported by the threadgroup. + */ bld.sopp(aco_opcode::s_sendmsg, bld.m0(tmp), -1, sendmsg_gs_alloc_req); if (prm_cnt_0.id()) { - /* Navi 1x workaround: export a triangle with NaN coordinates when GS has no output. + /* Navi 1x workaround: export a triangle with NaN coordinates when NGG has no output. * It can't have all-zero positions because that would render an undesired pixel with * conservative rasterization. */