From 84dde64fd33d8a42a107641beebba1477d5be95d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Mon, 2 Aug 2021 16:48:41 +0200 Subject: [PATCH] aco: Use Navi 10 empty NGG output workaround on NGG culling shaders. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Navi 10 can hang when an NGG workgroup has no output, so we work around that by always exporting a single zero-area triangle with a single vertex that has all-NaN coordinates. Thus far, we only employed this for NGG GS, because on all other stages, the output can't be empty. However, with NGG culling, the output can be empty, so let's apply the same workaround there too. Cc: mesa-stable Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Part-of: (cherry picked from commit 448592b9aeb471772bd696fd44e4f952b8f492b6) --- .pick_status.json | 2 +- src/amd/compiler/aco_instruction_selection.cpp | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 3732985f2a8..9a41e89c8ad 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -31,7 +31,7 @@ "description": "aco: Use Navi 10 empty NGG output workaround on NGG culling shaders.", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index d5a48264ff8..fa0bf6b65e2 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11553,8 +11553,11 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt) Builder bld(ctx->program, ctx->block); Temp prm_cnt_0; - if (ctx->program->chip_class == GFX10 && ctx->stage.has(SWStage::GS)) { - /* Navi 1x workaround: make sure to always export at least 1 vertex and triangle */ + if (ctx->program->chip_class == GFX10 && + (ctx->stage.has(SWStage::GS) || ctx->program->info->has_ngg_culling)) { + /* Navi 1x workaround: check whether the workgroup has no output. + * If so, change the number of exported vertices and primitives to 1. + */ prm_cnt_0 = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), prm_cnt, Operand::zero()); prm_cnt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(1u), prm_cnt, bld.scc(prm_cnt_0)); @@ -11568,11 +11571,12 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt) tmp = bld.sop2(aco_opcode::s_or_b32, bld.m0(bld.def(s1)), bld.def(s1, scc), tmp, vtx_cnt); /* Request the SPI to allocate space for the primitives and vertices - * that will be exported by the threadgroup. */ + * that will be exported by the threadgroup. + */ bld.sopp(aco_opcode::s_sendmsg, bld.m0(tmp), -1, sendmsg_gs_alloc_req); if (prm_cnt_0.id()) { - /* Navi 1x workaround: export a triangle with NaN coordinates when GS has no output. + /* Navi 1x workaround: export a triangle with NaN coordinates when NGG has no output. * It can't have all-zero positions because that would render an undesired pixel with * conservative rasterization. */