ac: add a bug workaround for the 100% NGG culling case

Fixes: 8db00a51f8 - radeonsi/gfx10: implement NGG culling for 4x wave32 subgroups
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4079>
(cherry picked from commit fc65df5651)
This commit is contained in:
Marek Olšák 2020-03-03 19:01:17 -05:00 committed by Eric Engestrom
parent 0f437dd261
commit c53788cd41
2 changed files with 34 additions and 1 deletions

View file

@@ -211,7 +211,7 @@
"description": "ac: add a bug workaround for the 100% NGG culling case",
"nominated": true,
"nomination_type": 1,
"resolution": 0,
"resolution": 1,
"master_sha": null,
"because_sha": "8db00a51f85109e958631ef74a458b0614f37097"
},

View file

@@ -4784,6 +4784,21 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
{
LLVMBuilderRef builder = ctx->builder;
LLVMValueRef tmp;
bool export_dummy_prim = false;
/* HW workaround for a GPU hang with 100% culling.
* We always have to export at least 1 primitive.
* Export a degenerate triangle using vertex 0 for all 3 vertices.
*/
if (prim_cnt == ctx->i32_0 &&
(ctx->family == CHIP_NAVI10 ||
ctx->family == CHIP_NAVI12 ||
ctx->family == CHIP_NAVI14)) {
assert(vtx_cnt == ctx->i32_0);
prim_cnt = ctx->i32_1;
vtx_cnt = ctx->i32_1;
export_dummy_prim = true;
}
ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020);
@@ -4791,6 +4806,24 @@ void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wav
tmp = LLVMBuildOr(builder, tmp, vtx_cnt, "");
ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp);
if (export_dummy_prim) {
struct ac_ngg_prim prim = {};
/* The vertex indices are 0,0,0. */
prim.passthrough = ctx->i32_0;
struct ac_export_args pos = {};
pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = ctx->f32_0;
pos.target = V_008DFC_SQ_EXP_POS;
pos.enabled_channels = 0xf;
pos.done = true;
ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx),
ctx->i32_0, ""), 5021);
ac_build_export_prim(ctx, &prim);
ac_build_export(ctx, &pos);
ac_build_endif(ctx, 5021);
}
ac_build_endif(ctx, 5020);
}