diff --git a/src/panfrost/bifrost/valhall/test/test-insert-flow.cpp b/src/panfrost/bifrost/valhall/test/test-insert-flow.cpp
index 410fe4ec1b6..5ee55f1fb1d 100644
--- a/src/panfrost/bifrost/valhall/test/test-insert-flow.cpp
+++ b/src/panfrost/bifrost/valhall/test/test-insert-flow.cpp
@@ -104,12 +104,13 @@ TEST_F(InsertFlow, TilebufferWait7) {
    });
 }
 
-TEST_F(InsertFlow, AtestWait6) {
+TEST_F(InsertFlow, AtestWait6AndWait0After) {
    CASE(FRAGMENT, {
       flow(DISCARD);
       bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
       flow(WAIT0126);
       bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5));
+      flow(WAIT0);
       flow(END);
    });
 }
diff --git a/src/panfrost/bifrost/valhall/va_insert_flow.c b/src/panfrost/bifrost/valhall/va_insert_flow.c
index 9d666821e1b..0a2e886f518 100644
--- a/src/panfrost/bifrost/valhall/va_insert_flow.c
+++ b/src/panfrost/bifrost/valhall/va_insert_flow.c
@@ -429,6 +429,10 @@ va_insert_flow_control_nops(bi_context *ctx)
          /* Insert waits for tilebuffer and depth/stencil instructions. These
           * only happen in regular fragment shaders, as the required waits are
           * assumed to already have happened in blend shaders.
+          *
+          * For discarded thread handling, ATEST must be serialized against all
+          * other asynchronous instructions and should be serialized against all
+          * instructions: WAIT0126 before the ATEST, slot 0 wait right after it.
           */
          case BI_OPCODE_BLEND:
          case BI_OPCODE_LD_TILE:
@@ -437,6 +441,9 @@ va_insert_flow_control_nops(bi_context *ctx)
                bi_flow(ctx, bi_before_instr(I), VA_FLOW_WAIT);
             break;
          case BI_OPCODE_ATEST:
+            bi_flow(ctx, bi_before_instr(I), VA_FLOW_WAIT0126);
+            bi_flow(ctx, bi_after_instr(I), VA_FLOW_WAIT0);
+            break;
          case BI_OPCODE_ZS_EMIT:
             if (!ctx->inputs->is_blend)
                bi_flow(ctx, bi_before_instr(I), VA_FLOW_WAIT0126);