From da924e2f0346935d791b45fbd71ffc0b80352915 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 12 Jan 2024 17:17:07 -0400 Subject: [PATCH] agx: fuse ballot+cmp Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/compiler/agx_optimizer.c | 32 ++++++++++++++++++++++ src/asahi/compiler/test/test-optimizer.cpp | 32 ++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/asahi/compiler/agx_optimizer.c b/src/asahi/compiler/agx_optimizer.c index 079f7161d90..dd7762fd070 100644 --- a/src/asahi/compiler/agx_optimizer.c +++ b/src/asahi/compiler/agx_optimizer.c @@ -345,6 +345,36 @@ agx_optimizer_cmpsel(agx_instr **defs, agx_instr *I) } } +/* + * Fuse a comparison into the (quad) ballot reading it, keeping the comparison: + * + * ballot(cmp(x, y)) -> ballot_cmp(x, y), where cmp is icmp or fcmp + */ +static void +agx_optimizer_ballot(agx_context *ctx, agx_instr **defs, agx_instr *I) +{ + agx_instr *def = defs[I->src[0].value]; + if (!def || (def->op != AGX_OPCODE_ICMP && def->op != AGX_OPCODE_FCMP)) + return; + + bool quad = I->op == AGX_OPCODE_QUAD_BALLOT; + assert(quad || I->op == AGX_OPCODE_BALLOT); + + /* Source count changes, so build a fresh icmp_ballot; op is fixed up below */ + agx_builder b = agx_init_builder(ctx, agx_before_instr(I)); + + agx_instr *fused = agx_icmp_ballot_to( + &b, I->dest[0], def->src[0], def->src[1], def->icond, def->invert_cond); + + if (def->op == AGX_OPCODE_ICMP) { + fused->op = quad ? AGX_OPCODE_ICMP_QUAD_BALLOT : AGX_OPCODE_ICMP_BALLOT; + } else { + fused->op = quad ? 
AGX_OPCODE_FCMP_QUAD_BALLOT : AGX_OPCODE_FCMP_BALLOT; + } + + agx_remove_instruction(I); +} + static void agx_optimizer_forward(agx_context *ctx) { @@ -377,6 +407,8 @@ agx_optimizer_forward(agx_context *ctx) agx_optimizer_if_cmp(defs, I); else if (I->op == AGX_OPCODE_ICMPSEL) agx_optimizer_cmpsel(defs, I); + else if (I->op == AGX_OPCODE_BALLOT || I->op == AGX_OPCODE_QUAD_BALLOT) + agx_optimizer_ballot(ctx, defs, I); } free(defs); diff --git a/src/asahi/compiler/test/test-optimizer.cpp b/src/asahi/compiler/test/test-optimizer.cpp index 35ea97f9718..bddcc744da1 100644 --- a/src/asahi/compiler/test/test-optimizer.cpp +++ b/src/asahi/compiler/test/test-optimizer.cpp @@ -4,6 +4,7 @@ */ #include "agx_builder.h" +#include "agx_compiler.h" #include "agx_test.h" #include @@ -218,6 +219,37 @@ TEST_F(Optimizer, NoConversionsOn16BitALU) NEGCASE32(agx_fmov_to(b, out, agx_fadd(b, hx, hy))); } +TEST_F(Optimizer, BallotCondition) +{ + CASE32(agx_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)), + agx_icmp_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true)); + + CASE32(agx_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GE, false)), + agx_fcmp_ballot_to(b, out, wx, wy, AGX_FCOND_GE, false)); + + CASE32(agx_quad_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)), + agx_icmp_quad_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true)); + + CASE32(agx_quad_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GT, false)), + agx_fcmp_quad_ballot_to(b, out, wx, wy, AGX_FCOND_GT, false)); +} + +TEST_F(Optimizer, BallotMultipleUses) +{ + CASE32( + { + agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false); + agx_index ballot = agx_quad_ballot(b, cmp); + agx_fadd_to(b, out, cmp, ballot); + }, + { + agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false); + agx_index ballot = + agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false); + agx_fadd_to(b, out, cmp, ballot); + }); +} + TEST_F(Optimizer, IfCondition) { CASE_NO_RETURN(agx_if_icmp(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),