agx: fuse ballot+cmp

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27616>
This commit is contained in:
Alyssa Rosenzweig 2024-01-12 17:17:07 -04:00 committed by Marge Bot
parent def00f7bf0
commit da924e2f03
2 changed files with 64 additions and 0 deletions

View file

@ -345,6 +345,36 @@ agx_optimizer_cmpsel(agx_instr **defs, agx_instr *I)
}
}
/*
 * Ballot fusion. When the value fed to a ballot is itself produced by an
 * integer or float compare, emit one combined compare-and-ballot instruction:
 *
 *    ballot(cmp(x, y)) -> ballot_cmp(x, y)
 *
 * Handles both BALLOT and QUAD_BALLOT. Only the ballot instruction I is
 * removed here; the compare that produced its source is left untouched.
 */
static void
agx_optimizer_ballot(agx_context *ctx, agx_instr **defs, agx_instr *I)
{
   /* Look up the instruction that defines the ballot's only source */
   agx_instr *cmp = defs[I->src[0].value];
   if (!cmp)
      return;

   /* Only integer and float compares can be folded in */
   bool is_icmp = (cmp->op == AGX_OPCODE_ICMP);
   if (!is_icmp && cmp->op != AGX_OPCODE_FCMP)
      return;

   assert(I->op == AGX_OPCODE_QUAD_BALLOT || I->op == AGX_OPCODE_BALLOT);
   bool is_quad = (I->op == AGX_OPCODE_QUAD_BALLOT);

   /* The fused form has a different number of sources than the plain ballot,
    * so build a new instruction right before I instead of editing I in place.
    */
   agx_builder b = agx_init_builder(ctx, agx_before_instr(I));
   agx_instr *fused =
      agx_icmp_ballot_to(&b, I->dest[0], cmp->src[0], cmp->src[1], cmp->icond,
                         cmp->invert_cond);

   /* The instruction was built as an icmp ballot; now select the opcode that
    * matches the compare kind and the ballot width.
    * NOTE(review): icond is read even when the compare is an FCMP, which
    * presumably relies on it sharing storage with the float condition --
    * confirm against the agx_instr definition.
    */
   if (is_icmp)
      fused->op = is_quad ? AGX_OPCODE_ICMP_QUAD_BALLOT : AGX_OPCODE_ICMP_BALLOT;
   else
      fused->op = is_quad ? AGX_OPCODE_FCMP_QUAD_BALLOT : AGX_OPCODE_FCMP_BALLOT;

   agx_remove_instruction(I);
}
static void
agx_optimizer_forward(agx_context *ctx)
{
@ -377,6 +407,8 @@ agx_optimizer_forward(agx_context *ctx)
agx_optimizer_if_cmp(defs, I);
else if (I->op == AGX_OPCODE_ICMPSEL)
agx_optimizer_cmpsel(defs, I);
else if (I->op == AGX_OPCODE_BALLOT || I->op == AGX_OPCODE_QUAD_BALLOT)
agx_optimizer_ballot(ctx, defs, I);
}
free(defs);

View file

@ -4,6 +4,7 @@
*/
#include "agx_builder.h"
#include "agx_compiler.h"
#include "agx_test.h"
#include <gtest/gtest.h>
@ -218,6 +219,37 @@ TEST_F(Optimizer, NoConversionsOn16BitALU)
NEGCASE32(agx_fmov_to(b, out, agx_fadd(b, hx, hy)));
}
/*
 * Ballot fusion, single-use compares: each case pairs a ballot (or quad
 * ballot) of an icmp/fcmp result with the matching fused compare+ballot
 * instruction, carrying the same operands, condition and invert flag.
 *
 * NOTE(review): CASE32 is defined outside this view; presumably it runs the
 * optimizer on the first argument and expects the second -- confirm.
 */
TEST_F(Optimizer, BallotCondition)
{
/* ballot(icmp) -> icmp_ballot */
CASE32(agx_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
agx_icmp_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));
/* ballot(fcmp) -> fcmp_ballot */
CASE32(agx_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GE, false)),
agx_fcmp_ballot_to(b, out, wx, wy, AGX_FCOND_GE, false));
/* quad_ballot(icmp) -> icmp_quad_ballot */
CASE32(agx_quad_ballot_to(b, out, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true)),
agx_icmp_quad_ballot_to(b, out, wx, wy, AGX_ICOND_UEQ, true));
/* quad_ballot(fcmp) -> fcmp_quad_ballot */
CASE32(agx_quad_ballot_to(b, out, agx_fcmp(b, wx, wy, AGX_FCOND_GT, false)),
agx_fcmp_quad_ballot_to(b, out, wx, wy, AGX_FCOND_GT, false));
}
/*
 * Ballot fusion when the compare has another consumer: the fcmp result feeds
 * both the quad ballot and the fadd. The expected program still contains the
 * standalone fcmp (the fadd reads it) next to the fused fcmp_quad_ballot.
 *
 * NOTE(review): CASE32 is defined outside this view; presumably the first
 * block is the optimizer input and the second the expected output -- confirm.
 */
TEST_F(Optimizer, BallotMultipleUses)
{
CASE32(
{
agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
agx_index ballot = agx_quad_ballot(b, cmp);
agx_fadd_to(b, out, cmp, ballot);
},
{
agx_index cmp = agx_fcmp(b, wx, wy, AGX_FCOND_GT, false);
agx_index ballot =
agx_fcmp_quad_ballot(b, wx, wy, AGX_FCOND_GT, false);
agx_fadd_to(b, out, cmp, ballot);
});
}
TEST_F(Optimizer, IfCondition)
{
CASE_NO_RETURN(agx_if_icmp(b, agx_icmp(b, wx, wy, AGX_ICOND_UEQ, true),