From 4562f9f89402c20681f7480b99d82e9967b5de0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Fri, 11 Sep 2020 16:20:21 +0100 Subject: [PATCH] aco: optimize packed fneg Reviewed-by: Rhys Perry Part-of: (cherry picked from commit b03be30e07546b813acc1156407b964e68892beb) --- .pick_status.json | 2 +- src/amd/compiler/aco_optimizer.cpp | 38 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/.pick_status.json b/.pick_status.json index 1fa8f5c86de..04d5f7e9b70 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -301,7 +301,7 @@ "description": "aco: optimize packed fneg", "nominated": false, "nomination_type": null, - "resolution": 4, + "resolution": 1, "master_sha": null, "because_sha": null }, diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 768037a5294..6ce2567a970 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2750,6 +2750,44 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr) } } + /* check for fneg modifiers */ + if (instr_info.can_use_input_modifiers[(int)instr->opcode]) { + /* at this point, we only have 2-operand instructions */ + assert(instr->operands.size() == 2); + for (unsigned i = 0; i < 2; i++) { + Operand& op = instr->operands[i]; + if (!op.isTemp()) + continue; + + ssa_info& info = ctx.info[op.tempId()]; + if (info.is_vop3p() && info.instr->opcode == aco_opcode::v_pk_mul_f16 && + info.instr->operands[1].constantEquals(0xBC00)) { + Operand ops[2] = {instr->operands[!i], info.instr->operands[0]}; + if (!check_vop3_operands(ctx, 2, ops)) + continue; + + VOP3P_instruction* fneg = static_cast<VOP3P_instruction*>(info.instr); + if (fneg->clamp) + continue; + instr->operands[i] = fneg->operands[0]; + + /* opsel_lo/hi is either 0 or 1: + * if 0 - pick selection from fneg->lo + * if 1 - pick selection from fneg->hi + */ + bool opsel_lo = vop3p->opsel_lo & (1 << i); + bool opsel_hi = vop3p->opsel_hi & (1 << i); 
+ vop3p->neg_lo[i] ^= true ^ (opsel_lo ? fneg->neg_hi[0] : fneg->neg_lo[0]); + vop3p->neg_hi[i] ^= true ^ (opsel_hi ? fneg->neg_hi[0] : fneg->neg_lo[0]); + vop3p->opsel_lo ^= ((opsel_lo ? ~fneg->opsel_hi : fneg->opsel_lo) & 1) << i; + vop3p->opsel_hi ^= ((opsel_hi ? ~fneg->opsel_hi : fneg->opsel_lo) & 1) << i; + + if (--ctx.uses[fneg->definitions[0].tempId()]) + ctx.uses[fneg->operands[0].tempId()]++; + } + } + } + if (instr->opcode == aco_opcode::v_pk_add_f16) { if (instr->definitions[0].isPrecise()) return;