From 9276b2f2f3223728683b3aa0e89b9e702028f083 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Thu, 7 Jul 2022 12:27:08 +0200 Subject: [PATCH] aco: fix packed 16bit fneg/fsat optimization Make sure that the Operand is '1.0.xx'. Fixes: b03be30e07546b813acc1156407b964e68892beb ('aco: optimize packed fneg') Reviewed-by: Rhys Perry Part-of: (cherry picked from commit 66d46a23fbc289b9492547697f0738f32ecb46c8) --- .pick_status.json | 2 +- src/amd/compiler/aco_optimizer.cpp | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 0954f025c5c..3d4896ac9c9 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -472,7 +472,7 @@ "description": "aco: fix packed 16bit fneg/fsat optimization", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": "b03be30e07546b813acc1156407b964e68892beb" }, diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index e2f540c94e3..76f8fc8aac1 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -3326,7 +3326,8 @@ combine_vop3p(opt_ctx& ctx, aco_ptr& instr) /* apply clamp */ if (instr->opcode == aco_opcode::v_pk_mul_f16 && instr->operands[1].constantEquals(0x3C00) && - vop3p->clamp && instr->operands[0].isTemp() && ctx.uses[instr->operands[0].tempId()] == 1) { + vop3p->clamp && instr->operands[0].isTemp() && ctx.uses[instr->operands[0].tempId()] == 1 && + !((vop3p->opsel_lo | vop3p->opsel_hi) & 2)) { ssa_info& info = ctx.info[instr->operands[0].tempId()]; if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) { @@ -3350,6 +3351,12 @@ combine_vop3p(opt_ctx& ctx, aco_ptr& instr) ssa_info& info = ctx.info[op.tempId()]; if (info.is_vop3p() && info.instr->opcode == aco_opcode::v_pk_mul_f16 && info.instr->operands[1].constantEquals(0x3C00)) { + + VOP3P_instruction* fneg = &info.instr->vop3p(); + + if ((fneg->opsel_lo | fneg->opsel_hi) & 2) + continue; + Operand ops[3]; for (unsigned j = 0; j < instr->operands.size(); j++) ops[j] = instr->operands[j]; @@ -3357,7 +3364,6 @@ combine_vop3p(opt_ctx& ctx, aco_ptr& instr) if (!check_vop3_operands(ctx, instr->operands.size(), ops)) continue; - VOP3P_instruction* fneg = &info.instr->vop3p(); if (fneg->clamp) continue; instr->operands[i] = fneg->operands[0];