From 6cbd16daae5b6f258cca46b20db643c1e7d41391 Mon Sep 17 00:00:00 2001
From: Georg Lehmann
Date: Tue, 20 Jan 2026 17:22:27 +0100
Subject: [PATCH] aco/optimizer: optimize pack(undef, f2f16_rtz(a)) for gfx8+
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Do this late because the v_cvt_pkrtz_f16_f32 can be applied to its operand.

Reviewed-by: Marek Olšák
Acked-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_optimizer.cpp | 56 ++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index c73be9aebee..7cd08198eea 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -5306,6 +5306,56 @@ opt_split_cvt_pkrtz(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    }
 }
 
+/* After opt_split_cvt_pkrtz, convert
+ * p_create_vector(undef, v_cvt_pkrtz_f16_f32(a, ...)) to
+ * v_cvt_pkrtz_f16_f32(0, a)
+ *
+ * Called from apply_literals() when instr is a p_create_vector. Only a vector
+ * with two operands and a v1 definition is handled, and only if its first
+ * operand is an undefined 2-byte value and its second operand is a temporary
+ * with a single use whose parent instruction is a v_cvt_pkrtz_f16_f32 (or the
+ * _e64 opcode) that uses neither SDWA nor DPP.
+ *
+ * On success the conversion takes over the vector's definition and instr is
+ * reset to null, so the caller has to check instr before using it again.
+ */
+static void
+opt_pack_undef_cvt_pkrtz(opt_ctx& ctx, aco_ptr<Instruction>& instr)
+{
+   if (instr->operands.size() != 2 || instr->definitions[0].regClass() != v1 ||
+       !instr->operands[0].isUndefined() || !instr->operands[1].isTemp() ||
+       instr->operands[0].bytes() != 2 || ctx.uses[instr->operands[1].tempId()] != 1)
+      return;
+
+   Instruction* pkrtz_f16 = ctx.info[instr->operands[1].tempId()].parent_instr;
+
+   if (pkrtz_f16->opcode != aco_opcode::v_cvt_pkrtz_f16_f32 &&
+       pkrtz_f16->opcode != aco_opcode::v_cvt_pkrtz_f16_f32_e64)
+      return;
+
+   if (pkrtz_f16->isSDWA() || pkrtz_f16->isDPP())
+      return;
+
+   /* Replace a temporary second source with constant zero and drop its use. */
+   if (pkrtz_f16->operands[1].isTemp()) {
+      decrease_and_dce(ctx, pkrtz_f16->operands[1].getTemp());
+      pkrtz_f16->operands[1] = Operand::c32(0);
+   }
+
+   /* Move a into the second source; use VOP3 if that source is not of VGPR type. */
+   pkrtz_f16->valu().swapOperands(0, 1);
+   if (!pkrtz_f16->operands[1].isOfType(RegType::vgpr))
+      pkrtz_f16->format = asVOP3(pkrtz_f16->format);
+
+   /* Retire the old definition, then let the conversion define the vector's temporary. */
+   ctx.uses[pkrtz_f16->definitions[0].tempId()] = 0;
+   ctx.info[pkrtz_f16->definitions[0].tempId()].parent_instr = nullptr;
+
+   pkrtz_f16->definitions[0].setTemp(instr->definitions[0].getTemp());
+   ctx.info[pkrtz_f16->definitions[0].tempId()].parent_instr = pkrtz_f16;
+   instr.reset();
+}
+
 static void
 opt_fma_mix_acc(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 {
@@ -5469,6 +5519,12 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       return;
    }
 
+   if (instr->opcode == aco_opcode::p_create_vector) {
+      opt_pack_undef_cvt_pkrtz(ctx, instr);
+      if (!instr)
+         return;
+   }
+
    if (instr->opcode == aco_opcode::v_mul_f64 || instr->opcode == aco_opcode::v_mul_f64_e64)
       opt_neg_abs_fp64(ctx, instr);
 