diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 32ddca431b0..c1cc2d917c5 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2592,12 +2592,70 @@ optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_fil } } +void +optimize_encoding_sopk(Program* program, ra_ctx& ctx, RegisterFile& register_file, + aco_ptr& instr) +{ + /* Try to rewrite a SOP2 instruction with a literal source as SOPK. Only s_add_i32, s_mul_i32 and s_cselect_b32 are considered; the instruction is left untouched as soon as one of the checks below fails. */ + if (instr->opcode != aco_opcode::s_add_i32 && instr->opcode != aco_opcode::s_mul_i32 && + instr->opcode != aco_opcode::s_cselect_b32) + return; + + /* Index of the operand expected to hold the literal. For s_cselect_b32 only operand 0 is ever considered; for the other opcodes operand 1 is chosen when it is a literal. */ uint32_t literal_idx = 0; + + if (instr->opcode != aco_opcode::s_cselect_b32 && instr->operands[1].isLiteral()) + literal_idx = 1; + + /* The other (non-literal) source, operands[!literal_idx], must be an sgpr temp that is killed before the definition and whose physReg is below 128. NOTE(review): presumably required because the SOPK forms get their definition fixed to the register of operand 0 (see the setFixed handling in register_allocation) - confirm. */ if (!instr->operands[!literal_idx].isTemp() || + !instr->operands[!literal_idx].isKillBeforeDef() || + instr->operands[!literal_idx].getTemp().type() != RegType::sgpr || + instr->operands[!literal_idx].physReg() >= 128) + return; + + if (!instr->operands[literal_idx].isLiteral()) + return; + + /* Bail out unless bits 15..31 of the literal are all zero or all one, i.e. the value is representable as a sign-extended 16-bit integer. */ const uint32_t i16_mask = 0xffff8000u; + uint32_t value = instr->operands[literal_idx].constantValue(); + if ((value & i16_mask) && (value & i16_mask) != i16_mask) + return; + + /* If the definition has an affinity whose target is already assigned to a register different from the one of the temp operand, give up when register_file.test() on that register returns false. NOTE(review): presumably a false result means the affinity register is still available, so keeping the SOP2 form lets the definition land there - confirm against RegisterFile::test. */ unsigned def_id = instr->definitions[0].tempId(); + if (ctx.assignments[def_id].affinity) { + assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity]; + if (affinity.assigned && affinity.reg != instr->operands[!literal_idx].physReg() && + !register_file.test(affinity.reg, instr->operands[!literal_idx].bytes())) + return; + } + + /* The instruction object is re-tagged in place and then accessed through sopk(), hence the compile-time size check. */ static_assert(sizeof(SOPK_instruction) <= sizeof(SOP2_instruction), + "Invalid direct instruction cast."); + instr->format = Format::SOPK; + SOPK_instruction* instr_sopk = &instr->sopk(); + + /* Only the low 16 bits of the literal are stored in imm. */ instr_sopk->imm = instr_sopk->operands[literal_idx].constantValue() & 0xffff; + /* Shuffle the operands so that the temp ends up in slot 0, a third operand (if any) in slot 1 and the literal in the last slot, then drop the literal. */ if (literal_idx == 0) + std::swap(instr_sopk->operands[0], instr_sopk->operands[1]); + if 
(instr_sopk->operands.size() > 2) + std::swap(instr_sopk->operands[1], instr_sopk->operands[2]); + instr_sopk->operands.pop_back(); + + /* Replace each accepted opcode by its k-suffixed opcode; any other opcode was rejected at the top of the function. */ switch (instr_sopk->opcode) { + case aco_opcode::s_add_i32: instr_sopk->opcode = aco_opcode::s_addk_i32; break; + case aco_opcode::s_mul_i32: instr_sopk->opcode = aco_opcode::s_mulk_i32; break; + case aco_opcode::s_cselect_b32: instr_sopk->opcode = aco_opcode::s_cmovk_i32; break; + default: unreachable("illegal instruction"); + } +} + /* Dispatch to the encoding optimization matching the instruction class: optimize_encoding_vop2 for VALU instructions and optimize_encoding_sopk for SALU instructions. */ void optimize_encoding(Program* program, ra_ctx& ctx, RegisterFile& register_file, aco_ptr& instr) { if (instr->isVALU()) optimize_encoding_vop2(program, ctx, register_file, instr); + if (instr->isSALU()) + optimize_encoding_sopk(program, ctx, register_file, instr); } } /* end namespace */ @@ -2744,7 +2802,8 @@ register_allocation(Program* program, std::vector& live_out_per_block, ra instr->operands[2].regClass() == v1); instr->definitions[0].setFixed(instr->operands[2].physReg()); } else if (instr->opcode == aco_opcode::s_addk_i32 || - instr->opcode == aco_opcode::s_mulk_i32) { + instr->opcode == aco_opcode::s_mulk_i32 || + instr->opcode == aco_opcode::s_cmovk_i32) { /* s_cmovk_i32 is now handled like s_addk_i32 and s_mulk_i32: the definition is fixed to the physReg of operand 0, which must have the same size in bytes. */ assert(instr->definitions[0].bytes() == instr->operands[0].bytes()); instr->definitions[0].setFixed(instr->operands[0].physReg()); } else if (instr->isMUBUF() && instr->definitions.size() == 1 &&