diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 1e4f765d3dd..ecce23e8bf0 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -5280,11 +5280,6 @@ apply_literals(opt_ctx& ctx, aco_ptr& instr) if (instr->isSOPC() && ctx.program->gfx_level < GFX12) try_convert_sopc_to_sopk(instr); - /* allow more s_addk_i32 optimizations if carry isn't used */ - if (instr->opcode == aco_opcode::s_add_u32 && ctx.uses[instr->definitions[1].tempId()] == 0 && - (instr->operands[0].isLiteral() || instr->operands[1].isLiteral())) - instr->opcode = aco_opcode::s_add_i32; - if (instr->opcode == aco_opcode::v_fma_mixlo_f16 || instr->opcode == aco_opcode::v_fma_mix_f32) opt_fma_mix_acc(ctx, instr); diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 06a24507239..dcedb16b311 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2613,8 +2613,11 @@ vop3_can_use_vop2acc(ra_ctx& ctx, Instruction* instr) bool sop2_can_use_sopk(ra_ctx& ctx, Instruction* instr) { - if (instr->opcode != aco_opcode::s_add_i32 && instr->opcode != aco_opcode::s_mul_i32 && - instr->opcode != aco_opcode::s_cselect_b32) + if (instr->opcode != aco_opcode::s_add_i32 && instr->opcode != aco_opcode::s_add_u32 && + instr->opcode != aco_opcode::s_mul_i32 && instr->opcode != aco_opcode::s_cselect_b32) + return false; + + if (instr->opcode == aco_opcode::s_add_u32 && !instr->definitions[1].isKill()) return false; uint32_t literal_idx = 0; @@ -2878,6 +2881,7 @@ optimize_encoding_sopk(ra_ctx& ctx, RegisterFile& register_file, aco_ptroperands.pop_back(); switch (instr->opcode) { + case aco_opcode::s_add_u32: case aco_opcode::s_add_i32: instr->opcode = aco_opcode::s_addk_i32; break; case aco_opcode::s_mul_i32: instr->opcode = aco_opcode::s_mulk_i32; break; case aco_opcode::s_cselect_b32: instr->opcode = aco_opcode::s_cmovk_i32; break;