From 3fb1a64918d61d8d96158730a354b4201f1f7b34 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Thu, 6 Jun 2024 11:40:33 +0200 Subject: [PATCH] aco: move s_add_u32 -> s_addk_i32 optimization fully to ra MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Having this in one place is better. When I wrote the old code I wasn't aware that checking the kill flag on definitions is the same as checking zero uses. Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 5 ----- src/amd/compiler/aco_register_allocation.cpp | 8 ++++++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 1e4f765d3dd..ecce23e8bf0 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -5280,11 +5280,6 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr) if (instr->isSOPC() && ctx.program->gfx_level < GFX12) try_convert_sopc_to_sopk(instr); - /* allow more s_addk_i32 optimizations if carry isn't used */ - if (instr->opcode == aco_opcode::s_add_u32 && ctx.uses[instr->definitions[1].tempId()] == 0 && - (instr->operands[0].isLiteral() || instr->operands[1].isLiteral())) - instr->opcode = aco_opcode::s_add_i32; - if (instr->opcode == aco_opcode::v_fma_mixlo_f16 || instr->opcode == aco_opcode::v_fma_mix_f32) opt_fma_mix_acc(ctx, instr); diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 06a24507239..dcedb16b311 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2613,8 +2613,11 @@ vop3_can_use_vop2acc(ra_ctx& ctx, Instruction* instr) bool sop2_can_use_sopk(ra_ctx& ctx, Instruction* instr) { - if (instr->opcode != aco_opcode::s_add_i32 && instr->opcode != aco_opcode::s_mul_i32 && - instr->opcode != aco_opcode::s_cselect_b32) + if (instr->opcode != aco_opcode::s_add_i32 && instr->opcode != 
aco_opcode::s_add_u32 && + instr->opcode != aco_opcode::s_mul_i32 && instr->opcode != aco_opcode::s_cselect_b32) + return false; + + if (instr->opcode == aco_opcode::s_add_u32 && !instr->definitions[1].isKill()) return false; uint32_t literal_idx = 0; @@ -2878,6 +2881,7 @@ optimize_encoding_sopk(ra_ctx& ctx, RegisterFile& register_file, aco_ptr<Instruc instr->operands.pop_back(); switch (instr->opcode) { + case aco_opcode::s_add_u32: case aco_opcode::s_add_i32: instr->opcode = aco_opcode::s_addk_i32; break; case aco_opcode::s_mul_i32: instr->opcode = aco_opcode::s_mulk_i32; break; case aco_opcode::s_cselect_b32: instr->opcode = aco_opcode::s_cmovk_i32; break;