aco: move s_add_u32 -> s_addk_i32 optimization fully to ra

Having this in one place is better.
When I wrote the old code, I wasn't aware that checking the kill flag on
definitions is the same as checking for zero uses.

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29512>
This commit is contained in:
Georg Lehmann 2024-06-06 11:40:33 +02:00 committed by Marge Bot
parent 60f3f0fdbb
commit 3fb1a64918
2 changed files with 6 additions and 7 deletions

View file

@ -5280,11 +5280,6 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (instr->isSOPC() && ctx.program->gfx_level < GFX12)
try_convert_sopc_to_sopk(instr);
/* allow more s_addk_i32 optimizations if carry isn't used */
if (instr->opcode == aco_opcode::s_add_u32 && ctx.uses[instr->definitions[1].tempId()] == 0 &&
(instr->operands[0].isLiteral() || instr->operands[1].isLiteral()))
instr->opcode = aco_opcode::s_add_i32;
if (instr->opcode == aco_opcode::v_fma_mixlo_f16 || instr->opcode == aco_opcode::v_fma_mix_f32)
opt_fma_mix_acc(ctx, instr);

View file

@ -2613,8 +2613,11 @@ vop3_can_use_vop2acc(ra_ctx& ctx, Instruction* instr)
bool
sop2_can_use_sopk(ra_ctx& ctx, Instruction* instr)
{
if (instr->opcode != aco_opcode::s_add_i32 && instr->opcode != aco_opcode::s_mul_i32 &&
instr->opcode != aco_opcode::s_cselect_b32)
if (instr->opcode != aco_opcode::s_add_i32 && instr->opcode != aco_opcode::s_add_u32 &&
instr->opcode != aco_opcode::s_mul_i32 && instr->opcode != aco_opcode::s_cselect_b32)
return false;
if (instr->opcode == aco_opcode::s_add_u32 && !instr->definitions[1].isKill())
return false;
uint32_t literal_idx = 0;
@ -2878,6 +2881,7 @@ optimize_encoding_sopk(ra_ctx& ctx, RegisterFile& register_file, aco_ptr<Instruc
instr->operands.pop_back();
switch (instr->opcode) {
case aco_opcode::s_add_u32:
case aco_opcode::s_add_i32: instr->opcode = aco_opcode::s_addk_i32; break;
case aco_opcode::s_mul_i32: instr->opcode = aco_opcode::s_mulk_i32; break;
case aco_opcode::s_cselect_b32: instr->opcode = aco_opcode::s_cmovk_i32; break;