From f309d76aab3ec56d5b84fb54ffb1fc9d6472da97 Mon Sep 17 00:00:00 2001 From: Natalie Vock Date: Thu, 27 Mar 2025 18:37:52 +0100 Subject: [PATCH] aco: Add support for multiple ops fixed to defs Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_ir.cpp | 16 +++++++------- src/amd/compiler/aco_ir.h | 2 +- src/amd/compiler/aco_live_var_analysis.cpp | 10 +++++++-- src/amd/compiler/aco_register_allocation.cpp | 22 +++++++++++--------- src/amd/compiler/aco_validate.cpp | 12 ++++++----- 5 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index f30f951510d..16fc05df6b0 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -1410,9 +1410,11 @@ should_form_clause(const Instruction* a, const Instruction* b) return false; } -int -get_op_fixed_to_def(Instruction* instr) +aco::small_vec +get_ops_fixed_to_def(Instruction* instr) { + aco::small_vec ops; + if (instr->opcode == aco_opcode::v_interp_p2_f32 || instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_mac_f16 || instr->opcode == aco_opcode::v_fmac_f16 || instr->opcode == aco_opcode::v_mac_legacy_f32 || @@ -1421,17 +1423,17 @@ get_op_fixed_to_def(Instruction* instr) instr->opcode == aco_opcode::v_writelane_b32_e64 || instr->opcode == aco_opcode::v_dot4c_i32_i8 || instr->opcode == aco_opcode::s_fmac_f32 || instr->opcode == aco_opcode::s_fmac_f16) { - return 2; + ops.push_back(2); } else if (instr->opcode == aco_opcode::s_addk_i32 || instr->opcode == aco_opcode::s_mulk_i32 || instr->opcode == aco_opcode::s_cmovk_i32) { - return 0; + ops.push_back(0); } else if (instr->isMUBUF() && instr->definitions.size() == 1 && instr->operands.size() == 4) { - return 3; + ops.push_back(3); } else if (instr->isMIMG() && instr->definitions.size() == 1 && !instr->operands[2].isUndefined()) { - return 2; + ops.push_back(2); } - return -1; + return ops; } uint8_t diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index a14fb760569..4b0ec7aab6f 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -2321,7 +2321,7 @@ void _aco_err(Program* program, const char* file, unsigned line, const char* fmt #define aco_err(program, ...) _aco_err(program, __FILE__, __LINE__, __VA_ARGS__) -int get_op_fixed_to_def(Instruction* instr); +aco::small_vec get_ops_fixed_to_def(Instruction* instr); /* utilities for dealing with register demand */ RegisterDemand get_live_changes(Instruction* instr); diff --git a/src/amd/compiler/aco_live_var_analysis.cpp b/src/amd/compiler/aco_live_var_analysis.cpp index 94c431d59a9..85eb3173020 100644 --- a/src/amd/compiler/aco_live_var_analysis.cpp +++ b/src/amd/compiler/aco_live_var_analysis.cpp @@ -243,9 +243,15 @@ process_live_temps_per_block(live_ctx& ctx, Block* block) } /* Check if a definition clobbers some operand */ - int op_idx = get_op_fixed_to_def(insn); - if (op_idx != -1) + auto fixed_ops = get_ops_fixed_to_def(insn); + for (auto op_idx : fixed_ops) { + assert(std::none_of(fixed_ops.begin(), fixed_ops.end(), + [&](uint32_t i) { + return i != op_idx && insn->operands[i].getTemp() == + insn->operands[op_idx].getTemp(); + })); insn->operands[op_idx].setClobbered(true); + } /* we need to do this in a separate loop because the next one can * setKill() for several operands at once and we don't want to diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index e3eee992895..70638fe6636 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -2104,9 +2104,12 @@ operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr& instr, unsign return false; FALLTHROUGH; case Format::SOP2: - case Format::SOP1: - return get_op_fixed_to_def(instr.get()) != (int)idx || + case Format::SOP1: { + auto fixed_ops = get_ops_fixed_to_def(instr.get()); + return std::all_of(fixed_ops.begin(), fixed_ops.end(), + [idx](auto op_idx) { return op_idx != idx; }) || is_sgpr_writable_without_side_effects(gfx_level, reg); + } default: // TODO: there are more instructions with restrictions on registers return true; @@ -2813,7 +2816,7 @@ get_affinities(ra_ctx& ctx) ctx.assignments[instr->operands[0].tempId()].m0 = true; } - int op_fixed_to_def0 = get_op_fixed_to_def(instr.get()); + auto ops_fixed_to_defs = get_ops_fixed_to_def(instr.get()); for (unsigned i = 0; i < instr->definitions.size(); i++) { const Definition& def = instr->definitions[i]; if (!def.isTemp()) @@ -2827,8 +2830,8 @@ get_affinities(ra_ctx& ctx) Operand op; if (instr->opcode == aco_opcode::p_parallelcopy) { op = instr->operands[i]; - } else if (i == 0 && op_fixed_to_def0 != -1) { - op = instr->operands[op_fixed_to_def0]; + } else if (i < ops_fixed_to_defs.size()) { + op = instr->operands[ops_fixed_to_defs[i]]; } else if (vop3_can_use_vop2acc(ctx, instr.get())) { op = instr->operands[2]; } else if (i == 0 && sop2_can_use_sopk(ctx, instr.get())) { @@ -3253,11 +3256,10 @@ register_allocation(Program* program, ra_test_policy policy) * We can't read from the old location because it's corrupted, and we can't write the new * location because that's used by a live-through operand. */ - int op_fixed_to_def = get_op_fixed_to_def(instr.get()); - if (op_fixed_to_def != -1) { - instr->definitions[0].setPrecolored(instr->operands[op_fixed_to_def].physReg()); - instr->operands[op_fixed_to_def].setPrecolored( - instr->operands[op_fixed_to_def].physReg()); + unsigned fixed_def_idx = 0; + for (auto op_idx : get_ops_fixed_to_def(instr.get())) { + instr->definitions[fixed_def_idx++].setPrecolored(instr->operands[op_idx].physReg()); + instr->operands[op_idx].setPrecolored(instr->operands[op_idx].physReg()); } /* handle fixed definitions first */ diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 720a97cc244..2cad50300a2 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -1472,11 +1472,13 @@ validate_ra(Program* program) assignments[def.tempId()].valid = true; } - int op_fixed_to_def = get_op_fixed_to_def(instr.get()); - if (op_fixed_to_def != -1 && - instr->definitions[0].physReg() != instr->operands[op_fixed_to_def].physReg()) { - err |= ra_fail(program, loc, Location(), - "Operand %d must have the same register as definition", op_fixed_to_def); + unsigned fixed_def_idx = 0; + for (auto op_idx : get_ops_fixed_to_def(instr.get())) { + if (instr->definitions[fixed_def_idx++].physReg() != + instr->operands[op_idx].physReg()) { + err |= ra_fail(program, loc, Location(), + "Operand %d must have the same register as definition", op_idx); + } } } }