diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index d7e64f88a89..0eb52ecb33c 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -1728,10 +1728,11 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file, aco_ptr& instr, Operand& operand, unsigned operand_index) { /* check if the operand is fixed */ + PhysReg src = ctx.assignments[operand.tempId()].reg; PhysReg dst; bool blocking_var = false; if (operand.isFixed()) { - assert(operand.physReg() != ctx.assignments[operand.tempId()].reg); + assert(operand.physReg() != src); /* check if target reg is blocked, and move away the blocking var */ if (register_file[operand.physReg()]) { @@ -1741,6 +1742,11 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file, Operand pc_op = Operand(Temp{blocking_id, rc}); pc_op.setFixed(operand.physReg()); + /* make space in the register file for get_reg() and then block the target reg */ + register_file.clear(src, operand.regClass()); + register_file.clear(pc_op.physReg(), rc); + register_file.block(operand.physReg(), operand.regClass()); + /* find free reg */ PhysReg reg = get_reg(ctx, register_file, pc_op.getTemp(), parallelcopy, ctx.pseudo_dummy); update_renames(ctx, register_file, parallelcopy, ctx.pseudo_dummy, true); @@ -1756,7 +1762,7 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file, } Operand pc_op = operand; - pc_op.setFixed(ctx.assignments[operand.tempId()].reg); + pc_op.setFixed(src); Definition pc_def = Definition(dst, pc_op.regClass()); parallelcopy.emplace_back(pc_op, pc_def); update_renames(ctx, register_file, parallelcopy, instr, true); diff --git a/src/amd/compiler/tests/test_regalloc.cpp b/src/amd/compiler/tests/test_regalloc.cpp index 0bf0a237118..2f1565d1c37 100644 --- a/src/amd/compiler/tests/test_regalloc.cpp +++ b/src/amd/compiler/tests/test_regalloc.cpp @@ -77,3 +77,22 @@ 
BEGIN_TEST(regalloc.32bit_partial_write) finish_ra_test(ra_test_policy()); END_TEST + /* regalloc.precolor.swap: the SGPR limit is set to 4 and the check lines expect %op0 in s[0-1] and %op1 in s[2-3]. The first operand of the final p_unit_test is inputs[0] fixed to PhysReg(2), and the check lines expect a single p_parallelcopy that exchanges the two s2 values before that instruction. */ +BEGIN_TEST(regalloc.precolor.swap) + //>> s2: %op0:s[0-1] = p_startpgm + if (!setup_cs("s2", GFX10)) + return; + + program->dev.sgpr_limit = 4; + + //! s2: %op1:s[2-3] = p_unit_test + Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2)); + + //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1] + //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1] + Operand op(inputs[0]); + op.setFixed(PhysReg(2)); + bld.pseudo(aco_opcode::p_unit_test, op, op1); + + finish_ra_test(ra_test_policy()); +END_TEST