From bc95d55e1f4ce54ff086352d5e52dc733d85118d Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Fri, 23 Apr 2021 14:29:22 +0100
Subject: [PATCH] aco/ra: fix get_reg_for_operand() with no free registers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

fossil-db (Sienna Cichlid):
Totals from 195 (0.13% of 149839) affected shaders:
CodeSize: 2352160 -> 2356720 (+0.19%); split: -0.00%, +0.20%
Instrs: 431976 -> 433124 (+0.27%); split: -0.00%, +0.27%
Latency: 10174434 -> 10174897 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 4044388 -> 4044425 (+0.00%); split: -0.00%, +0.00%
Copies: 67634 -> 68762 (+1.67%); split: -0.00%, +1.67%

fossil-db (Polaris):
Totals from 186 (0.12% of 151365) affected shaders:
CodeSize: 2272356 -> 2276848 (+0.20%); split: -0.00%, +0.20%
Instrs: 432390 -> 433513 (+0.26%); split: -0.00%, +0.26%
Latency: 13153394 -> 13160194 (+0.05%); split: -0.00%, +0.05%
InvThroughput: 10889509 -> 10889967 (+0.00%); split: -0.00%, +0.00%
SClause: 12745 -> 12747 (+0.02%)
Copies: 74832 -> 75945 (+1.49%); split: -0.01%, +1.50%

Signed-off-by: Rhys Perry
Reviewed-by: Daniel Schürmann
Part-of:
---
 src/amd/compiler/aco_register_allocation.cpp | 10 ++++++++--
 src/amd/compiler/tests/test_regalloc.cpp     | 19 +++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index d7e64f88a89..0eb52ecb33c 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -1728,10 +1728,11 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
                          aco_ptr& instr, Operand& operand, unsigned operand_index)
 {
    /* check if the operand is fixed */
+   PhysReg src = ctx.assignments[operand.tempId()].reg;
    PhysReg dst;
    bool blocking_var = false;
    if (operand.isFixed()) {
-      assert(operand.physReg() != ctx.assignments[operand.tempId()].reg);
+      assert(operand.physReg() != src);

       /* check if target reg is blocked, and move away the blocking var */
       if (register_file[operand.physReg()]) {
@@ -1741,6 +1742,11 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
          Operand pc_op = Operand(Temp{blocking_id, rc});
          pc_op.setFixed(operand.physReg());

+         /* make space in the register file for get_reg() and then block the target reg */
+         register_file.clear(src, operand.regClass());
+         register_file.clear(pc_op.physReg(), rc);
+         register_file.block(operand.physReg(), operand.regClass());
+
          /* find free reg */
          PhysReg reg = get_reg(ctx, register_file, pc_op.getTemp(), parallelcopy, ctx.pseudo_dummy);
          update_renames(ctx, register_file, parallelcopy, ctx.pseudo_dummy, true);
@@ -1756,7 +1762,7 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
    }

    Operand pc_op = operand;
-   pc_op.setFixed(ctx.assignments[operand.tempId()].reg);
+   pc_op.setFixed(src);
    Definition pc_def = Definition(dst, pc_op.regClass());
    parallelcopy.emplace_back(pc_op, pc_def);
    update_renames(ctx, register_file, parallelcopy, instr, true);
diff --git a/src/amd/compiler/tests/test_regalloc.cpp b/src/amd/compiler/tests/test_regalloc.cpp
index 0bf0a237118..2f1565d1c37 100644
--- a/src/amd/compiler/tests/test_regalloc.cpp
+++ b/src/amd/compiler/tests/test_regalloc.cpp
@@ -77,3 +77,22 @@ BEGIN_TEST(regalloc.32bit_partial_write)

    finish_ra_test(ra_test_policy());
 END_TEST
+
+BEGIN_TEST(regalloc.precolor.swap)
+   //>> s2: %op0:s[0-1] = p_startpgm
+   if (!setup_cs("s2", GFX10))
+      return;
+
+   program->dev.sgpr_limit = 4;
+
+   //! s2: %op1:s[2-3] = p_unit_test
+   Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
+
+   //! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
+   //! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
+   Operand op(inputs[0]);
+   op.setFixed(PhysReg(2));
+   bld.pseudo(aco_opcode::p_unit_test, op, op1);
+
+   finish_ra_test(ra_test_policy());
+END_TEST