aco/ra: fix get_reg_for_operand() with no free registers

fossil-db (Sienna Cichlid):
Totals from 195 (0.13% of 149839) affected shaders:
CodeSize: 2352160 -> 2356720 (+0.19%); split: -0.00%, +0.20%
Instrs: 431976 -> 433124 (+0.27%); split: -0.00%, +0.27%
Latency: 10174434 -> 10174897 (+0.00%); split: -0.00%, +0.00%
InvThroughput: 4044388 -> 4044425 (+0.00%); split: -0.00%, +0.00%
Copies: 67634 -> 68762 (+1.67%); split: -0.00%, +1.67%

fossil-db (Polaris):
Totals from 186 (0.12% of 151365) affected shaders:
CodeSize: 2272356 -> 2276848 (+0.20%); split: -0.00%, +0.20%
Instrs: 432390 -> 433513 (+0.26%); split: -0.00%, +0.26%
Latency: 13153394 -> 13160194 (+0.05%); split: -0.00%, +0.05%
InvThroughput: 10889509 -> 10889967 (+0.00%); split: -0.00%, +0.00%
SClause: 12745 -> 12747 (+0.02%)
Copies: 74832 -> 75945 (+1.49%); split: -0.01%, +1.50%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10459>
This commit is contained in:
Rhys Perry 2021-04-23 14:29:22 +01:00 committed by Marge Bot
parent 4e459df0fc
commit bc95d55e1f
2 changed files with 27 additions and 2 deletions

View file

@ -1728,10 +1728,11 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
aco_ptr<Instruction>& instr, Operand& operand, unsigned operand_index)
{
/* check if the operand is fixed */
PhysReg src = ctx.assignments[operand.tempId()].reg;
PhysReg dst;
bool blocking_var = false;
if (operand.isFixed()) {
assert(operand.physReg() != ctx.assignments[operand.tempId()].reg);
assert(operand.physReg() != src);
/* check if target reg is blocked, and move away the blocking var */
if (register_file[operand.physReg()]) {
@ -1741,6 +1742,11 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
Operand pc_op = Operand(Temp{blocking_id, rc});
pc_op.setFixed(operand.physReg());
/* make space in the register file for get_reg() and then block the target reg */
register_file.clear(src, operand.regClass());
register_file.clear(pc_op.physReg(), rc);
register_file.block(operand.physReg(), operand.regClass());
/* find free reg */
PhysReg reg = get_reg(ctx, register_file, pc_op.getTemp(), parallelcopy, ctx.pseudo_dummy);
update_renames(ctx, register_file, parallelcopy, ctx.pseudo_dummy, true);
@ -1756,7 +1762,7 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
}
Operand pc_op = operand;
pc_op.setFixed(ctx.assignments[operand.tempId()].reg);
pc_op.setFixed(src);
Definition pc_def = Definition(dst, pc_op.regClass());
parallelcopy.emplace_back(pc_op, pc_def);
update_renames(ctx, register_file, parallelcopy, instr, true);

View file

@ -77,3 +77,22 @@ BEGIN_TEST(regalloc.32bit_partial_write)
finish_ra_test(ra_test_policy());
END_TEST
/* Register-allocation test for a fixed (precolored) operand when no SGPR is
 * free: with the SGPR limit set to 4, %op0 (s[0-1]) and %op1 (s[2-3]) occupy
 * every available register, so satisfying the fixed operand is expected to
 * swap the two values with a single p_parallelcopy.
 *
 * NOTE(review): the `//>>` and `//!` lines look like expected-output patterns
 * consumed by the test harness rather than ordinary comments -- keep them
 * verbatim and in order; confirm against the harness. */
BEGIN_TEST(regalloc.precolor.swap)
//>> s2: %op0:s[0-1] = p_startpgm
if (!setup_cs("s2", GFX10))
return;
/* Only four SGPRs: exactly enough for the two s2 values used below. */
program->dev.sgpr_limit = 4;
//! s2: %op1:s[2-3] = p_unit_test
Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
//! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
//! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
/* Fix the first input to PhysReg(2); the expected output above shows that
 * register range (s[2-3]) holding %op1 at this point, hence the swap. */
Operand op(inputs[0]);
op.setFixed(PhysReg(2));
bld.pseudo(aco_opcode::p_unit_test, op, op1);
finish_ra_test(ra_test_policy());
END_TEST