aco/ra: rework fixed operands

This moves all fixed operands at once, so they don't interfere with one
another.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17493>
This commit is contained in:
Rhys Perry 2022-06-24 12:36:24 +01:00 committed by Marge Bot
parent ec867ef0e7
commit 061b8bfd29
2 changed files with 110 additions and 33 deletions

View file

@ -1918,9 +1918,6 @@ bool
operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg,
RegClass rc)
{
if (instr->operands[idx].isFixed())
return instr->operands[idx].physReg() == reg;
bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
instr->opcode == aco_opcode::v_writelane_b32_e64;
if (gfx_level <= GFX9 && is_writelane && idx <= 1) {
@ -1952,37 +1949,77 @@ operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsign
}
}
/* Moves all fixed (precolored) temporary operands of instr to their required
 * registers at once.
 *
 * For every fixed temporary operand that is not yet located in its fixed
 * register, a copy to that register is appended to parallelcopy (only once per
 * distinct temp/register pair). The variables found in the target registers are
 * then collected and passed to get_regs_for_copies(), and update_renames() is
 * finally run against register_file. All intermediate bookkeeping happens on a
 * private copy of register_file.
 */
void
handle_fixed_operands(ra_ctx& ctx, RegisterFile& register_file,
                      std::vector<std::pair<Operand, Definition>>& parallelcopy,
                      aco_ptr<Instruction>& instr)
{
   /* the set of copied operands is tracked with one bit per operand index */
   assert(instr->operands.size() <= 64);

   RegisterFile scratch_file(register_file);
   uint64_t copied_ops = 0;

   for (unsigned idx = 0; idx < instr->operands.size(); idx++) {
      Operand& fixed_op = instr->operands[idx];
      if (!(fixed_op.isTemp() && fixed_op.isFixed()))
         continue;

      PhysReg cur_reg = ctx.assignments[fixed_op.tempId()].reg;
      if (fixed_op.physReg() == cur_reg) {
         /* already in place: only reserve the register in the scratch file */
         scratch_file.block(fixed_op.physReg(), fixed_op.regClass());
         continue;
      }

      /* skip the operand if an earlier one already requested the same copy */
      bool duplicate = false;
      u_foreach_bit64 (prev, copied_ops) {
         Operand& earlier = instr->operands[prev];
         if (earlier.tempId() == fixed_op.tempId() && earlier.physReg() == fixed_op.physReg()) {
            duplicate = true;
            break;
         }
      }
      if (duplicate)
         continue;

      /* clear the source from the scratch file so fixed operands are not
       * collected by collect_vars() */
      scratch_file.clear(cur_reg,
                         fixed_op.regClass()); // TODO: try to avoid moving block vars to src

      copied_ops |= uint64_t{1} << idx;

      Operand pc_op(fixed_op.getTemp(), cur_reg);
      Definition pc_def(fixed_op.physReg(), pc_op.regClass());
      parallelcopy.emplace_back(pc_op, pc_def);
   }

   if (copied_ops == 0)
      return;

   std::vector<unsigned> blockers;
   u_foreach_bit64 (idx, copied_ops) {
      Operand& fixed_op = instr->operands[idx];
      PhysRegInterval target{fixed_op.physReg(), fixed_op.size()};

      std::vector<unsigned> in_the_way = collect_vars(ctx, scratch_file, target);
      blockers.insert(blockers.end(), in_the_way.begin(), in_the_way.end());

      /* keep get_regs_for_copies() from handing out the target registers */
      scratch_file.block(fixed_op.physReg(), fixed_op.regClass());
   }

   get_regs_for_copies(ctx, scratch_file, parallelcopy, blockers, instr, PhysRegInterval());
   update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops | fill_killed_ops);
}
void
get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
std::vector<std::pair<Operand, Definition>>& parallelcopy,
aco_ptr<Instruction>& instr, Operand& operand, unsigned operand_index)
{
/* check if the operand is fixed */
/* clear the operand in case it's only a stride mismatch */
PhysReg src = ctx.assignments[operand.tempId()].reg;
PhysReg dst;
if (operand.isFixed()) {
assert(operand.physReg() != src);
/* check if target reg is blocked, and move away the blocking var */
if (register_file.test(operand.physReg(), operand.bytes())) {
PhysRegInterval target{operand.physReg(), operand.size()};
RegisterFile tmp_file(register_file);
std::vector<unsigned> blocking_vars = collect_vars(ctx, tmp_file, target);
tmp_file.clear(src, operand.regClass()); // TODO: try to avoid moving block vars to src
tmp_file.block(operand.physReg(), operand.regClass());
get_regs_for_copies(ctx, tmp_file, parallelcopy, blocking_vars, instr, PhysRegInterval());
}
dst = operand.physReg();
} else {
/* clear the operand in case it's only a stride mismatch */
register_file.clear(src, operand.regClass());
dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index);
}
register_file.clear(src, operand.regClass());
PhysReg dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index);
Operand pc_op = operand;
pc_op.setFixed(src);
@ -2757,6 +2794,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
bool temp_in_scc = register_file[scc];
/* handle operands */
bool fixed = false;
for (unsigned i = 0; i < instr->operands.size(); ++i) {
auto& operand = instr->operands[i];
if (!operand.isTemp())
@ -2766,6 +2804,18 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
operand.setTemp(read_variable(ctx, operand.getTemp(), block.index));
assert(ctx.assignments[operand.tempId()].assigned);
fixed |=
operand.isFixed() && ctx.assignments[operand.tempId()].reg != operand.physReg();
}
if (fixed)
handle_fixed_operands(ctx, register_file, parallelcopy, instr);
for (unsigned i = 0; i < instr->operands.size(); ++i) {
auto& operand = instr->operands[i];
if (!operand.isTemp() || operand.isFixed())
continue;
PhysReg reg = ctx.assignments[operand.tempId()].reg;
if (operand_can_use_reg(program->gfx_level, instr, i, reg, operand.regClass()))
operand.setFixed(reg);

View file

@ -89,7 +89,7 @@ BEGIN_TEST(regalloc.precolor.swap)
//! s2: %op1:s[2-3] = p_unit_test
Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
//! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
//! s2: %op0_2:s[2-3], s2: %op1_2:s[0-1] = p_parallelcopy %op0:s[0-1], %op1:s[2-3]
//! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
Operand op(inputs[0]);
op.setFixed(PhysReg(2));
@ -103,7 +103,7 @@ BEGIN_TEST(regalloc.precolor.blocking_vector)
if (!setup_cs("s2 s1", GFX10))
return;
//! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2]
//! s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp0:s[0-1]
//! p_unit_test %tmp1_2:s[1]
Operand op(inputs[1]);
op.setFixed(PhysReg(1));
@ -120,7 +120,7 @@ BEGIN_TEST(regalloc.precolor.vector.test)
if (!setup_cs("s2 s1 s1", GFX10))
return;
//! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1]
//! s2: %tmp0_2:s[2-3], s1: %tmp2_2:s[0] = p_parallelcopy %tmp0:s[0-1], %tmp2:s[3]
//! p_unit_test %tmp0_2:s[2-3]
Operand op(inputs[0]);
op.setFixed(PhysReg(2));
@ -137,7 +137,7 @@ BEGIN_TEST(regalloc.precolor.vector.collect)
if (!setup_cs("s2 s1 s1", GFX10))
return;
//! s1: %tmp1_2:s[0], s1: %tmp2_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp2:s[3], %tmp0:s[0-1]
//! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[0], s1: %tmp2_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2], %tmp2:s[3]
//! p_unit_test %tmp0_2:s[2-3]
Operand op(inputs[0]);
op.setFixed(PhysReg(2));
@ -154,13 +154,40 @@ BEGIN_TEST(regalloc.precolor.vgpr_move)
if (!setup_cs("v1 v1", GFX10))
return;
//! v1: %tmp0_2:v[1], v1: %tmp1_2:v[0] = p_parallelcopy %tmp0:v[0], %tmp1:v[1]
//! v1: %tmp1_2:v[0], v1: %tmp0_2:v[1] = p_parallelcopy %tmp1:v[1], %tmp0:v[0]
//! p_unit_test %tmp0_2:v[1], %tmp1_2:v[0]
bld.pseudo(aco_opcode::p_unit_test, inputs[0], Operand(inputs[1], PhysReg(256)));
finish_ra_test(ra_test_policy());
END_TEST
/* Four VGPR temporaries start in v[0]..v[3] and every operand of p_unit_test is
 * fixed to a register that currently holds a different operand (tmp3 -> v[0],
 * tmp0 -> v[1], tmp1 -> v[2], tmp2 -> v[3]). The expected output is a single
 * p_parallelcopy that performs all four moves, followed by p_unit_test reading
 * the renamed temporaries from their fixed registers. */
BEGIN_TEST(regalloc.precolor.multiple_operands)
//>> v1: %tmp0:v[0], v1: %tmp1:v[1], v1: %tmp2:v[2], v1: %tmp3:v[3] = p_startpgm
if (!setup_cs("v1 v1 v1 v1", GFX10))
return;
//! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
//! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256+0)),
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[1], PhysReg(256+2)),
Operand(inputs[2], PhysReg(256+3)));
finish_ra_test(ra_test_policy());
END_TEST
/* The same temporary (tmp0, starting in v[0]) is passed three times, each time
 * fixed to a different register: PhysReg(256+0), PhysReg(256+1) and
 * PhysReg(256+2). The expected p_parallelcopy copies tmp0 from v[0] into v[1]
 * and v[2].
 * NOTE(review): the expected p_unit_test line lists %tmp1:v[1] for all three
 * operands although they are fixed to three different registers -- confirm that
 * this expectation is intended. */
BEGIN_TEST(regalloc.precolor.different_regs)
//>> v1: %tmp0:v[0] = p_startpgm
if (!setup_cs("v1", GFX10))
return;
//! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
//! p_unit_test %tmp1:v[1], %tmp1:v[1], %tmp1:v[1]
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256+0)),
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[0], PhysReg(256+2)));
finish_ra_test(ra_test_policy());
END_TEST
BEGIN_TEST(regalloc.scratch_sgpr.create_vector)
if (!setup_cs("v1 s1", GFX7))
return;