mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-24 17:30:12 +01:00
aco/ra: rework fixed operands
This moves all fixed operands at once, so they don't interfere with one another. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17493>
This commit is contained in:
parent
ec867ef0e7
commit
061b8bfd29
2 changed files with 110 additions and 33 deletions
|
|
@ -1918,9 +1918,6 @@ bool
|
|||
operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsigned idx, PhysReg reg,
|
||||
RegClass rc)
|
||||
{
|
||||
if (instr->operands[idx].isFixed())
|
||||
return instr->operands[idx].physReg() == reg;
|
||||
|
||||
bool is_writelane = instr->opcode == aco_opcode::v_writelane_b32 ||
|
||||
instr->opcode == aco_opcode::v_writelane_b32_e64;
|
||||
if (gfx_level <= GFX9 && is_writelane && idx <= 1) {
|
||||
|
|
@ -1952,37 +1949,77 @@ operand_can_use_reg(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr, unsign
|
|||
}
|
||||
}
|
||||
|
||||
/* Handles every temp operand of instr that is fixed to a physical register
 * different from the register its temporary is currently assigned to.
 *
 * For each distinct (temporary, fixed register) pair one copy is appended to
 * parallelcopy. The variables that collect_vars() reports for the target
 * registers are gathered over all such operands and handed to
 * get_regs_for_copies() in a single call, after which update_renames() is
 * invoked on the real register_file.
 *
 * All intermediate bookkeeping is done on tmp_file, a copy of register_file.
 * Operands that already sit in their fixed register get no copy; their
 * register is only blocked in tmp_file.
 *
 * The operand count is asserted to be at most 64 because the operands that
 * need a copy are tracked in a 64-bit mask.
 */
void
|
||||
handle_fixed_operands(ra_ctx& ctx, RegisterFile& register_file,
|
||||
std::vector<std::pair<Operand, Definition>>& parallelcopy,
|
||||
aco_ptr<Instruction>& instr)
|
||||
{
|
||||
assert(instr->operands.size() <= 64);
|
||||
|
||||
/* scratch copy; register_file itself is only passed to update_renames() below */
RegisterFile tmp_file(register_file);
|
||||
|
||||
/* bit i is set when operand i got an entry in parallelcopy */
uint64_t mask = 0;
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
Operand& op = instr->operands[i];
|
||||
|
||||
if (!op.isTemp() || !op.isFixed())
|
||||
continue;
|
||||
|
||||
PhysReg src = ctx.assignments[op.tempId()].reg;
|
||||
|
||||
/* already in its fixed register: no copy needed, just block it in tmp_file */
if (op.physReg() == src) {
|
||||
tmp_file.block(op.physReg(), op.regClass());
|
||||
continue;
|
||||
}
|
||||
|
||||
/* look for an earlier operand with the same temporary and the same fixed register */
bool found = false;
|
||||
u_foreach_bit64 (j, mask) {
|
||||
if (instr->operands[j].tempId() == op.tempId() &&
|
||||
instr->operands[j].physReg() == op.physReg()) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found)
|
||||
continue; /* the copy is already added to the list */
|
||||
|
||||
/* clear from tmp_file so fixed operands are not collected by collect_vars() */
|
||||
tmp_file.clear(src, op.regClass()); // TODO: try to avoid moving block vars to src
|
||||
|
||||
mask |= (uint64_t)1 << i;
|
||||
|
||||
/* copy source: the temporary at its current register; destination: the fixed register */
Operand pc_op(instr->operands[i].getTemp(), src);
|
||||
Definition pc_def = Definition(op.physReg(), pc_op.regClass());
|
||||
parallelcopy.emplace_back(pc_op, pc_def);
|
||||
}
|
||||
|
||||
/* no operand needs a copy */
if (!mask)
|
||||
return;
|
||||
|
||||
/* accumulate what collect_vars() returns for every target interval */
std::vector<unsigned> blocking_vars;
|
||||
u_foreach_bit64 (i, mask) {
|
||||
Operand& op = instr->operands[i];
|
||||
PhysRegInterval target{op.physReg(), op.size()};
|
||||
std::vector<unsigned> blocking_vars2 = collect_vars(ctx, tmp_file, target);
|
||||
blocking_vars.insert(blocking_vars.end(), blocking_vars2.begin(), blocking_vars2.end());
|
||||
|
||||
/* prevent get_regs_for_copies() from using these registers */
|
||||
tmp_file.block(op.physReg(), op.regClass());
|
||||
}
|
||||
|
||||
get_regs_for_copies(ctx, tmp_file, parallelcopy, blocking_vars, instr, PhysRegInterval());
|
||||
update_renames(ctx, register_file, parallelcopy, instr, rename_not_killed_ops | fill_killed_ops);
|
||||
}
|
||||
|
||||
void
|
||||
get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
|
||||
std::vector<std::pair<Operand, Definition>>& parallelcopy,
|
||||
aco_ptr<Instruction>& instr, Operand& operand, unsigned operand_index)
|
||||
{
|
||||
/* check if the operand is fixed */
|
||||
/* clear the operand in case it's only a stride mismatch */
|
||||
PhysReg src = ctx.assignments[operand.tempId()].reg;
|
||||
PhysReg dst;
|
||||
if (operand.isFixed()) {
|
||||
assert(operand.physReg() != src);
|
||||
|
||||
/* check if target reg is blocked, and move away the blocking var */
|
||||
if (register_file.test(operand.physReg(), operand.bytes())) {
|
||||
PhysRegInterval target{operand.physReg(), operand.size()};
|
||||
|
||||
RegisterFile tmp_file(register_file);
|
||||
|
||||
std::vector<unsigned> blocking_vars = collect_vars(ctx, tmp_file, target);
|
||||
|
||||
tmp_file.clear(src, operand.regClass()); // TODO: try to avoid moving block vars to src
|
||||
tmp_file.block(operand.physReg(), operand.regClass());
|
||||
|
||||
get_regs_for_copies(ctx, tmp_file, parallelcopy, blocking_vars, instr, PhysRegInterval());
|
||||
}
|
||||
dst = operand.physReg();
|
||||
|
||||
} else {
|
||||
/* clear the operand in case it's only a stride mismatch */
|
||||
register_file.clear(src, operand.regClass());
|
||||
dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index);
|
||||
}
|
||||
register_file.clear(src, operand.regClass());
|
||||
PhysReg dst = get_reg(ctx, register_file, operand.getTemp(), parallelcopy, instr, operand_index);
|
||||
|
||||
Operand pc_op = operand;
|
||||
pc_op.setFixed(src);
|
||||
|
|
@ -2757,6 +2794,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
|||
bool temp_in_scc = register_file[scc];
|
||||
|
||||
/* handle operands */
|
||||
bool fixed = false;
|
||||
for (unsigned i = 0; i < instr->operands.size(); ++i) {
|
||||
auto& operand = instr->operands[i];
|
||||
if (!operand.isTemp())
|
||||
|
|
@ -2766,6 +2804,18 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
|
|||
operand.setTemp(read_variable(ctx, operand.getTemp(), block.index));
|
||||
assert(ctx.assignments[operand.tempId()].assigned);
|
||||
|
||||
fixed |=
|
||||
operand.isFixed() && ctx.assignments[operand.tempId()].reg != operand.physReg();
|
||||
}
|
||||
|
||||
if (fixed)
|
||||
handle_fixed_operands(ctx, register_file, parallelcopy, instr);
|
||||
|
||||
for (unsigned i = 0; i < instr->operands.size(); ++i) {
|
||||
auto& operand = instr->operands[i];
|
||||
if (!operand.isTemp() || operand.isFixed())
|
||||
continue;
|
||||
|
||||
PhysReg reg = ctx.assignments[operand.tempId()].reg;
|
||||
if (operand_can_use_reg(program->gfx_level, instr, i, reg, operand.regClass()))
|
||||
operand.setFixed(reg);
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ BEGIN_TEST(regalloc.precolor.swap)
|
|||
//! s2: %op1:s[2-3] = p_unit_test
|
||||
Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
|
||||
|
||||
//! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
|
||||
//! s2: %op0_2:s[2-3], s2: %op1_2:s[0-1] = p_parallelcopy %op0:s[0-1], %op1:s[2-3]
|
||||
//! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
|
||||
Operand op(inputs[0]);
|
||||
op.setFixed(PhysReg(2));
|
||||
|
|
@ -103,7 +103,7 @@ BEGIN_TEST(regalloc.precolor.blocking_vector)
|
|||
if (!setup_cs("s2 s1", GFX10))
|
||||
return;
|
||||
|
||||
//! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2]
|
||||
//! s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp0:s[0-1]
|
||||
//! p_unit_test %tmp1_2:s[1]
|
||||
Operand op(inputs[1]);
|
||||
op.setFixed(PhysReg(1));
|
||||
|
|
@ -120,7 +120,7 @@ BEGIN_TEST(regalloc.precolor.vector.test)
|
|||
if (!setup_cs("s2 s1 s1", GFX10))
|
||||
return;
|
||||
|
||||
//! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1]
|
||||
//! s2: %tmp0_2:s[2-3], s1: %tmp2_2:s[0] = p_parallelcopy %tmp0:s[0-1], %tmp2:s[3]
|
||||
//! p_unit_test %tmp0_2:s[2-3]
|
||||
Operand op(inputs[0]);
|
||||
op.setFixed(PhysReg(2));
|
||||
|
|
@ -137,7 +137,7 @@ BEGIN_TEST(regalloc.precolor.vector.collect)
|
|||
if (!setup_cs("s2 s1 s1", GFX10))
|
||||
return;
|
||||
|
||||
//! s1: %tmp1_2:s[0], s1: %tmp2_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp2:s[3], %tmp0:s[0-1]
|
||||
//! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[0], s1: %tmp2_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2], %tmp2:s[3]
|
||||
//! p_unit_test %tmp0_2:s[2-3]
|
||||
Operand op(inputs[0]);
|
||||
op.setFixed(PhysReg(2));
|
||||
|
|
@ -154,13 +154,40 @@ BEGIN_TEST(regalloc.precolor.vgpr_move)
|
|||
if (!setup_cs("v1 v1", GFX10))
|
||||
return;
|
||||
|
||||
//! v1: %tmp0_2:v[1], v1: %tmp1_2:v[0] = p_parallelcopy %tmp0:v[0], %tmp1:v[1]
|
||||
//! v1: %tmp1_2:v[0], v1: %tmp0_2:v[1] = p_parallelcopy %tmp1:v[1], %tmp0:v[0]
|
||||
//! p_unit_test %tmp0_2:v[1], %tmp1_2:v[0]
|
||||
bld.pseudo(aco_opcode::p_unit_test, inputs[0], Operand(inputs[1], PhysReg(256)));
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
|
||||
/* Four v1 inputs are passed to one p_unit_test, each fixed to a VGPR other
 * than the one it starts in (tmp3 -> v[0], tmp0 -> v[1], tmp1 -> v[2],
 * tmp2 -> v[3]). The expected output is a single p_parallelcopy that performs
 * all four moves, followed by the p_unit_test using the copied temporaries.
 */
BEGIN_TEST(regalloc.precolor.multiple_operands)
|
||||
//>> v1: %tmp0:v[0], v1: %tmp1:v[1], v1: %tmp2:v[2], v1: %tmp3:v[3] = p_startpgm
|
||||
if (!setup_cs("v1 v1 v1 v1", GFX10))
|
||||
return;
|
||||
|
||||
//! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
|
||||
//! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256+0)),
|
||||
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[1], PhysReg(256+2)),
|
||||
Operand(inputs[2], PhysReg(256+3)));
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
|
||||
/* A single v1 input is used for three operands of one p_unit_test, fixed to
 * PhysReg(256+0), PhysReg(256+1) and PhysReg(256+2) respectively. The
 * expected output contains one p_parallelcopy that copies tmp0 from v[0]
 * into v[1] and v[2].
 *
 * NOTE(review): the expected p_unit_test line below names %tmp1:v[1] for all
 * three operands although the operands are fixed to three different
 * registers - confirm that this expectation is intended.
 */
BEGIN_TEST(regalloc.precolor.different_regs)
|
||||
//>> v1: %tmp0:v[0] = p_startpgm
|
||||
if (!setup_cs("v1", GFX10))
|
||||
return;
|
||||
|
||||
//! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
|
||||
//! p_unit_test %tmp1:v[1], %tmp1:v[1], %tmp1:v[1]
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256+0)),
|
||||
Operand(inputs[0], PhysReg(256+1)), Operand(inputs[0], PhysReg(256+2)));
|
||||
|
||||
finish_ra_test(ra_test_policy());
|
||||
END_TEST
|
||||
|
||||
BEGIN_TEST(regalloc.scratch_sgpr.create_vector)
|
||||
if (!setup_cs("v1 s1", GFX7))
|
||||
return;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue