aco: allow live-range splits of linear vgprs in top-level blocks

Fixes dEQP-VK.ssbo.phys.layout.random.8bit.all_per_block_buffers.46 on
GFX8.

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12172>
This commit is contained in:
Rhys Perry 2021-06-09 14:33:24 +01:00 committed by Marge Bot
parent 8d50385bbd
commit 6ed18749de
3 changed files with 140 additions and 32 deletions

View file

@ -1,2 +0,0 @@
# ACO crash
dEQP-VK.ssbo.phys.layout.random.8bit.all_per_block_buffers.46,Crash

View file

@ -56,6 +56,7 @@ struct assignment {
struct ra_ctx {
Program* program;
Block* block = NULL;
std::vector<assignment> assignments;
std::vector<std::unordered_map<unsigned, Temp>> renames;
std::vector<uint32_t> loop_header;
@ -1074,8 +1075,9 @@ get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file,
n++;
continue;
}
/* we cannot split live ranges of linear vgprs */
if (ctx.assignments[reg_file[j]].rc.is_linear_vgpr()) {
/* we cannot split live ranges of linear vgprs inside control flow */
if (!(ctx.block->kind & block_kind_top_level) &&
ctx.assignments[reg_file[j]].rc.is_linear_vgpr()) {
found = false;
break;
}
@ -1221,8 +1223,10 @@ get_reg_impl(ra_ctx& ctx, RegisterFile& reg_file,
break;
}
/* we cannot split live ranges of linear vgprs */
if (ctx.assignments[reg_file[j]].rc.is_linear_vgpr()) {
/* we cannot split live ranges of linear vgprs inside control flow */
//TODO: ensure that live range splits inside control flow are never necessary
if (!(ctx.block->kind & block_kind_top_level) &&
ctx.assignments[reg_file[j]].rc.is_linear_vgpr()) {
found = false;
break;
}
@ -1627,7 +1631,7 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
PhysReg best_pos{0xFFF};
unsigned num_moves = 0xFF;
bool best_war_hint = true;
bool best_avoid = true;
/* test for each operand which definition placement causes the least shuffle instructions */
for (unsigned i = 0, offset = 0; i < instr->operands.size();
@ -1661,14 +1665,9 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
reg_file.get_id(reg_win.hi().advance(-1)) == reg_file.get_id(reg_win.hi()))
continue;
/* count variables to be moved and check war_hint */
bool war_hint = false;
bool linear_vgpr = false;
/* count variables to be moved and check "avoid" */
bool avoid = false;
for (PhysReg j : reg_win) {
if (linear_vgpr) {
break;
}
if (reg_file[j] != 0) {
if (reg_file[j] == 0xF0000000) {
PhysReg reg;
@ -1678,14 +1677,18 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
k += reg_file.test(reg, 1);
} else {
k += 4;
/* we cannot split live ranges of linear vgprs */
if (ctx.assignments[reg_file[j]].rc.is_linear_vgpr())
linear_vgpr = true;
/* we cannot split live ranges of linear vgprs inside control flow */
if (ctx.assignments[reg_file[j]].rc.is_linear_vgpr()) {
if (ctx.block->kind & block_kind_top_level)
avoid = true;
else
break;
}
}
}
war_hint |= ctx.war_hint[j];
avoid |= ctx.war_hint[j];
}
if (linear_vgpr || (war_hint && !best_war_hint))
if (avoid && !best_avoid)
continue;
/* count operands in wrong positions */
@ -1703,7 +1706,7 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
best_pos = reg_win.lo();
num_moves = k;
best_war_hint = war_hint;
best_avoid = avoid;
}
if (num_moves >= bytes)
@ -1775,24 +1778,22 @@ handle_pseudo(ra_ctx& ctx, const RegisterFile& reg_file, Instruction* instr)
default: return;
}
/* if all definitions are vgpr, no need to care for SCC */
bool writes_sgpr = false;
bool writes_linear = false;
/* if all definitions are logical vgpr, no need to care for SCC */
for (Definition& def : instr->definitions) {
if (def.getTemp().type() == RegType::sgpr) {
writes_sgpr = true;
break;
}
if (def.getTemp().regClass().is_linear())
writes_linear = true;
}
/* if all operands are constant, no need to care either */
bool reads_sgpr = false;
bool reads_linear = false;
bool reads_subdword = false;
for (Operand& op : instr->operands) {
if (op.isTemp() && op.getTemp().type() == RegType::sgpr)
reads_sgpr = true;
if (op.isTemp() && op.getTemp().regClass().is_linear())
reads_linear = true;
if (op.isTemp() && op.regClass().is_subdword())
reads_subdword = true;
}
bool needs_scratch_reg = (writes_sgpr && reads_sgpr && reg_file[scc]) ||
bool needs_scratch_reg = (writes_linear && reads_linear && reg_file[scc]) ||
(ctx.program->chip_class <= GFX7 && reads_subdword);
if (!needs_scratch_reg)
return;
@ -1911,7 +1912,7 @@ Temp
handle_live_in(ra_ctx& ctx, Temp val, Block* block)
{
std::vector<unsigned>& preds = val.is_linear() ? block->linear_preds : block->logical_preds;
if (preds.size() == 0 || val.regClass().is_linear_vgpr())
if (preds.size() == 0)
return val;
if (preds.size() == 1) {
@ -1934,6 +1935,8 @@ handle_live_in(ra_ctx& ctx, Temp val, Block* block)
}
if (needs_phi) {
assert(!val.regClass().is_linear_vgpr());
/* the variable has been renamed differently in the predecessors: we need to insert a phi */
aco_opcode opcode = val.is_linear() ? aco_opcode::p_linear_phi : aco_opcode::p_phi;
aco_ptr<Instruction> phi{
@ -2243,6 +2246,8 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
std::vector<std::bitset<128>> sgpr_live_in(program->blocks.size());
for (Block& block : program->blocks) {
ctx.block = &block;
/* initialize register file */
RegisterFile register_file = init_reg_file(ctx, live_out_per_block, block);
ctx.war_hint.reset();
@ -2646,9 +2651,12 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
pc.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy,
Format::PSEUDO, parallelcopy.size(),
parallelcopy.size()));
bool linear_vgpr = false;
bool sgpr_operands_alias_defs = false;
uint64_t sgpr_operands[4] = {0, 0, 0, 0};
for (unsigned i = 0; i < parallelcopy.size(); i++) {
linear_vgpr |= parallelcopy[i].first.regClass().is_linear_vgpr();
if (temp_in_scc && parallelcopy[i].first.isTemp() &&
parallelcopy[i].first.getTemp().type() == RegType::sgpr) {
if (!sgpr_operands_alias_defs) {
@ -2676,7 +2684,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
ctx.renames[block.index][orig.id()] = pc->definitions[i].getTemp();
}
if (temp_in_scc && sgpr_operands_alias_defs) {
if (temp_in_scc && (sgpr_operands_alias_defs || linear_vgpr)) {
/* disable definitions and re-enable operands */
RegisterFile tmp_file(register_file);
for (const Definition& def : instr->definitions) {

View file

@ -184,3 +184,105 @@ BEGIN_TEST(regalloc.scratch_sgpr.create_vector_sgpr_operand)
finish_ra_test(ra_test_policy(), true);
END_TEST
/* Live-range split of a linear VGPR forced by a fixed definition.
 *
 * A linear VGPR is defined in v0, then a second definition is pinned to v0
 * while the linear VGPR is still live. The expected output (the //! lines)
 * requires the linear VGPR to be moved to v1 by a p_parallelcopy placed before
 * the conflicting definition, and the final use to read the renamed temporary.
 */
BEGIN_TEST(regalloc.linear_vgpr.live_range_split.fixed_def)
//>> p_startpgm
if (!setup_cs("", GFX10))
return;
PhysReg reg_v0{256};
//! lv1: %tmp1:v[0] = p_unit_test
Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0));
//! lv1: %tmp2:v[1] = p_parallelcopy %tmp1:v[0]
//! v1: %_:v[0] = p_unit_test
bld.pseudo(aco_opcode::p_unit_test, Definition(reg_v0, v1));
/* keep the linear VGPR live across the conflicting definition above */
//! p_unit_test %tmp2:v[1]
bld.pseudo(aco_opcode::p_unit_test, tmp);
finish_ra_test(ra_test_policy());
END_TEST
/* Live-range split of a linear VGPR when a multi-register definition needs
 * its slot.
 *
 * With the VGPR limit set to 3 and a linear VGPR sitting in v1, a v2
 * definition cannot be placed without overlapping v1. The expected output
 * (the //! lines) requires the linear VGPR to be copied to v2 and the new
 * definition to take v[0-1]. SCC and s0 are kept live across the copy; the
 * trailing check prints the parallelcopy together with its tmp_in_scc flag and
 * scratch SGPR and expects "scc:1 scratch:s1".
 */
BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
//>> p_startpgm
if (!setup_cs("", GFX10))
return;
/* only three VGPRs are available, so v1 being occupied blocks every v2 slot */
program->dev.vgpr_limit = 3;
PhysReg reg_v1{257};
//! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test
Temp s0_tmp = bld.tmp(s1);
Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1));
//! lv1: %tmp1:v[1] = p_unit_test
Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1));
//! lv1: %tmp2:v[2] = p_parallelcopy %tmp1:v[1]
//! v2: %_:v[0-1] = p_unit_test
bld.pseudo(aco_opcode::p_unit_test, bld.def(v2));
/* uses the linear VGPR, SCC and s0 so all three stay live over the copy */
//! p_unit_test %tmp2:v[2], %scc_tmp:scc, %1:s[0]
bld.pseudo(aco_opcode::p_unit_test, tmp, scc_tmp, s0_tmp);
finish_ra_test(ra_test_policy());
/* instruction index 3 of block 0 is expected to be the inserted
 * p_parallelcopy (see the //>> check below); print it with its SCC/scratch
 * bookkeeping so that can be matched too */
//>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1
Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo();
aco_print_instr(&parallelcopy, output);
fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg());
END_TEST
/* Live-range split of a linear VGPR while finding registers for displaced
 * variables.
 *
 * With the VGPR limit set to 6, a linear VGPR lives in v4 and a logical v2
 * value in v[2-3]. A v3 definition is then requested. The expected output
 * (the //! lines) requires a single p_parallelcopy that moves the linear VGPR
 * to v0 and the logical value to v[4-5], leaving v[1-3] for the new
 * definition.
 * NOTE(review): the test name suggests this targets get_regs_for_copies();
 * that routing is not visible from the test body itself.
 */
BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies)
//>> p_startpgm
if (!setup_cs("", GFX10))
return;
program->dev.vgpr_limit = 6;
PhysReg reg_v2{258};
PhysReg reg_v4{260};
//! lv1: %lin_tmp1:v[4] = p_unit_test
Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v4));
//! v2: %log_tmp1:v[2-3] = p_unit_test
Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v2, reg_v2));
//! lv1: %lin_tmp2:v[0], v2: %log_tmp2:v[4-5] = p_parallelcopy %lin_tmp1:v[4], %log_tmp1:v[2-3]
//! v3: %_:v[1-3] = p_unit_test
bld.pseudo(aco_opcode::p_unit_test, bld.def(v3));
/* keep both the logical and the linear value live across the v3 definition */
//! p_unit_test %log_tmp2:v[4-5], %lin_tmp2:v[0]
bld.pseudo(aco_opcode::p_unit_test, log_tmp, lin_tmp);
finish_ra_test(ra_test_policy());
END_TEST
/* Live-range split of a linear VGPR when placing a p_create_vector result.
 *
 * With the VGPR limit set to 4, a linear VGPR lives in v0 and a logical VGPR
 * in v1. A v2 p_create_vector is built from an undefined first element and the
 * logical VGPR as second element. The expected output (the //! lines) requires
 * the vector to be placed at v[0-1], so the logical operand stays in v1, and
 * the linear VGPR to be moved out of the way to v2 by a p_parallelcopy.
 */
BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_create_vector)
//>> p_startpgm
if (!setup_cs("", GFX10))
return;
program->dev.vgpr_limit = 4;
PhysReg reg_v0{256};
PhysReg reg_v1{257};
//! lv1: %lin_tmp1:v[0] = p_unit_test
Temp lin_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v0));
//! v1: %log_tmp:v[1] = p_unit_test
Temp log_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1, reg_v1));
//! lv1: %lin_tmp2:v[2] = p_parallelcopy %lin_tmp1:v[0]
//! v2: %_:v[0-1] = p_create_vector v1: undef, %log_tmp:v[1]
bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(v1), log_tmp);
/* keep the linear VGPR live across the p_create_vector */
//! p_unit_test %lin_tmp2:v[2]
bld.pseudo(aco_opcode::p_unit_test, lin_tmp);
finish_ra_test(ra_test_policy());
END_TEST