aco: make PhysReg in units of bytes

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-By: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4002>
This commit is contained in:
Rhys Perry 2020-02-07 11:55:43 +00:00 committed by Daniel Schürmann
parent dc69738b0f
commit 34424b81df
6 changed files with 40 additions and 38 deletions

View file

@ -156,7 +156,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
encoding |= instr->operands.size() ? (instr->operands[0].physReg() >> 1) << 9 : 0;
if (instr->operands.size() >= 2) {
if (!instr->operands[1].isConstant() || instr->operands[1].constantValue() >= 1024) {
encoding |= instr->operands[1].physReg().reg;
encoding |= instr->operands[1].physReg().reg();
} else {
encoding |= instr->operands[1].constantValue() >> 2;
encoding |= 1 << 8;

View file

@ -685,7 +685,7 @@ void insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event
wait_entry new_entry(event, imm, !rc.is_linear(), wait_on_read);
for (unsigned i = 0; i < rc.size(); i++) {
auto it = ctx.gpr_map.emplace(PhysReg{reg.reg+i}, new_entry);
auto it = ctx.gpr_map.emplace(PhysReg{reg.reg()+i}, new_entry);
if (!it.second)
it.first->second.join(new_entry);
}
@ -696,7 +696,7 @@ void insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event
unsigned i = u_bit_scan(&counters_todo);
ctx.unwaited_instrs[i].insert(std::make_pair(ctx.gen_instr, 0u));
for (unsigned j = 0; j < rc.size(); j++)
ctx.reg_instrs[i][PhysReg{reg.reg+j}].insert(ctx.gen_instr);
ctx.reg_instrs[i][PhysReg{reg.reg()+j}].insert(ctx.gen_instr);
}
}
}

View file

@ -267,10 +267,12 @@ private:
*/
/* NOTE(review): this span is a diff rendering without +/- markers, so two
 * variants of several members appear side by side. The lines that use the
 * `reg` field look like the pre-change version and the lines that use
 * `reg_b` look like the post-change version -- confirm against the real
 * header before relying on either set. */
struct PhysReg {
/* Default-constructed value has its storage field zero-initialized. */
constexpr PhysReg() = default;
/* Variant storing the register index `r` unchanged in `reg`. */
explicit constexpr PhysReg(unsigned r) : reg(r) {}
/* Variant of the implicit conversion that returns the `reg` field directly. */
constexpr operator unsigned() const { return reg; }
/* Variant storing the register index `r` shifted left by two bits
 * (i.e. multiplied by 4) in `reg_b`; the result is narrowed to uint16_t. */
explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
/* Register index: `reg_b` with the two low bits shifted out. */
constexpr unsigned reg() const { return reg_b >> 2; }
/* The two low bits of `reg_b` (0..3); presumably the byte offset inside the
 * register -- TODO confirm. */
constexpr unsigned byte() const { return reg_b & 0x3; }
/* Implicit conversion yields the register index reg(), not the raw `reg_b`
 * value, so arithmetic and comparisons on a PhysReg work on register indices. */
constexpr operator unsigned() const { return reg(); }
/* Storage for the variant that keeps the plain register index. */
uint16_t reg = 0;
/* Storage for the variant that keeps the index scaled by 4 with the low two
 * bits available for byte(); the commit title describes this as "units of
 * bytes". */
uint16_t reg_b = 0;
};
/* helper expressions for special registers */
@ -475,12 +477,12 @@ public:
constexpr uint64_t constantValue64(bool signext=false) const noexcept
{
if (is64BitConst_) {
if (reg_.reg <= 192)
return reg_.reg - 128;
else if (reg_.reg <= 208)
return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193);
if (reg_ <= 192)
return reg_ - 128;
else if (reg_ <= 208)
return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
switch (reg_.reg) {
switch (reg_) {
case 240:
return 0x3FE0000000000000;
case 241:

View file

@ -195,15 +195,15 @@ void emit_int64_op(lower_context *ctx, PhysReg dst_reg, PhysReg src0_reg, PhysRe
{
Builder bld(ctx->program, &ctx->instructions);
Definition dst[] = {Definition(dst_reg, v1), Definition(PhysReg{dst_reg+1}, v1)};
RegClass src0_rc = src0_reg.reg >= 256 ? v1 : s1;
RegClass src0_rc = src0_reg.reg() >= 256 ? v1 : s1;
Operand src0[] = {Operand(src0_reg, src0_rc), Operand(PhysReg{src0_reg+1}, src0_rc)};
Operand src1[] = {Operand(src1_reg, v1), Operand(PhysReg{src1_reg+1}, v1)};
Operand src0_64 = Operand(src0_reg, src0_reg.reg >= 256 ? v2 : s2);
Operand src0_64 = Operand(src0_reg, src0_reg.reg() >= 256 ? v2 : s2);
Operand src1_64 = Operand(src1_reg, v2);
if (src0_rc == s1 &&
(op == imul64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)) {
assert(vtmp.reg != 0);
assert(vtmp.reg() != 0);
bld.vop1(aco_opcode::v_mov_b32, Definition(vtmp, v1), src0[0]);
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp+1}, v1), src0[1]);
src0_reg = vtmp;
@ -211,7 +211,7 @@ void emit_int64_op(lower_context *ctx, PhysReg dst_reg, PhysReg src0_reg, PhysRe
src0[1] = Operand(PhysReg{vtmp+1}, v1);
src0_64 = Operand(vtmp, v2);
} else if (src0_rc == s1 && op == iadd64) {
assert(vtmp.reg != 0);
assert(vtmp.reg() != 0);
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp+1}, v1), src0[1]);
src0[1] = Operand(PhysReg{vtmp+1}, v1);
}
@ -330,7 +330,7 @@ void emit_op(lower_context *ctx, PhysReg dst_reg, PhysReg src0_reg, PhysReg src1
Builder bld(ctx->program, &ctx->instructions);
RegClass rc = RegClass(RegType::vgpr, size);
Definition dst(dst_reg, rc);
Operand src0(src0_reg, RegClass(src0_reg.reg >= 256 ? RegType::vgpr : RegType::sgpr, size));
Operand src0(src0_reg, RegClass(src0_reg.reg() >= 256 ? RegType::vgpr : RegType::sgpr, size));
Operand src1(src1_reg, rc);
aco_opcode opcode = get_reduce_opcode(ctx->program->chip_class, op);

View file

@ -126,7 +126,7 @@ static void print_operand(const Operand *operand, FILE *output)
if (operand->isLiteral()) {
fprintf(output, "0x%x", operand->constantValue());
} else if (operand->isConstant()) {
print_constant(operand->physReg().reg, output);
print_constant(operand->physReg().reg(), output);
} else if (operand->isUndefined()) {
print_reg_class(operand->regClass(), output);
fprintf(output, "undef");

View file

@ -212,7 +212,7 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file,
// FIXME: if a definition got moved, change the target location and remove the parallelcopy
copy.second.setTemp(Temp(ctx.program->allocateId(), copy.second.regClass()));
ctx.assignments[copy.second.tempId()] = {copy.second.physReg(), copy.second.regClass()};
for (unsigned i = copy.second.physReg().reg; i < copy.second.physReg() + copy.second.size(); i++)
for (unsigned i = copy.second.physReg().reg(); i < copy.second.physReg() + copy.second.size(); i++)
reg_file[i] = copy.second.tempId();
/* check if we moved an operand */
for (Operand& op : instr->operands) {
@ -223,8 +223,8 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file,
for (std::pair<Operand, Definition>& pc : parallelcopies) {
PhysReg def_reg = pc.second.physReg();
omit_renaming &= def_reg > copy.first.physReg() ?
(copy.first.physReg() + copy.first.size() <= def_reg.reg) :
(def_reg + pc.second.size() <= copy.first.physReg().reg);
(copy.first.physReg() + copy.first.size() <= def_reg.reg()) :
(def_reg + pc.second.size() <= copy.first.physReg().reg());
}
if (omit_renaming)
continue;
@ -492,7 +492,7 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
instr->operands[j].physReg() >= lb &&
instr->operands[j].physReg() < ub) {
assert(instr->operands[j].isFixed());
assert(reg_file[instr->operands[j].physReg().reg] == 0);
assert(reg_file[instr->operands[j].physReg()] == 0);
reg_file.fill(instr->operands[j].physReg(), instr->operands[j].size(), 0xFFFFFFFF);
killed_ops += instr->operands[j].getTemp().size();
}
@ -879,7 +879,7 @@ bool get_reg_specified(ra_ctx& ctx,
ub = ctx.program->max_reg_demand.sgpr;
}
uint32_t reg_lo = reg.reg;
uint32_t reg_lo = reg.reg();
uint32_t reg_hi = reg + (size - 1);
if (reg_lo < lb || reg_hi >= ub || reg_lo > reg_hi)
@ -930,7 +930,7 @@ void handle_pseudo(ra_ctx& ctx,
return;
Pseudo_instruction *pi = (Pseudo_instruction *)instr;
if (reg_file[scc.reg]) {
if (reg_file[scc.reg()]) {
pi->tmp_in_scc = true;
int reg = ctx.max_used_sgpr;
@ -1385,7 +1385,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
/* fill in sgpr_live_in */
for (unsigned i = 0; i <= ctx.max_used_sgpr; i++)
sgpr_live_in[block.index][i] = register_file[i];
sgpr_live_in[block.index][127] = register_file[scc.reg];
sgpr_live_in[block.index][127] = register_file[scc.reg()];
/* Handle all other instructions of the block */
for (; it != block.instructions.end(); ++it) {
@ -1445,8 +1445,8 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
} else {
/* check if target reg is blocked, and move away the blocking var */
if (register_file[operand.physReg().reg]) {
uint32_t blocking_id = register_file[operand.physReg().reg];
if (register_file[operand.physReg().reg()]) {
uint32_t blocking_id = register_file[operand.physReg().reg()];
RegClass rc = ctx.assignments[blocking_id].second;
Operand pc_op = Operand(Temp{blocking_id, rc});
pc_op.setFixed(operand.physReg());
@ -1503,7 +1503,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
(instr->isVMEM() && i == 3 && program->chip_class == GFX6) ||
(instr->format == Format::DS && static_cast<DS_instruction*>(instr.get())->gds)) {
for (unsigned j = 0; j < operand.size(); j++)
ctx.war_hint.set(operand.physReg().reg + j);
ctx.war_hint.set(operand.physReg().reg() + j);
}
}
std::map<unsigned, phi_info>::iterator phi = phi_map.find(operand.getTemp().id());
@ -1563,11 +1563,11 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
adjust_max_used_regs(ctx, definition.regClass(), definition.physReg());
/* check if the target register is blocked */
if (register_file[definition.physReg().reg] != 0) {
if (register_file[definition.physReg().reg()] != 0) {
/* create parallelcopy pair to move blocking var */
Temp tmp = {register_file[definition.physReg()], ctx.assignments[register_file[definition.physReg()]].second};
Operand pc_op = Operand(tmp);
pc_op.setFixed(ctx.assignments[register_file[definition.physReg().reg]].first);
pc_op.setFixed(ctx.assignments[register_file[definition.physReg().reg()]].first);
RegClass rc = pc_op.regClass();
tmp = Temp{program->allocateId(), rc};
Definition pc_def = Definition(tmp);
@ -1621,7 +1621,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
continue;
/* find free reg */
if (definition.hasHint() && register_file[definition.physReg().reg] == 0)
if (definition.hasHint() && register_file[definition.physReg().reg()] == 0)
definition.setFixed(definition.physReg());
else if (instr->opcode == aco_opcode::p_split_vector) {
PhysReg reg = PhysReg{instr->operands[0].physReg() + i * definition.size()};
@ -1632,7 +1632,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
PhysReg reg;
if (instr->operands[0].isKillBeforeDef() && instr->operands[0].getTemp().type() == definition.getTemp().type()) {
reg = instr->operands[0].physReg();
assert(register_file[reg.reg] == 0);
assert(register_file[reg.reg()] == 0);
} else {
reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr);
}
@ -1642,8 +1642,8 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
if (instr->operands[0].isKillBeforeDef() &&
instr->operands[0].getTemp().type() == definition.getTemp().type()) {
reg = instr->operands[0].physReg();
reg.reg += definition.size() * instr->operands[1].constantValue();
assert(register_file[reg.reg] == 0);
reg = PhysReg(reg.reg() + definition.size() * instr->operands[1].constantValue());
assert(register_file[reg.reg()] == 0);
} else {
reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr);
}
@ -1676,7 +1676,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
op.getTemp().type() == definition.getTemp().type() &&
ctx.assignments.find(op.tempId()) != ctx.assignments.end()) {
PhysReg reg = ctx.assignments[op.tempId()].first;
reg.reg = reg - k + offset;
reg = PhysReg(reg.reg() - k + offset);
if (get_reg_specified(ctx, register_file, definition.regClass(), parallelcopy, instr, reg)) {
definition.setFixed(reg);
break;
@ -1688,7 +1688,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
std::pair<PhysReg, bool> res = get_reg_vec(ctx, register_file, vec->definitions[0].regClass());
PhysReg reg = res.first;
if (res.second) {
reg.reg += offset;
reg = PhysReg(reg.reg() + offset);
} else {
reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr);
}
@ -1726,17 +1726,17 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
if (!parallelcopy.empty()) {
aco_ptr<Pseudo_instruction> pc;
pc.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, parallelcopy.size(), parallelcopy.size()));
bool temp_in_scc = register_file[scc.reg];
bool temp_in_scc = register_file[scc.reg()];
bool sgpr_operands_alias_defs = false;
uint64_t sgpr_operands[4] = {0, 0, 0, 0};
for (unsigned i = 0; i < parallelcopy.size(); i++) {
if (temp_in_scc && parallelcopy[i].first.isTemp() && parallelcopy[i].first.getTemp().type() == RegType::sgpr) {
if (!sgpr_operands_alias_defs) {
unsigned reg = parallelcopy[i].first.physReg().reg;
unsigned reg = parallelcopy[i].first.physReg().reg();
unsigned size = parallelcopy[i].first.getTemp().size();
sgpr_operands[reg / 64u] |= ((1u << size) - 1) << (reg % 64u);
reg = parallelcopy[i].second.physReg().reg;
reg = parallelcopy[i].second.physReg().reg();
size = parallelcopy[i].second.getTemp().size();
if (sgpr_operands[reg / 64u] & ((1u << size) - 1) << (reg % 64u))
sgpr_operands_alias_defs = true;