mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 20:28:04 +02:00
aco: make PhysReg in units of bytes
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Reviewed-By: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4002>
This commit is contained in:
parent
dc69738b0f
commit
34424b81df
6 changed files with 40 additions and 38 deletions
|
|
@ -156,7 +156,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||
encoding |= instr->operands.size() ? (instr->operands[0].physReg() >> 1) << 9 : 0;
|
||||
if (instr->operands.size() >= 2) {
|
||||
if (!instr->operands[1].isConstant() || instr->operands[1].constantValue() >= 1024) {
|
||||
encoding |= instr->operands[1].physReg().reg;
|
||||
encoding |= instr->operands[1].physReg().reg();
|
||||
} else {
|
||||
encoding |= instr->operands[1].constantValue() >> 2;
|
||||
encoding |= 1 << 8;
|
||||
|
|
|
|||
|
|
@ -685,7 +685,7 @@ void insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event
|
|||
wait_entry new_entry(event, imm, !rc.is_linear(), wait_on_read);
|
||||
|
||||
for (unsigned i = 0; i < rc.size(); i++) {
|
||||
auto it = ctx.gpr_map.emplace(PhysReg{reg.reg+i}, new_entry);
|
||||
auto it = ctx.gpr_map.emplace(PhysReg{reg.reg()+i}, new_entry);
|
||||
if (!it.second)
|
||||
it.first->second.join(new_entry);
|
||||
}
|
||||
|
|
@ -696,7 +696,7 @@ void insert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event
|
|||
unsigned i = u_bit_scan(&counters_todo);
|
||||
ctx.unwaited_instrs[i].insert(std::make_pair(ctx.gen_instr, 0u));
|
||||
for (unsigned j = 0; j < rc.size(); j++)
|
||||
ctx.reg_instrs[i][PhysReg{reg.reg+j}].insert(ctx.gen_instr);
|
||||
ctx.reg_instrs[i][PhysReg{reg.reg()+j}].insert(ctx.gen_instr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -267,10 +267,12 @@ private:
|
|||
*/
|
||||
struct PhysReg {
|
||||
constexpr PhysReg() = default;
|
||||
explicit constexpr PhysReg(unsigned r) : reg(r) {}
|
||||
constexpr operator unsigned() const { return reg; }
|
||||
explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
|
||||
constexpr unsigned reg() const { return reg_b >> 2; }
|
||||
constexpr unsigned byte() const { return reg_b & 0x3; }
|
||||
constexpr operator unsigned() const { return reg(); }
|
||||
|
||||
uint16_t reg = 0;
|
||||
uint16_t reg_b = 0;
|
||||
};
|
||||
|
||||
/* helper expressions for special registers */
|
||||
|
|
@ -475,12 +477,12 @@ public:
|
|||
constexpr uint64_t constantValue64(bool signext=false) const noexcept
|
||||
{
|
||||
if (is64BitConst_) {
|
||||
if (reg_.reg <= 192)
|
||||
return reg_.reg - 128;
|
||||
else if (reg_.reg <= 208)
|
||||
return 0xFFFFFFFFFFFFFFFF - (reg_.reg - 193);
|
||||
if (reg_ <= 192)
|
||||
return reg_ - 128;
|
||||
else if (reg_ <= 208)
|
||||
return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
|
||||
|
||||
switch (reg_.reg) {
|
||||
switch (reg_) {
|
||||
case 240:
|
||||
return 0x3FE0000000000000;
|
||||
case 241:
|
||||
|
|
|
|||
|
|
@ -195,15 +195,15 @@ void emit_int64_op(lower_context *ctx, PhysReg dst_reg, PhysReg src0_reg, PhysRe
|
|||
{
|
||||
Builder bld(ctx->program, &ctx->instructions);
|
||||
Definition dst[] = {Definition(dst_reg, v1), Definition(PhysReg{dst_reg+1}, v1)};
|
||||
RegClass src0_rc = src0_reg.reg >= 256 ? v1 : s1;
|
||||
RegClass src0_rc = src0_reg.reg() >= 256 ? v1 : s1;
|
||||
Operand src0[] = {Operand(src0_reg, src0_rc), Operand(PhysReg{src0_reg+1}, src0_rc)};
|
||||
Operand src1[] = {Operand(src1_reg, v1), Operand(PhysReg{src1_reg+1}, v1)};
|
||||
Operand src0_64 = Operand(src0_reg, src0_reg.reg >= 256 ? v2 : s2);
|
||||
Operand src0_64 = Operand(src0_reg, src0_reg.reg() >= 256 ? v2 : s2);
|
||||
Operand src1_64 = Operand(src1_reg, v2);
|
||||
|
||||
if (src0_rc == s1 &&
|
||||
(op == imul64 || op == umin64 || op == umax64 || op == imin64 || op == imax64)) {
|
||||
assert(vtmp.reg != 0);
|
||||
assert(vtmp.reg() != 0);
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(vtmp, v1), src0[0]);
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp+1}, v1), src0[1]);
|
||||
src0_reg = vtmp;
|
||||
|
|
@ -211,7 +211,7 @@ void emit_int64_op(lower_context *ctx, PhysReg dst_reg, PhysReg src0_reg, PhysRe
|
|||
src0[1] = Operand(PhysReg{vtmp+1}, v1);
|
||||
src0_64 = Operand(vtmp, v2);
|
||||
} else if (src0_rc == s1 && op == iadd64) {
|
||||
assert(vtmp.reg != 0);
|
||||
assert(vtmp.reg() != 0);
|
||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp+1}, v1), src0[1]);
|
||||
src0[1] = Operand(PhysReg{vtmp+1}, v1);
|
||||
}
|
||||
|
|
@ -330,7 +330,7 @@ void emit_op(lower_context *ctx, PhysReg dst_reg, PhysReg src0_reg, PhysReg src1
|
|||
Builder bld(ctx->program, &ctx->instructions);
|
||||
RegClass rc = RegClass(RegType::vgpr, size);
|
||||
Definition dst(dst_reg, rc);
|
||||
Operand src0(src0_reg, RegClass(src0_reg.reg >= 256 ? RegType::vgpr : RegType::sgpr, size));
|
||||
Operand src0(src0_reg, RegClass(src0_reg.reg() >= 256 ? RegType::vgpr : RegType::sgpr, size));
|
||||
Operand src1(src1_reg, rc);
|
||||
|
||||
aco_opcode opcode = get_reduce_opcode(ctx->program->chip_class, op);
|
||||
|
|
|
|||
|
|
@ -126,7 +126,7 @@ static void print_operand(const Operand *operand, FILE *output)
|
|||
if (operand->isLiteral()) {
|
||||
fprintf(output, "0x%x", operand->constantValue());
|
||||
} else if (operand->isConstant()) {
|
||||
print_constant(operand->physReg().reg, output);
|
||||
print_constant(operand->physReg().reg(), output);
|
||||
} else if (operand->isUndefined()) {
|
||||
print_reg_class(operand->regClass(), output);
|
||||
fprintf(output, "undef");
|
||||
|
|
|
|||
|
|
@ -212,7 +212,7 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file,
|
|||
// FIXME: if a definition got moved, change the target location and remove the parallelcopy
|
||||
copy.second.setTemp(Temp(ctx.program->allocateId(), copy.second.regClass()));
|
||||
ctx.assignments[copy.second.tempId()] = {copy.second.physReg(), copy.second.regClass()};
|
||||
for (unsigned i = copy.second.physReg().reg; i < copy.second.physReg() + copy.second.size(); i++)
|
||||
for (unsigned i = copy.second.physReg().reg(); i < copy.second.physReg() + copy.second.size(); i++)
|
||||
reg_file[i] = copy.second.tempId();
|
||||
/* check if we moved an operand */
|
||||
for (Operand& op : instr->operands) {
|
||||
|
|
@ -223,8 +223,8 @@ void update_renames(ra_ctx& ctx, RegisterFile& reg_file,
|
|||
for (std::pair<Operand, Definition>& pc : parallelcopies) {
|
||||
PhysReg def_reg = pc.second.physReg();
|
||||
omit_renaming &= def_reg > copy.first.physReg() ?
|
||||
(copy.first.physReg() + copy.first.size() <= def_reg.reg) :
|
||||
(def_reg + pc.second.size() <= copy.first.physReg().reg);
|
||||
(copy.first.physReg() + copy.first.size() <= def_reg.reg()) :
|
||||
(def_reg + pc.second.size() <= copy.first.physReg().reg());
|
||||
}
|
||||
if (omit_renaming)
|
||||
continue;
|
||||
|
|
@ -492,7 +492,7 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
|
|||
instr->operands[j].physReg() >= lb &&
|
||||
instr->operands[j].physReg() < ub) {
|
||||
assert(instr->operands[j].isFixed());
|
||||
assert(reg_file[instr->operands[j].physReg().reg] == 0);
|
||||
assert(reg_file[instr->operands[j].physReg()] == 0);
|
||||
reg_file.fill(instr->operands[j].physReg(), instr->operands[j].size(), 0xFFFFFFFF);
|
||||
killed_ops += instr->operands[j].getTemp().size();
|
||||
}
|
||||
|
|
@ -879,7 +879,7 @@ bool get_reg_specified(ra_ctx& ctx,
|
|||
ub = ctx.program->max_reg_demand.sgpr;
|
||||
}
|
||||
|
||||
uint32_t reg_lo = reg.reg;
|
||||
uint32_t reg_lo = reg.reg();
|
||||
uint32_t reg_hi = reg + (size - 1);
|
||||
|
||||
if (reg_lo < lb || reg_hi >= ub || reg_lo > reg_hi)
|
||||
|
|
@ -930,7 +930,7 @@ void handle_pseudo(ra_ctx& ctx,
|
|||
return;
|
||||
|
||||
Pseudo_instruction *pi = (Pseudo_instruction *)instr;
|
||||
if (reg_file[scc.reg]) {
|
||||
if (reg_file[scc.reg()]) {
|
||||
pi->tmp_in_scc = true;
|
||||
|
||||
int reg = ctx.max_used_sgpr;
|
||||
|
|
@ -1385,7 +1385,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
/* fill in sgpr_live_in */
|
||||
for (unsigned i = 0; i <= ctx.max_used_sgpr; i++)
|
||||
sgpr_live_in[block.index][i] = register_file[i];
|
||||
sgpr_live_in[block.index][127] = register_file[scc.reg];
|
||||
sgpr_live_in[block.index][127] = register_file[scc.reg()];
|
||||
|
||||
/* Handle all other instructions of the block */
|
||||
for (; it != block.instructions.end(); ++it) {
|
||||
|
|
@ -1445,8 +1445,8 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
|
||||
} else {
|
||||
/* check if target reg is blocked, and move away the blocking var */
|
||||
if (register_file[operand.physReg().reg]) {
|
||||
uint32_t blocking_id = register_file[operand.physReg().reg];
|
||||
if (register_file[operand.physReg().reg()]) {
|
||||
uint32_t blocking_id = register_file[operand.physReg().reg()];
|
||||
RegClass rc = ctx.assignments[blocking_id].second;
|
||||
Operand pc_op = Operand(Temp{blocking_id, rc});
|
||||
pc_op.setFixed(operand.physReg());
|
||||
|
|
@ -1503,7 +1503,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
(instr->isVMEM() && i == 3 && program->chip_class == GFX6) ||
|
||||
(instr->format == Format::DS && static_cast<DS_instruction*>(instr.get())->gds)) {
|
||||
for (unsigned j = 0; j < operand.size(); j++)
|
||||
ctx.war_hint.set(operand.physReg().reg + j);
|
||||
ctx.war_hint.set(operand.physReg().reg() + j);
|
||||
}
|
||||
}
|
||||
std::map<unsigned, phi_info>::iterator phi = phi_map.find(operand.getTemp().id());
|
||||
|
|
@ -1563,11 +1563,11 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
|
||||
adjust_max_used_regs(ctx, definition.regClass(), definition.physReg());
|
||||
/* check if the target register is blocked */
|
||||
if (register_file[definition.physReg().reg] != 0) {
|
||||
if (register_file[definition.physReg().reg()] != 0) {
|
||||
/* create parallelcopy pair to move blocking var */
|
||||
Temp tmp = {register_file[definition.physReg()], ctx.assignments[register_file[definition.physReg()]].second};
|
||||
Operand pc_op = Operand(tmp);
|
||||
pc_op.setFixed(ctx.assignments[register_file[definition.physReg().reg]].first);
|
||||
pc_op.setFixed(ctx.assignments[register_file[definition.physReg().reg()]].first);
|
||||
RegClass rc = pc_op.regClass();
|
||||
tmp = Temp{program->allocateId(), rc};
|
||||
Definition pc_def = Definition(tmp);
|
||||
|
|
@ -1621,7 +1621,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
continue;
|
||||
|
||||
/* find free reg */
|
||||
if (definition.hasHint() && register_file[definition.physReg().reg] == 0)
|
||||
if (definition.hasHint() && register_file[definition.physReg().reg()] == 0)
|
||||
definition.setFixed(definition.physReg());
|
||||
else if (instr->opcode == aco_opcode::p_split_vector) {
|
||||
PhysReg reg = PhysReg{instr->operands[0].physReg() + i * definition.size()};
|
||||
|
|
@ -1632,7 +1632,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
PhysReg reg;
|
||||
if (instr->operands[0].isKillBeforeDef() && instr->operands[0].getTemp().type() == definition.getTemp().type()) {
|
||||
reg = instr->operands[0].physReg();
|
||||
assert(register_file[reg.reg] == 0);
|
||||
assert(register_file[reg.reg()] == 0);
|
||||
} else {
|
||||
reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr);
|
||||
}
|
||||
|
|
@ -1642,8 +1642,8 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
if (instr->operands[0].isKillBeforeDef() &&
|
||||
instr->operands[0].getTemp().type() == definition.getTemp().type()) {
|
||||
reg = instr->operands[0].physReg();
|
||||
reg.reg += definition.size() * instr->operands[1].constantValue();
|
||||
assert(register_file[reg.reg] == 0);
|
||||
reg = PhysReg(reg.reg() + definition.size() * instr->operands[1].constantValue());
|
||||
assert(register_file[reg.reg()] == 0);
|
||||
} else {
|
||||
reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr);
|
||||
}
|
||||
|
|
@ -1676,7 +1676,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
op.getTemp().type() == definition.getTemp().type() &&
|
||||
ctx.assignments.find(op.tempId()) != ctx.assignments.end()) {
|
||||
PhysReg reg = ctx.assignments[op.tempId()].first;
|
||||
reg.reg = reg - k + offset;
|
||||
reg = PhysReg(reg.reg() - k + offset);
|
||||
if (get_reg_specified(ctx, register_file, definition.regClass(), parallelcopy, instr, reg)) {
|
||||
definition.setFixed(reg);
|
||||
break;
|
||||
|
|
@ -1688,7 +1688,7 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
std::pair<PhysReg, bool> res = get_reg_vec(ctx, register_file, vec->definitions[0].regClass());
|
||||
PhysReg reg = res.first;
|
||||
if (res.second) {
|
||||
reg.reg += offset;
|
||||
reg = PhysReg(reg.reg() + offset);
|
||||
} else {
|
||||
reg = get_reg(ctx, register_file, definition.regClass(), parallelcopy, instr);
|
||||
}
|
||||
|
|
@ -1726,17 +1726,17 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
|
|||
if (!parallelcopy.empty()) {
|
||||
aco_ptr<Pseudo_instruction> pc;
|
||||
pc.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, parallelcopy.size(), parallelcopy.size()));
|
||||
bool temp_in_scc = register_file[scc.reg];
|
||||
bool temp_in_scc = register_file[scc.reg()];
|
||||
bool sgpr_operands_alias_defs = false;
|
||||
uint64_t sgpr_operands[4] = {0, 0, 0, 0};
|
||||
for (unsigned i = 0; i < parallelcopy.size(); i++) {
|
||||
if (temp_in_scc && parallelcopy[i].first.isTemp() && parallelcopy[i].first.getTemp().type() == RegType::sgpr) {
|
||||
if (!sgpr_operands_alias_defs) {
|
||||
unsigned reg = parallelcopy[i].first.physReg().reg;
|
||||
unsigned reg = parallelcopy[i].first.physReg().reg();
|
||||
unsigned size = parallelcopy[i].first.getTemp().size();
|
||||
sgpr_operands[reg / 64u] |= ((1u << size) - 1) << (reg % 64u);
|
||||
|
||||
reg = parallelcopy[i].second.physReg().reg;
|
||||
reg = parallelcopy[i].second.physReg().reg();
|
||||
size = parallelcopy[i].second.getTemp().size();
|
||||
if (sgpr_operands[reg / 64u] & ((1u << size) - 1) << (reg % 64u))
|
||||
sgpr_operands_alias_defs = true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue