mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-03 11:30:21 +01:00
aco/ra: Conservatively refactor existing code to use PhysRegInterval
All expressions have been replaced by their closest equivalent. No major simplification efforts have been made to minimize risk of regressions. Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7799>
This commit is contained in:
parent
9bbd6162a9
commit
46eee40abc
1 changed files with 93 additions and 112 deletions
|
|
@ -123,8 +123,7 @@ struct PhysRegInterval {
|
|||
};
|
||||
|
||||
struct DefInfo {
|
||||
uint16_t lb;
|
||||
uint16_t ub;
|
||||
PhysRegInterval bounds;
|
||||
uint8_t size;
|
||||
uint8_t stride;
|
||||
RegClass rc;
|
||||
|
|
@ -134,11 +133,9 @@ struct DefInfo {
|
|||
stride = 1;
|
||||
|
||||
if (rc.type() == RegType::vgpr) {
|
||||
lb = 256;
|
||||
ub = 256 + ctx.program->max_reg_demand.vgpr;
|
||||
bounds = { 256, (unsigned)ctx.program->max_reg_demand.vgpr };
|
||||
} else {
|
||||
lb = 0;
|
||||
ub = ctx.program->max_reg_demand.sgpr;
|
||||
bounds = { 0, (unsigned)ctx.program->max_reg_demand.sgpr };
|
||||
if (size == 2)
|
||||
stride = 2;
|
||||
else if (size >= 4)
|
||||
|
|
@ -668,8 +665,7 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
|
|||
RegisterFile& reg_file,
|
||||
DefInfo info)
|
||||
{
|
||||
uint32_t lb = info.lb;
|
||||
uint32_t ub = info.ub;
|
||||
const PhysRegInterval& bounds = info.bounds;
|
||||
uint32_t size = info.size;
|
||||
uint32_t stride = info.rc.is_subdword() ? DIV_ROUND_UP(info.stride, 4) : info.stride;
|
||||
RegClass rc = info.rc;
|
||||
|
|
@ -687,12 +683,10 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
|
|||
|
||||
if (stride == 1) {
|
||||
/* best fit algorithm: find the smallest gap to fit in the variable */
|
||||
unsigned best_pos = 0xFFFF;
|
||||
unsigned gap_size = 0xFFFF;
|
||||
PhysRegInterval best_gap { 0xFFFF, 0xFFFF };
|
||||
unsigned last_pos = 0xFFFF;
|
||||
|
||||
for (unsigned current_reg = lb; current_reg < ub; current_reg++) {
|
||||
|
||||
for (unsigned current_reg = bounds.lo(); current_reg < bounds.hi(); current_reg++) {
|
||||
if (reg_file[current_reg] == 0 && !ctx.war_hint[current_reg]) {
|
||||
if (last_pos == 0xFFFF)
|
||||
last_pos = current_reg;
|
||||
|
|
@ -707,62 +701,61 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
|
|||
if (last_pos == 0xFFFF)
|
||||
continue;
|
||||
|
||||
/* Below this line, "current_reg" indicates the first non-free register after last_pos */
|
||||
|
||||
PhysRegInterval gap = PhysRegInterval::from_until(last_pos, current_reg);
|
||||
/* early return on exact matches */
|
||||
if (last_pos + size == current_reg) {
|
||||
if (size == gap.size) {
|
||||
adjust_max_used_regs(ctx, rc, last_pos);
|
||||
return {PhysReg{last_pos}, true};
|
||||
}
|
||||
|
||||
/* check if it fits and the gap size is smaller */
|
||||
if (last_pos + size < current_reg && current_reg - last_pos < gap_size) {
|
||||
best_pos = last_pos;
|
||||
gap_size = current_reg - last_pos;
|
||||
if (size < gap.size && gap.size < best_gap.size) {
|
||||
best_gap = gap;
|
||||
}
|
||||
last_pos = 0xFFFF;
|
||||
}
|
||||
|
||||
/* final check */
|
||||
if (last_pos + size <= ub && ub - last_pos < gap_size) {
|
||||
best_pos = last_pos;
|
||||
gap_size = ub - last_pos;
|
||||
if (last_pos + size <= bounds.hi() && bounds.hi() - last_pos < best_gap.size) {
|
||||
best_gap = { last_pos, bounds.hi() - last_pos };
|
||||
}
|
||||
|
||||
if (best_pos == 0xFFFF)
|
||||
if (best_gap.lo() == 0xFFFF)
|
||||
return {{}, false};
|
||||
|
||||
/* find best position within gap by leaving a good stride for other variables*/
|
||||
unsigned buffer = gap_size - size;
|
||||
unsigned buffer = best_gap.size - size;
|
||||
if (buffer > 1) {
|
||||
if (((best_pos + size) % 8 != 0 && (best_pos + buffer) % 8 == 0) ||
|
||||
((best_pos + size) % 4 != 0 && (best_pos + buffer) % 4 == 0) ||
|
||||
((best_pos + size) % 2 != 0 && (best_pos + buffer) % 2 == 0))
|
||||
best_pos = best_pos + buffer;
|
||||
if (((best_gap.lo() + size) % 8 != 0 && (best_gap.lo() + buffer) % 8 == 0) ||
|
||||
((best_gap.lo() + size) % 4 != 0 && (best_gap.lo() + buffer) % 4 == 0) ||
|
||||
((best_gap.lo() + size) % 2 != 0 && (best_gap.lo() + buffer) % 2 == 0))
|
||||
best_gap = { best_gap.lo() + buffer, best_gap.size - buffer };
|
||||
}
|
||||
|
||||
adjust_max_used_regs(ctx, rc, best_pos);
|
||||
return {PhysReg{best_pos}, true};
|
||||
adjust_max_used_regs(ctx, rc, best_gap.lo());
|
||||
return {PhysReg{best_gap.lo()}, true};
|
||||
}
|
||||
|
||||
bool found = false;
|
||||
unsigned reg_lo = lb;
|
||||
unsigned reg_hi = lb + size - 1;
|
||||
while (!found && reg_lo + size <= ub) {
|
||||
if (reg_file[reg_lo] != 0) {
|
||||
reg_lo += stride;
|
||||
PhysRegInterval reg_win = { bounds.lo(), size };
|
||||
while (!found && reg_win.hi() <= bounds.hi()) {
|
||||
if (reg_file[reg_win.lo()] != 0) {
|
||||
reg_win += stride;
|
||||
continue;
|
||||
}
|
||||
reg_hi = reg_lo + size - 1;
|
||||
found = true;
|
||||
for (unsigned reg = reg_lo + 1; found && reg <= reg_hi; reg++) {
|
||||
for (unsigned reg = reg_win.lo() + 1; found && reg < reg_win.hi(); reg++) {
|
||||
if (reg_file[reg] != 0 || ctx.war_hint[reg])
|
||||
found = false;
|
||||
}
|
||||
if (found) {
|
||||
adjust_max_used_regs(ctx, rc, reg_lo);
|
||||
return {PhysReg{reg_lo}, true};
|
||||
adjust_max_used_regs(ctx, rc, reg_win.lo());
|
||||
return {PhysReg{reg_win.lo()}, true};
|
||||
}
|
||||
|
||||
reg_lo += stride;
|
||||
reg_win += stride;
|
||||
}
|
||||
|
||||
/* do this late because using the upper bytes of a register can require
|
||||
|
|
@ -771,7 +764,7 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
|
|||
if (rc.is_subdword()) {
|
||||
for (std::pair<uint32_t, std::array<uint32_t, 4>> entry : reg_file.subdword_regs) {
|
||||
assert(reg_file[entry.first] == 0xF0000000);
|
||||
if (lb > entry.first || entry.first >= ub)
|
||||
if (bounds.lo() > entry.first || entry.first >= bounds.hi())
|
||||
continue;
|
||||
|
||||
for (unsigned i = 0; i < 4; i+= info.stride) {
|
||||
|
|
@ -830,12 +823,10 @@ bool get_regs_for_copies(ra_ctx& ctx,
|
|||
RegisterFile& reg_file,
|
||||
std::vector<std::pair<Operand, Definition>>& parallelcopies,
|
||||
const std::set<std::pair<unsigned, unsigned>> &vars,
|
||||
uint32_t lb, uint32_t ub,
|
||||
const PhysRegInterval bounds,
|
||||
aco_ptr<Instruction>& instr,
|
||||
uint32_t def_reg_lo,
|
||||
uint32_t def_reg_hi)
|
||||
const PhysRegInterval def_reg)
|
||||
{
|
||||
|
||||
/* variables are sorted from small sized to large */
|
||||
/* NOTE: variables are also sorted by ID. this only affects a very small number of shaders slightly though. */
|
||||
for (std::set<std::pair<unsigned, unsigned>>::const_reverse_iterator it = vars.rbegin(); it != vars.rend(); ++it) {
|
||||
|
|
@ -859,7 +850,7 @@ bool get_regs_for_copies(ra_ctx& ctx,
|
|||
std::pair<PhysReg, bool> res;
|
||||
if (is_dead_operand) {
|
||||
if (instr->opcode == aco_opcode::p_create_vector) {
|
||||
PhysReg reg(def_reg_lo);
|
||||
PhysReg reg(def_reg.lo());
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
if (instr->operands[i].isTemp() && instr->operands[i].tempId() == id) {
|
||||
res = {reg, (!var.rc.is_subdword() || (reg.byte() % info.stride == 0)) && !reg_file.test(reg, var.rc.bytes())};
|
||||
|
|
@ -870,17 +861,16 @@ bool get_regs_for_copies(ra_ctx& ctx,
|
|||
if (!res.second)
|
||||
res = {var.reg, !reg_file.test(var.reg, var.rc.bytes())};
|
||||
} else {
|
||||
info.lb = def_reg_lo;
|
||||
info.ub = def_reg_hi + 1;
|
||||
info.bounds = def_reg;
|
||||
res = get_reg_simple(ctx, reg_file, info);
|
||||
}
|
||||
} else {
|
||||
info.lb = lb;
|
||||
info.ub = MIN2(def_reg_lo, ub);
|
||||
/* Try to find space within the bounds but outside of the definition */
|
||||
info.bounds = PhysRegInterval::from_until(bounds.lo(), MIN2(def_reg.lo(), bounds.hi()));
|
||||
res = get_reg_simple(ctx, reg_file, info);
|
||||
if (!res.second && def_reg_hi < ub) {
|
||||
info.lb = (def_reg_hi + info.stride) & ~(info.stride - 1);
|
||||
info.ub = ub;
|
||||
if (!res.second && def_reg.hi() <= bounds.hi()) {
|
||||
unsigned lo = (def_reg.hi() + info.stride - 1) & ~(info.stride - 1);
|
||||
info.bounds = PhysRegInterval::from_until(lo, bounds.hi());
|
||||
res = get_reg_simple(ctx, reg_file, info);
|
||||
}
|
||||
}
|
||||
|
|
@ -898,17 +888,16 @@ bool get_regs_for_copies(ra_ctx& ctx,
|
|||
continue;
|
||||
}
|
||||
|
||||
unsigned best_pos = lb;
|
||||
unsigned best_pos = bounds.lo();
|
||||
unsigned num_moves = 0xFF;
|
||||
unsigned num_vars = 0;
|
||||
|
||||
/* we use a sliding window to find potential positions */
|
||||
unsigned reg_lo = lb;
|
||||
unsigned reg_hi = lb + size - 1;
|
||||
PhysRegInterval reg_win { bounds.lo(), size };
|
||||
unsigned stride = var.rc.is_subdword() ? 1 : info.stride;
|
||||
for (reg_lo = lb, reg_hi = lb + size - 1; reg_hi < ub; reg_lo += stride, reg_hi += stride) {
|
||||
if (!is_dead_operand && ((reg_lo >= def_reg_lo && reg_lo <= def_reg_hi) ||
|
||||
(reg_hi >= def_reg_lo && reg_hi <= def_reg_hi)))
|
||||
for (; reg_win.hi() <= bounds.hi(); reg_win += stride) {
|
||||
if (!is_dead_operand && ((reg_win.lo() >= def_reg.lo() && reg_win.lo() < def_reg.hi()) ||
|
||||
(reg_win.hi() > def_reg.lo() && reg_win.hi() <= def_reg.hi())))
|
||||
continue;
|
||||
|
||||
/* second, check that we have at most k=num_moves elements in the window
|
||||
|
|
@ -917,7 +906,7 @@ bool get_regs_for_copies(ra_ctx& ctx,
|
|||
unsigned n = 0;
|
||||
unsigned last_var = 0;
|
||||
bool found = true;
|
||||
for (unsigned j = reg_lo; found && j <= reg_hi; j++) {
|
||||
for (unsigned j = reg_win.lo(); found && j < reg_win.hi(); j++) {
|
||||
if (reg_file[j] == 0 || reg_file[j] == last_var)
|
||||
continue;
|
||||
|
||||
|
|
@ -957,7 +946,7 @@ bool get_regs_for_copies(ra_ctx& ctx,
|
|||
}
|
||||
|
||||
if (found) {
|
||||
best_pos = reg_lo;
|
||||
best_pos = reg_win.lo();
|
||||
num_moves = k;
|
||||
num_vars = n;
|
||||
}
|
||||
|
|
@ -967,24 +956,23 @@ bool get_regs_for_copies(ra_ctx& ctx,
|
|||
if (num_moves == 0xFF)
|
||||
return false;
|
||||
|
||||
reg_lo = best_pos;
|
||||
reg_hi = best_pos + size - 1;
|
||||
reg_win = { best_pos, size };
|
||||
|
||||
/* collect variables and block reg file */
|
||||
std::set<std::pair<unsigned, unsigned>> new_vars = collect_vars(ctx, reg_file, PhysReg{reg_lo}, size);
|
||||
std::set<std::pair<unsigned, unsigned>> new_vars = collect_vars(ctx, reg_file, PhysReg{reg_win.lo()}, size);
|
||||
|
||||
/* mark the area as blocked */
|
||||
reg_file.block(PhysReg{reg_lo}, var.rc);
|
||||
adjust_max_used_regs(ctx, var.rc, reg_lo);
|
||||
reg_file.block(PhysReg{reg_win.lo()}, var.rc);
|
||||
adjust_max_used_regs(ctx, var.rc, reg_win.lo());
|
||||
|
||||
if (!get_regs_for_copies(ctx, reg_file, parallelcopies, new_vars, lb, ub, instr, def_reg_lo, def_reg_hi))
|
||||
if (!get_regs_for_copies(ctx, reg_file, parallelcopies, new_vars, bounds, instr, def_reg))
|
||||
return false;
|
||||
|
||||
/* create parallelcopy pair (without definition id) */
|
||||
Temp tmp = Temp(id, var.rc);
|
||||
Operand pc_op = Operand(tmp);
|
||||
pc_op.setFixed(var.reg);
|
||||
Definition pc_def = Definition(PhysReg{reg_lo}, pc_op.regClass());
|
||||
Definition pc_def = Definition(PhysReg{reg_win.lo()}, pc_op.regClass());
|
||||
parallelcopies.emplace_back(pc_op, pc_def);
|
||||
}
|
||||
|
||||
|
|
@ -995,17 +983,16 @@ bool get_regs_for_copies(ra_ctx& ctx,
|
|||
std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
|
||||
RegisterFile& reg_file,
|
||||
std::vector<std::pair<Operand, Definition>>& parallelcopies,
|
||||
DefInfo info,
|
||||
const DefInfo& info,
|
||||
aco_ptr<Instruction>& instr)
|
||||
{
|
||||
uint32_t lb = info.lb;
|
||||
uint32_t ub = info.ub;
|
||||
const PhysRegInterval& bounds = info.bounds;
|
||||
uint32_t size = info.size;
|
||||
uint32_t stride = info.stride;
|
||||
RegClass rc = info.rc;
|
||||
|
||||
/* check how many free regs we have */
|
||||
unsigned regs_free = reg_file.count_zero(PhysReg{lb}, ub-lb);
|
||||
unsigned regs_free = reg_file.count_zero(PhysReg{bounds.lo()}, bounds.size);
|
||||
|
||||
/* mark and count killed operands */
|
||||
unsigned killed_ops = 0;
|
||||
|
|
@ -1014,8 +1001,8 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
|
|||
Operand& op = instr->operands[j];
|
||||
if (op.isTemp() &&
|
||||
op.isFirstKillBeforeDef() &&
|
||||
op.physReg() >= lb &&
|
||||
op.physReg() < ub &&
|
||||
op.physReg() >= bounds.lo() &&
|
||||
op.physReg() < bounds.hi() &&
|
||||
!reg_file.test(PhysReg{op.physReg().reg()}, align(op.bytes() + op.physReg().byte(), 4))) {
|
||||
assert(op.isFixed());
|
||||
|
||||
|
|
@ -1035,22 +1022,20 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
|
|||
op_moves = size - (regs_free - killed_ops);
|
||||
|
||||
/* find the best position to place the definition */
|
||||
unsigned best_pos = lb;
|
||||
PhysRegInterval best_win = { bounds.lo(), size };
|
||||
unsigned num_moves = 0xFF;
|
||||
unsigned num_vars = 0;
|
||||
|
||||
/* we use a sliding window to check potential positions */
|
||||
unsigned reg_lo = lb;
|
||||
unsigned reg_hi = lb + size - 1;
|
||||
for (reg_lo = lb, reg_hi = lb + size - 1; reg_hi < ub; reg_lo += stride, reg_hi += stride) {
|
||||
for (PhysRegInterval reg_win = { bounds.lo(), size }; reg_win.hi() <= bounds.hi(); reg_win += stride) {
|
||||
/* first check if the register window starts in the middle of an
|
||||
* allocated variable: this is what we have to fix to allow for
|
||||
* num_moves > size */
|
||||
if (reg_lo > lb && !reg_file.is_empty_or_blocked(PhysReg(reg_lo)) &&
|
||||
reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1)))
|
||||
if (reg_win.lo() > bounds.lo() && !reg_file.is_empty_or_blocked(PhysReg(reg_win.lo())) &&
|
||||
reg_file.get_id(PhysReg(reg_win.lo())) == reg_file.get_id(PhysReg(reg_win.lo()).advance(-1)))
|
||||
continue;
|
||||
if (reg_hi < ub - 1 && !reg_file.is_empty_or_blocked(PhysReg(reg_hi).advance(3)) &&
|
||||
reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4)))
|
||||
if (reg_win.hi() < bounds.hi() && !reg_file.is_empty_or_blocked(PhysReg(reg_win.hi()).advance(-1)) &&
|
||||
reg_file.get_id(PhysReg(reg_win.hi()).advance(-1)) == reg_file.get_id(PhysReg(reg_win.hi())))
|
||||
continue;
|
||||
|
||||
/* second, check that we have at most k=num_moves elements in the window
|
||||
|
|
@ -1060,8 +1045,8 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
|
|||
unsigned remaining_op_moves = op_moves;
|
||||
unsigned last_var = 0;
|
||||
bool found = true;
|
||||
bool aligned = rc == RegClass::v4 && reg_lo % 4 == 0;
|
||||
for (unsigned j = reg_lo; found && j <= reg_hi; j++) {
|
||||
bool aligned = rc == RegClass::v4 && reg_win.lo() % 4 == 0;
|
||||
for (unsigned j = reg_win.lo(); found && j < reg_win.hi(); j++) {
|
||||
/* dead operands effectively reduce the number of estimated moves */
|
||||
if (is_killed_operand[j & 0xFF]) {
|
||||
if (remaining_op_moves) {
|
||||
|
|
@ -1104,7 +1089,7 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
|
|||
continue;
|
||||
|
||||
if (found) {
|
||||
best_pos = reg_lo;
|
||||
best_win = reg_win;
|
||||
num_moves = k;
|
||||
num_vars = n;
|
||||
}
|
||||
|
|
@ -1115,19 +1100,19 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
|
|||
|
||||
/* now, we figured the placement for our definition */
|
||||
RegisterFile tmp_file(reg_file);
|
||||
std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, tmp_file, PhysReg{best_pos}, size);
|
||||
std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, tmp_file, PhysReg{best_win.lo()}, size);
|
||||
|
||||
if (instr->opcode == aco_opcode::p_create_vector) {
|
||||
/* move killed operands which aren't yet at the correct position (GFX9+)
|
||||
* or which are in the definition space */
|
||||
PhysReg reg = PhysReg{best_pos};
|
||||
PhysReg reg = PhysReg{best_win.lo()};
|
||||
for (Operand& op : instr->operands) {
|
||||
if (op.isTemp() && op.isFirstKillBeforeDef() &&
|
||||
op.getTemp().type() == rc.type()) {
|
||||
if (op.physReg() != reg &&
|
||||
(ctx.program->chip_class >= GFX9 ||
|
||||
(op.physReg().advance(op.bytes()) > PhysReg{best_pos} &&
|
||||
op.physReg() < PhysReg{best_pos + size}))) {
|
||||
(op.physReg().advance(op.bytes()) > PhysReg{best_win.lo()} &&
|
||||
op.physReg() < PhysReg{best_win.hi()}))) {
|
||||
vars.emplace(op.bytes(), op.tempId());
|
||||
tmp_file.clear(op);
|
||||
} else {
|
||||
|
|
@ -1145,13 +1130,13 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
|
|||
}
|
||||
|
||||
std::vector<std::pair<Operand, Definition>> pc;
|
||||
if (!get_regs_for_copies(ctx, tmp_file, pc, vars, lb, ub, instr, best_pos, best_pos + size - 1))
|
||||
if (!get_regs_for_copies(ctx, tmp_file, pc, vars, bounds, instr, best_win))
|
||||
return {{}, false};
|
||||
|
||||
parallelcopies.insert(parallelcopies.end(), pc.begin(), pc.end());
|
||||
|
||||
adjust_max_used_regs(ctx, rc, best_pos);
|
||||
return {PhysReg{best_pos}, true};
|
||||
adjust_max_used_regs(ctx, rc, best_win.lo());
|
||||
return {PhysReg{best_win.lo()}, true};
|
||||
}
|
||||
|
||||
bool get_reg_specified(ra_ctx& ctx,
|
||||
|
|
@ -1307,7 +1292,7 @@ PhysReg get_reg(ra_ctx& ctx,
|
|||
|
||||
/* We should only fail here because keeping under the limit would require
|
||||
* too many moves. */
|
||||
assert(reg_file.count_zero(PhysReg{info.lb}, info.ub-info.lb) >= info.size);
|
||||
assert(reg_file.count_zero(PhysReg{info.bounds.lo()}, info.bounds.size) >= info.size);
|
||||
|
||||
//FIXME: if nothing helps, shift-rotate the registers to make space
|
||||
|
||||
|
|
@ -1326,13 +1311,11 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
|
|||
uint32_t size = rc.size();
|
||||
uint32_t bytes = rc.bytes();
|
||||
uint32_t stride = 1;
|
||||
uint32_t lb, ub;
|
||||
PhysRegInterval bounds;
|
||||
if (rc.type() == RegType::vgpr) {
|
||||
lb = 256;
|
||||
ub = 256 + ctx.program->max_reg_demand.vgpr;
|
||||
bounds = { 256, (unsigned)ctx.program->max_reg_demand.vgpr };
|
||||
} else {
|
||||
lb = 0;
|
||||
ub = ctx.program->max_reg_demand.sgpr;
|
||||
bounds = { 0, (unsigned)ctx.program->max_reg_demand.sgpr };
|
||||
if (size == 2)
|
||||
stride = 2;
|
||||
else if (size >= 4)
|
||||
|
|
@ -1354,35 +1337,34 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
|
|||
if (offset > instr->operands[i].physReg().reg_b)
|
||||
continue;
|
||||
|
||||
unsigned reg_lo = instr->operands[i].physReg().reg_b - offset;
|
||||
if (reg_lo % 4)
|
||||
unsigned reg_lower = instr->operands[i].physReg().reg_b - offset;
|
||||
if (reg_lower % 4)
|
||||
continue;
|
||||
reg_lo /= 4;
|
||||
unsigned reg_hi = reg_lo + size - 1;
|
||||
PhysRegInterval reg_win = { reg_lower / 4, size };
|
||||
unsigned k = 0;
|
||||
|
||||
/* no need to check multiple times */
|
||||
if (reg_lo == best_pos)
|
||||
if (reg_win.lo() == best_pos)
|
||||
continue;
|
||||
|
||||
/* check borders */
|
||||
// TODO: this can be improved */
|
||||
if (reg_lo < lb || reg_hi >= ub || reg_lo % stride != 0)
|
||||
if (reg_win.lo() < bounds.lo() || reg_win.hi() > bounds.hi() || reg_win.lo() % stride != 0)
|
||||
continue;
|
||||
if (reg_lo > lb && reg_file[reg_lo] != 0 && reg_file.get_id(PhysReg(reg_lo)) == reg_file.get_id(PhysReg(reg_lo).advance(-1)))
|
||||
if (reg_win.lo() > bounds.lo() && reg_file[reg_win.lo()] != 0 && reg_file.get_id(PhysReg(reg_win.lo())) == reg_file.get_id(PhysReg(reg_win.lo()).advance(-1)))
|
||||
continue;
|
||||
if (reg_hi < ub - 1 && reg_file[reg_hi] != 0 && reg_file.get_id(PhysReg(reg_hi).advance(3)) == reg_file.get_id(PhysReg(reg_hi).advance(4)))
|
||||
if (reg_win.hi() < bounds.hi() && reg_file[reg_win.hi() - 1] != 0 && reg_file.get_id(PhysReg(reg_win.hi()).advance(-1)) == reg_file.get_id(PhysReg(reg_win.hi())))
|
||||
continue;
|
||||
|
||||
/* count variables to be moved and check war_hint */
|
||||
bool war_hint = false;
|
||||
bool linear_vgpr = false;
|
||||
for (unsigned j = reg_lo; j <= reg_hi && !linear_vgpr; j++) {
|
||||
for (unsigned j = reg_win.lo(); j < reg_win.hi() && !linear_vgpr; j++) {
|
||||
if (reg_file[j] != 0) {
|
||||
if (reg_file[j] == 0xF0000000) {
|
||||
PhysReg reg;
|
||||
reg.reg_b = j * 4;
|
||||
unsigned bytes_left = bytes - (j - reg_lo) * 4;
|
||||
unsigned bytes_left = bytes - (j - reg_win.lo()) * 4;
|
||||
for (unsigned byte_idx = 0; byte_idx < MIN2(bytes_left, 4); byte_idx++, reg.reg_b++)
|
||||
k += reg_file.test(reg, 1);
|
||||
} else {
|
||||
|
|
@ -1403,14 +1385,14 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
|
|||
!instr->operands[j].isTemp() ||
|
||||
instr->operands[j].getTemp().type() != rc.type())
|
||||
continue;
|
||||
if (instr->operands[j].physReg().reg_b != reg_lo * 4 + offset2)
|
||||
if (instr->operands[j].physReg().reg_b != reg_win.lo() * 4 + offset2)
|
||||
k += instr->operands[j].bytes();
|
||||
}
|
||||
bool aligned = rc == RegClass::v4 && reg_lo % 4 == 0;
|
||||
bool aligned = rc == RegClass::v4 && reg_win.lo() % 4 == 0;
|
||||
if (k > num_moves || (!aligned && k == num_moves))
|
||||
continue;
|
||||
|
||||
best_pos = reg_lo;
|
||||
best_pos = reg_win.lo();
|
||||
num_moves = k;
|
||||
best_war_hint = war_hint;
|
||||
}
|
||||
|
|
@ -1449,7 +1431,7 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
|
|||
}
|
||||
bool success = false;
|
||||
std::vector<std::pair<Operand, Definition>> pc;
|
||||
success = get_regs_for_copies(ctx, tmp_file, pc, vars, lb, ub, instr, best_pos, best_pos + size - 1);
|
||||
success = get_regs_for_copies(ctx, tmp_file, pc, vars, bounds, instr, PhysRegInterval { best_pos, size });
|
||||
|
||||
if (!success) {
|
||||
increase_register_file(ctx, temp.type());
|
||||
|
|
@ -2199,9 +2181,8 @@ void register_allocation(Program *program, std::vector<IDSet>& live_out_per_bloc
|
|||
ASSERTED bool success = false;
|
||||
DefInfo info(ctx, instr, definition.regClass(), -1);
|
||||
success = get_regs_for_copies(ctx, tmp_file, parallelcopy,
|
||||
vars, info.lb, info.ub, instr,
|
||||
definition.physReg(),
|
||||
definition.physReg() + definition.size() - 1);
|
||||
vars, info.bounds, instr,
|
||||
PhysRegInterval { definition.physReg(), definition.size() });
|
||||
assert(success);
|
||||
|
||||
update_renames(ctx, register_file, parallelcopy, instr, false);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue