mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-22 22:10:10 +01:00
aco: improve p_create_vector RA for sub-dword operands
There are still improvements needed for sub-dword definitions, but that's not as simple. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4507>
This commit is contained in:
parent
e18711cda3
commit
52cc1f8237
1 changed files with 32 additions and 17 deletions
|
|
@ -481,11 +481,12 @@ bool get_regs_for_copies(ra_ctx& ctx,
|
||||||
std::pair<PhysReg, bool> res;
|
std::pair<PhysReg, bool> res;
|
||||||
if (is_dead_operand) {
|
if (is_dead_operand) {
|
||||||
if (instr->opcode == aco_opcode::p_create_vector) {
|
if (instr->opcode == aco_opcode::p_create_vector) {
|
||||||
for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].size(), i++) {
|
for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].bytes(), i++) {
|
||||||
if (instr->operands[i].isTemp() && instr->operands[i].tempId() == id) {
|
if (instr->operands[i].isTemp() && instr->operands[i].tempId() == id) {
|
||||||
for (unsigned j = 0; j < size; j++)
|
PhysReg reg(def_reg_lo);
|
||||||
assert(reg_file[def_reg_lo + offset + j] == 0);
|
reg.reg_b += offset;
|
||||||
res = {PhysReg{def_reg_lo + offset}, true};
|
assert(!reg_file.test(reg, var.rc.bytes()));
|
||||||
|
res = {reg, true};
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -893,6 +894,7 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
|
||||||
{
|
{
|
||||||
/* create_vector instructions have different costs w.r.t. register coalescing */
|
/* create_vector instructions have different costs w.r.t. register coalescing */
|
||||||
uint32_t size = rc.size();
|
uint32_t size = rc.size();
|
||||||
|
uint32_t bytes = rc.bytes();
|
||||||
uint32_t stride = 1;
|
uint32_t stride = 1;
|
||||||
uint32_t lb, ub;
|
uint32_t lb, ub;
|
||||||
if (rc.type() == RegType::vgpr) {
|
if (rc.type() == RegType::vgpr) {
|
||||||
|
|
@ -907,20 +909,25 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
|
||||||
stride = 4;
|
stride = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//TODO: improve p_create_vector for sub-dword vectors
|
||||||
|
|
||||||
unsigned best_pos = -1;
|
unsigned best_pos = -1;
|
||||||
unsigned num_moves = 0xFF;
|
unsigned num_moves = 0xFF;
|
||||||
bool best_war_hint = true;
|
bool best_war_hint = true;
|
||||||
|
|
||||||
/* test for each operand which definition placement causes the least shuffle instructions */
|
/* test for each operand which definition placement causes the least shuffle instructions */
|
||||||
for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].size(), i++) {
|
for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].bytes(), i++) {
|
||||||
// TODO: think about, if we can alias live operands on the same register
|
// TODO: think about, if we can alias live operands on the same register
|
||||||
if (!instr->operands[i].isTemp() || !instr->operands[i].isKillBeforeDef() || instr->operands[i].getTemp().type() != rc.type())
|
if (!instr->operands[i].isTemp() || !instr->operands[i].isKillBeforeDef() || instr->operands[i].getTemp().type() != rc.type())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (offset > instr->operands[i].physReg())
|
if (offset > instr->operands[i].physReg().reg_b)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
unsigned reg_lo = instr->operands[i].physReg() - offset;
|
unsigned reg_lo = instr->operands[i].physReg().reg_b - offset;
|
||||||
|
if (reg_lo % 4)
|
||||||
|
continue;
|
||||||
|
reg_lo /= 4;
|
||||||
unsigned reg_hi = reg_lo + size - 1;
|
unsigned reg_hi = reg_lo + size - 1;
|
||||||
unsigned k = 0;
|
unsigned k = 0;
|
||||||
|
|
||||||
|
|
@ -942,24 +949,32 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
|
||||||
bool linear_vgpr = false;
|
bool linear_vgpr = false;
|
||||||
for (unsigned j = reg_lo; j <= reg_hi && !linear_vgpr; j++) {
|
for (unsigned j = reg_lo; j <= reg_hi && !linear_vgpr; j++) {
|
||||||
if (reg_file[j] != 0) {
|
if (reg_file[j] != 0) {
|
||||||
k++;
|
if (reg_file[j] == 0xF0000000) {
|
||||||
|
PhysReg reg;
|
||||||
|
reg.reg_b = j * 4;
|
||||||
|
unsigned bytes_left = bytes - (j - reg_lo) * 4;
|
||||||
|
for (unsigned k = 0; k < MIN2(bytes_left, 4); k++, reg.reg_b++)
|
||||||
|
k += reg_file.test(reg, 1);
|
||||||
|
} else {
|
||||||
|
k += 4;
|
||||||
/* we cannot split live ranges of linear vgprs */
|
/* we cannot split live ranges of linear vgprs */
|
||||||
if (ctx.assignments[reg_file[j]].rc & (1 << 6))
|
if (ctx.assignments[reg_file[j]].rc & (1 << 6))
|
||||||
linear_vgpr = true;
|
linear_vgpr = true;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
war_hint |= ctx.war_hint[j];
|
war_hint |= ctx.war_hint[j];
|
||||||
}
|
}
|
||||||
if (linear_vgpr || (war_hint && !best_war_hint))
|
if (linear_vgpr || (war_hint && !best_war_hint))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
/* count operands in wrong positions */
|
/* count operands in wrong positions */
|
||||||
for (unsigned j = 0, offset = 0; j < instr->operands.size(); offset += instr->operands[j].size(), j++) {
|
for (unsigned j = 0, offset = 0; j < instr->operands.size(); offset += instr->operands[j].bytes(), j++) {
|
||||||
if (j == i ||
|
if (j == i ||
|
||||||
!instr->operands[j].isTemp() ||
|
!instr->operands[j].isTemp() ||
|
||||||
instr->operands[j].getTemp().type() != rc.type())
|
instr->operands[j].getTemp().type() != rc.type())
|
||||||
continue;
|
continue;
|
||||||
if (instr->operands[j].physReg() != reg_lo + offset)
|
if (instr->operands[j].physReg().reg_b != reg_lo * 4 + offset)
|
||||||
k += instr->operands[j].size();
|
k += instr->operands[j].bytes();
|
||||||
}
|
}
|
||||||
bool aligned = rc == RegClass::v4 && reg_lo % 4 == 0;
|
bool aligned = rc == RegClass::v4 && reg_lo % 4 == 0;
|
||||||
if (k > num_moves || (!aligned && k == num_moves))
|
if (k > num_moves || (!aligned && k == num_moves))
|
||||||
|
|
@ -970,18 +985,18 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
|
||||||
best_war_hint = war_hint;
|
best_war_hint = war_hint;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (num_moves >= size)
|
if (num_moves >= bytes)
|
||||||
return get_reg(ctx, reg_file, rc, parallelcopies, instr);
|
return get_reg(ctx, reg_file, rc, parallelcopies, instr);
|
||||||
|
|
||||||
/* collect variables to be moved */
|
/* collect variables to be moved */
|
||||||
std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, reg_file, PhysReg{best_pos}, size);
|
std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, reg_file, PhysReg{best_pos}, size);
|
||||||
|
|
||||||
/* move killed operands which aren't yet at the correct position */
|
/* move killed operands which aren't yet at the correct position */
|
||||||
for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].size(), i++) {
|
for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].bytes(), i++) {
|
||||||
if (instr->operands[i].isTemp() &&
|
if (instr->operands[i].isTemp() &&
|
||||||
instr->operands[i].isFirstKillBeforeDef() &&
|
instr->operands[i].isFirstKillBeforeDef() &&
|
||||||
instr->operands[i].getTemp().type() == rc.type() &&
|
instr->operands[i].getTemp().type() == rc.type() &&
|
||||||
instr->operands[i].physReg() != best_pos + offset)
|
instr->operands[i].physReg().reg_b != best_pos * 4 + offset)
|
||||||
vars.emplace(instr->operands[i].bytes(), instr->operands[i].tempId());
|
vars.emplace(instr->operands[i].bytes(), instr->operands[i].tempId());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue