mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-03 18:38:11 +02:00
aco/ra: count constant moves in get_reg_create_vector()
Also implements a correct_pos_mask to keep track which operands are already at the right target position. To mitigate some regressions, call get_reg_impl() less often. Totals from 1229 (0.91% of 134913) affected shaders: (GFX10.3) VGPRs: 60216 -> 59848 (-0.61%) CodeSize: 3716496 -> 3711268 (-0.14%); split: -0.19%, +0.05% MaxWaves: 27952 -> 28004 (+0.19%) Instrs: 685983 -> 685035 (-0.14%); split: -0.20%, +0.06% Latency: 6727587 -> 6725340 (-0.03%); split: -0.06%, +0.02% InvThroughput: 9289043 -> 9289866 (+0.01%); split: -0.02%, +0.03% VClause: 17730 -> 17740 (+0.06%); split: -0.25%, +0.30% Copies: 54352 -> 53420 (-1.71%); split: -2.46%, +0.75% Branches: 12122 -> 12121 (-0.01%) Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11526>
This commit is contained in:
parent
7ad1eb4e8c
commit
9181e8ceba
1 changed files with 24 additions and 15 deletions
|
|
@ -1680,6 +1680,7 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
|
|||
PhysReg best_pos{0xFFF};
|
||||
unsigned num_moves = 0xFF;
|
||||
bool best_avoid = true;
|
||||
uint32_t correct_pos_mask = 0;
|
||||
|
||||
/* test for each operand which definition placement causes the least shuffle instructions */
|
||||
for (unsigned i = 0, offset = 0; i < instr->operands.size();
|
||||
|
|
@ -1744,13 +1745,14 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
|
|||
continue;
|
||||
|
||||
/* count operands in wrong positions */
|
||||
uint32_t correct_pos_mask_new = 0;
|
||||
for (unsigned j = 0, offset2 = 0; j < instr->operands.size();
|
||||
offset2 += instr->operands[j].bytes(), j++) {
|
||||
if (j == i || !instr->operands[j].isTemp() ||
|
||||
instr->operands[j].getTemp().type() != rc.type())
|
||||
continue;
|
||||
if (instr->operands[j].physReg().reg_b != reg_win.lo() * 4 + offset2)
|
||||
k += instr->operands[j].bytes();
|
||||
Operand& op = instr->operands[j];
|
||||
if (op.isTemp() && op.physReg().reg_b == reg_win.lo() * 4 + offset2)
|
||||
correct_pos_mask_new |= 1 << j;
|
||||
else
|
||||
k += op.bytes();
|
||||
}
|
||||
bool aligned = rc == RegClass::v4 && reg_win.lo() % 4 == 0;
|
||||
if (k > num_moves || (!aligned && k == num_moves))
|
||||
|
|
@ -1759,18 +1761,28 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
|
|||
best_pos = reg_win.lo();
|
||||
num_moves = k;
|
||||
best_avoid = avoid;
|
||||
correct_pos_mask = correct_pos_mask_new;
|
||||
}
|
||||
|
||||
if (num_moves >= bytes)
|
||||
/* too many moves: try the generic get_reg() function */
|
||||
if (num_moves >= 2 * bytes) {
|
||||
return get_reg(ctx, reg_file, temp, parallelcopies, instr);
|
||||
} else if (num_moves > bytes) {
|
||||
DefInfo info(ctx, instr, rc, -1);
|
||||
std::pair<PhysReg, bool> res = get_reg_simple(ctx, reg_file, info);
|
||||
if (res.second)
|
||||
return res.first;
|
||||
}
|
||||
|
||||
/* re-enable killed operands which are in the wrong position */
|
||||
RegisterFile tmp_file(reg_file);
|
||||
for (unsigned i = 0, offset = 0; i < instr->operands.size();
|
||||
offset += instr->operands[i].bytes(), i++) {
|
||||
if (instr->operands[i].isTemp() && instr->operands[i].isFirstKillBeforeDef() &&
|
||||
instr->operands[i].physReg().reg_b != best_pos.reg_b + offset)
|
||||
tmp_file.fill(instr->operands[i]);
|
||||
for (Operand& op : instr->operands) {
|
||||
if (op.isTemp() && op.isFirstKillBeforeDef())
|
||||
tmp_file.fill(op);
|
||||
}
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
if ((correct_pos_mask >> i) & 1u && instr->operands[i].isKill())
|
||||
tmp_file.clear(instr->operands[i]);
|
||||
}
|
||||
|
||||
/* collect variables to be moved */
|
||||
|
|
@ -1782,7 +1794,7 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
|
|||
if (!instr->operands[i].isTemp() || !instr->operands[i].isFirstKillBeforeDef() ||
|
||||
instr->operands[i].getTemp().type() != rc.type())
|
||||
continue;
|
||||
bool correct_pos = instr->operands[i].physReg().reg_b == best_pos.reg_b + offset;
|
||||
bool correct_pos = !tmp_file.test(instr->operands[i].physReg(), instr->operands[i].bytes());
|
||||
/* GFX9+: move killed operands which aren't yet at the correct position
|
||||
* Moving all killed operands generally leads to more register swaps.
|
||||
* This is only done on GFX9+ because of the cheap v_swap instruction.
|
||||
|
|
@ -1790,9 +1802,6 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
|
|||
if (ctx.program->chip_class >= GFX9 && !correct_pos) {
|
||||
vars.emplace(instr->operands[i].bytes(), instr->operands[i].tempId());
|
||||
tmp_file.clear(instr->operands[i]);
|
||||
/* fill operands which are in the correct position to avoid overwriting */
|
||||
} else if (correct_pos) {
|
||||
tmp_file.fill(instr->operands[i]);
|
||||
}
|
||||
}
|
||||
bool success = false;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue