aco/ra: refactor collect_vars() to return a sorted vector

The vector of IDs is sorted with decreasing sizes,
and by increasing assigned registers.
This decouples register assingment from ssa IDs.

Totals from 12694 (9.41% of 134913) affected shaders: (GFX10.3)
VGPRs: 757864 -> 757848 (-0.00%); split: -0.00%, +0.00%
CodeSize: 72350540 -> 72348688 (-0.00%); split: -0.02%, +0.02%
MaxWaves: 237018 -> 237020 (+0.00%); split: +0.00%, -0.00%
Instrs: 13545494 -> 13544699 (-0.01%); split: -0.03%, +0.02%
Latency: 148539203 -> 148533292 (-0.00%); split: -0.01%, +0.00%
InvThroughput: 30319086 -> 30320382 (+0.00%); split: -0.01%, +0.01%
VClause: 326875 -> 327028 (+0.05%); split: -0.05%, +0.09%
SClause: 479833 -> 479837 (+0.00%); split: -0.00%, +0.00%
Copies: 862152 -> 860914 (-0.14%); split: -0.43%, +0.28%
Branches: 317775 -> 317777 (+0.00%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11526>
This commit is contained in:
Daniel Schürmann 2021-06-18 17:51:15 +02:00 committed by Marge Bot
parent 61c36b6dc0
commit 70aea6b41a
2 changed files with 25 additions and 25 deletions

View file

@ -1006,19 +1006,28 @@ find_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval reg_interva
return vars;
}
/* collect variables from a register area and clear reg_file */
std::set<std::pair<unsigned, unsigned>>
/* collect variables from a register area and clear reg_file
* variables are sorted in decreasing size and
* increasing assigned register
*/
std::vector<unsigned>
collect_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval reg_interval)
{
std::vector<unsigned> ids = find_vars(ctx, reg_file, reg_interval);
std::set<std::pair<unsigned, unsigned>> vars;
std::sort(ids.begin(), ids.end(),
[&](unsigned a, unsigned b)
{
assignment& var_a = ctx.assignments[a];
assignment& var_b = ctx.assignments[b];
return var_a.rc.bytes() > var_b.rc.bytes() ||
(var_a.rc.bytes() == var_b.rc.bytes() && var_a.reg < var_b.reg);
});
for (unsigned id : ids) {
assignment& var = ctx.assignments[id];
reg_file.clear(var.reg, var.rc);
vars.emplace(var.rc.bytes(), id);
}
return vars;
return ids;
}
std::pair<PhysReg, bool>
@ -1073,17 +1082,11 @@ get_reg_for_create_vector_copy(ra_ctx& ctx, RegisterFile& reg_file,
bool
get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file,
std::vector<std::pair<Operand, Definition>>& parallelcopies,
const std::set<std::pair<unsigned, unsigned>>& vars,
const PhysRegInterval bounds, aco_ptr<Instruction>& instr,
const PhysRegInterval def_reg)
const std::vector<unsigned>& vars, const PhysRegInterval bounds,
aco_ptr<Instruction>& instr, const PhysRegInterval def_reg)
{
/* variables are sorted from small sized to large */
/* NOTE: variables are also sorted by ID. this only affects a very small number of shaders
* slightly though. */
// TODO: sort by register instead of id
for (std::set<std::pair<unsigned, unsigned>>::const_reverse_iterator it = vars.rbegin();
it != vars.rend(); ++it) {
unsigned id = it->second;
/* Variables are sorted from large to small and with increasing assigned register */
for (unsigned id : vars) {
assignment& var = ctx.assignments[id];
DefInfo info = DefInfo(ctx, ctx.pseudo_dummy, var.rc, -1);
uint32_t size = info.size;
@ -1203,7 +1206,7 @@ get_regs_for_copies(ra_ctx& ctx, RegisterFile& reg_file,
PhysRegInterval reg_win{best_pos, size};
/* collect variables and block reg file */
std::set<std::pair<unsigned, unsigned>> new_vars = collect_vars(ctx, reg_file, reg_win);
std::vector<unsigned> new_vars = collect_vars(ctx, reg_file, reg_win);
/* mark the area as blocked */
reg_file.block(reg_win.lo(), var.rc);
@ -1351,7 +1354,7 @@ get_reg_impl(ra_ctx& ctx, RegisterFile& reg_file,
}
}
std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, tmp_file, best_win);
std::vector<unsigned> vars = collect_vars(ctx, tmp_file, best_win);
/* re-enable killed operands */
if (!is_phi(instr) && instr->opcode != aco_opcode::p_create_vector) {
@ -1820,8 +1823,7 @@ get_reg_create_vector(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
}
/* collect variables to be moved */
std::set<std::pair<unsigned, unsigned>> vars =
collect_vars(ctx, tmp_file, PhysRegInterval{best_pos, size});
std::vector<unsigned> vars = collect_vars(ctx, tmp_file, PhysRegInterval{best_pos, size});
bool success = false;
std::vector<std::pair<Operand, Definition>> pc;
@ -1952,8 +1954,7 @@ get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
RegisterFile tmp_file(register_file);
std::set<std::pair<unsigned, unsigned>> blocking_vars =
collect_vars(ctx, tmp_file, target);
std::vector<unsigned> blocking_vars = collect_vars(ctx, tmp_file, target);
tmp_file.clear(src, operand.regClass()); // TODO: try to avoid moving block vars to src
tmp_file.block(operand.physReg(), operand.regClass());
@ -2734,8 +2735,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
const PhysRegInterval def_regs{definition.physReg(), definition.size()};
/* create parallelcopy pair to move blocking vars */
std::set<std::pair<unsigned, unsigned>> vars =
collect_vars(ctx, register_file, def_regs);
std::vector<unsigned> vars = collect_vars(ctx, register_file, def_regs);
RegisterFile tmp_file(register_file);
/* re-enable the killed operands, so that we don't move the blocking vars there */

View file

@ -136,13 +136,13 @@ BEGIN_TEST(regalloc.precolor.vector.collect)
if (!setup_cs("s2 s1 s1", GFX10))
return;
//! s1: %tmp2_2:s[0], s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp1:s[2], %tmp0:s[0-1]
//! s1: %tmp1_2:s[0], s1: %tmp2_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp1:s[2], %tmp2:s[3], %tmp0:s[0-1]
//! p_unit_test %tmp0_2:s[2-3]
Operand op(inputs[0]);
op.setFixed(PhysReg(2));
bld.pseudo(aco_opcode::p_unit_test, op);
//! p_unit_test %tmp1_2:s[1], %tmp2_2:s[0]
//! p_unit_test %tmp1_2:s[0], %tmp2_2:s[1]
bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]);
finish_ra_test(ra_test_policy());