mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-11 20:00:38 +01:00
aco/ra: move variables from affinity register to avoid waitcnt
If we don't use this affinity register, we're likely to end up moving the temporary later. If it's a memory instruction destination, that's probably more expensive than just copying the blocking variables. fossil-db (navi31): Totals from 504 (0.63% of 79825) affected shaders: Instrs: 4108284 -> 4109026 (+0.02%); split: -0.01%, +0.03% CodeSize: 21226764 -> 21229764 (+0.01%); split: -0.01%, +0.02% Latency: 26931635 -> 26806989 (-0.46%); split: -0.47%, +0.00% InvThroughput: 8443520 -> 8439235 (-0.05%); split: -0.06%, +0.01% VClause: 99209 -> 99314 (+0.11%); split: -0.00%, +0.11% SClause: 85089 -> 85085 (-0.00%) Copies: 340323 -> 340993 (+0.20%); split: -0.06%, +0.26% Branches: 117225 -> 117209 (-0.01%); split: -0.02%, +0.00% VALU: 2421859 -> 2422529 (+0.03%); split: -0.01%, +0.04% SALU: 503465 -> 503470 (+0.00%); split: -0.00%, +0.00% fossil-db (navi21): Totals from 582 (0.73% of 79825) affected shaders: Instrs: 3714908 -> 3714990 (+0.00%); split: -0.02%, +0.02% CodeSize: 19977880 -> 19973076 (-0.02%); split: -0.04%, +0.01% VGPRs: 40480 -> 40496 (+0.04%) Latency: 26028895 -> 25772711 (-0.98%); split: -0.99%, +0.00% InvThroughput: 9827389 -> 9818194 (-0.09%); split: -0.10%, +0.01% VClause: 103702 -> 103815 (+0.11%); split: -0.02%, +0.13% SClause: 90861 -> 90857 (-0.00%) Copies: 335276 -> 335992 (+0.21%); split: -0.09%, +0.30% Branches: 123912 -> 123897 (-0.01%); split: -0.02%, +0.00% VALU: 2466032 -> 2466748 (+0.03%); split: -0.01%, +0.04% SALU: 533658 -> 533667 (+0.00%); split: -0.00%, +0.00% Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38262>
This commit is contained in:
parent
681ec4cba7
commit
310f588f92
1 changed files with 62 additions and 11 deletions
|
|
@ -1214,14 +1214,9 @@ find_vars(ra_ctx& ctx, const RegisterFile& reg_file, const PhysRegInterval reg_i
|
|||
return vars;
|
||||
}
|
||||
|
||||
/* collect variables from a register area and clear reg_file
|
||||
* variables are sorted in decreasing size and
|
||||
* increasing assigned register
|
||||
*/
|
||||
std::vector<unsigned>
|
||||
collect_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval reg_interval)
|
||||
void
|
||||
collect_vars(ra_ctx& ctx, RegisterFile& reg_file, std::vector<unsigned>& ids)
|
||||
{
|
||||
std::vector<unsigned> ids = find_vars(ctx, reg_file, reg_interval);
|
||||
std::sort(ids.begin(), ids.end(),
|
||||
[&](unsigned a, unsigned b)
|
||||
{
|
||||
|
|
@ -1235,6 +1230,17 @@ collect_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval reg_inte
|
|||
assignment& var = ctx.assignments[id];
|
||||
reg_file.clear(var.reg, var.rc);
|
||||
}
|
||||
}
|
||||
|
||||
/* collect variables from a register area and clear reg_file
|
||||
* variables are sorted in decreasing size and
|
||||
* increasing assigned register
|
||||
*/
|
||||
std::vector<unsigned>
|
||||
collect_vars(ra_ctx& ctx, RegisterFile& reg_file, const PhysRegInterval reg_interval)
|
||||
{
|
||||
std::vector<unsigned> ids = find_vars(ctx, reg_file, reg_interval);
|
||||
collect_vars(ctx, reg_file, ids);
|
||||
return ids;
|
||||
}
|
||||
|
||||
|
|
@ -1982,6 +1988,49 @@ should_compact_linear_vgprs(ra_ctx& ctx, const RegisterFile& reg_file)
|
|||
return max_vgpr_usage > get_reg_bounds(ctx, RegType::vgpr, false).size;
|
||||
}
|
||||
|
||||
std::optional<PhysReg>
|
||||
get_reg_affinity(ra_ctx& ctx, const RegisterFile& reg_file, Temp temp,
|
||||
std::vector<parallelcopy>& parallelcopies, aco_ptr<Instruction>& instr,
|
||||
int operand_index, assignment& affinity)
|
||||
{
|
||||
/* check if the target register is blocked */
|
||||
if (operand_index == -1 && reg_file.test(affinity.reg, temp.bytes())) {
|
||||
/* It is cheaper to just assign a different register. */
|
||||
if (ctx.assignments[temp.id()].weight == 0)
|
||||
return {};
|
||||
|
||||
const PhysRegInterval def_regs{PhysReg(affinity.reg.reg()), temp.size()};
|
||||
std::vector<unsigned> vars = find_vars(ctx, reg_file, def_regs);
|
||||
|
||||
/* Bail if the cost of moving the blocking var is likely more expensive
|
||||
* than assigning a different register.
|
||||
*/
|
||||
if (std::any_of(vars.begin(), vars.end(), [&](unsigned id) -> bool
|
||||
{ return ctx.assignments[id].weight >= ctx.assignments[temp.id()].weight; }))
|
||||
return {};
|
||||
|
||||
RegisterFile tmp_file(reg_file);
|
||||
collect_vars(ctx, tmp_file, vars);
|
||||
|
||||
/* re-enable the killed operands, so that we don't move the blocking vars there */
|
||||
if (!is_phi(instr))
|
||||
tmp_file.fill_killed_operands(instr.get());
|
||||
|
||||
/* create parallelcopy to move blocking vars */
|
||||
std::vector<parallelcopy> pc;
|
||||
if (get_reg_specified(ctx, tmp_file, temp.regClass(), instr, affinity.reg, operand_index) &&
|
||||
get_regs_for_copies(ctx, tmp_file, parallelcopies, vars, instr, def_regs)) {
|
||||
parallelcopies.insert(parallelcopies.end(), pc.begin(), pc.end());
|
||||
return affinity.reg;
|
||||
}
|
||||
} else if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, affinity.reg,
|
||||
operand_index)) {
|
||||
return affinity.reg;
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
PhysReg
|
||||
get_reg(ra_ctx& ctx, const RegisterFile& reg_file, Temp temp,
|
||||
std::vector<parallelcopy>& parallelcopies, aco_ptr<Instruction>& instr,
|
||||
|
|
@ -2008,11 +2057,15 @@ get_reg(ra_ctx& ctx, const RegisterFile& reg_file, Temp temp,
|
|||
}
|
||||
}
|
||||
|
||||
std::optional<PhysReg> res;
|
||||
|
||||
if (ctx.assignments[temp.id()].affinity) {
|
||||
assignment& affinity = ctx.assignments[ctx.assignments[temp.id()].affinity];
|
||||
if (affinity.assigned) {
|
||||
if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, affinity.reg, operand_index))
|
||||
return affinity.reg;
|
||||
res =
|
||||
get_reg_affinity(ctx, reg_file, temp, parallelcopies, instr, operand_index, affinity);
|
||||
if (res)
|
||||
return *res;
|
||||
}
|
||||
}
|
||||
if (ctx.assignments[temp.id()].precolor_affinity) {
|
||||
|
|
@ -2021,8 +2074,6 @@ get_reg(ra_ctx& ctx, const RegisterFile& reg_file, Temp temp,
|
|||
return ctx.assignments[temp.id()].reg;
|
||||
}
|
||||
|
||||
std::optional<PhysReg> res;
|
||||
|
||||
if (ctx.vectors.find(temp.id()) != ctx.vectors.end()) {
|
||||
res = get_reg_vector(ctx, reg_file, temp, instr, operand_index);
|
||||
if (res)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue