mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 15:50:11 +01:00
aco/cssa: don't create parallelcopies for constants and exec
if we are able to spill these directly.

Totals from 4913 (3.60% of 136546) affected shaders (Raven):
SpillSGPRs: 16021 -> 15451 (-3.56%); split: -3.87%, +0.31%
CodeSize: 58102020 -> 57371464 (-1.26%); split: -1.26%, +0.00%
Instrs: 11411454 -> 11230105 (-1.59%); split: -1.59%, +0.00%
Latency: 555706331 -> 550058635 (-1.02%); split: -1.07%, +0.05%
InvThroughput: 273023354 -> 271854469 (-0.43%); split: -0.44%, +0.01%
SClause: 385168 -> 385371 (+0.05%); split: -0.01%, +0.06%
Copies: 1342084 -> 1175762 (-12.39%); split: -12.40%, +0.01%
Branches: 392619 -> 378662 (-3.55%); split: -3.56%, +0.00%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9196>
This commit is contained in:
parent
18ba93e673
commit
b6a28aaa8b
1 changed file with 26 additions and 1 deletion
|
|
@ -78,10 +78,17 @@ void collect_parallelcopies(cssa_ctx& ctx)
|
||||||
phi->opcode != aco_opcode::p_linear_phi)
|
phi->opcode != aco_opcode::p_linear_phi)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
const Definition& def = phi->definitions[0];
|
||||||
|
|
||||||
|
/* if the definition is not temp, it is the exec mask.
|
||||||
|
* We can reload the exec mask directly from the spill slot.
|
||||||
|
*/
|
||||||
|
if (!def.isTemp())
|
||||||
|
continue;
|
||||||
|
|
||||||
std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ?
|
std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ?
|
||||||
block.logical_preds :
|
block.logical_preds :
|
||||||
block.linear_preds;
|
block.linear_preds;
|
||||||
const Definition& def = phi->definitions[0];
|
|
||||||
uint32_t index = ctx.merge_sets.size();
|
uint32_t index = ctx.merge_sets.size();
|
||||||
merge_set set;
|
merge_set set;
|
||||||
|
|
||||||
|
|
@ -91,6 +98,20 @@ void collect_parallelcopies(cssa_ctx& ctx)
|
||||||
if (op.isUndefined())
|
if (op.isUndefined())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (def.regClass().type() == RegType::sgpr && !op.isTemp()) {
|
||||||
|
/* SGPR inline constants and literals on GFX10+ can be spilled
|
||||||
|
* and reloaded directly (without intermediate register) */
|
||||||
|
if (op.isConstant()) {
|
||||||
|
if (ctx.program->chip_class >= GFX10)
|
||||||
|
continue;
|
||||||
|
if (op.size() == 1 && !op.isLiteral())
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
assert(op.isFixed() && op.physReg() == exec);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* create new temporary and rename operands */
|
/* create new temporary and rename operands */
|
||||||
Temp tmp = bld.tmp(def.regClass());
|
Temp tmp = bld.tmp(def.regClass());
|
||||||
ctx.parallelcopies[preds[i]].emplace_back(copy{Definition(tmp), op});
|
ctx.parallelcopies[preds[i]].emplace_back(copy{Definition(tmp), op});
|
||||||
|
|
@ -107,6 +128,10 @@ void collect_parallelcopies(cssa_ctx& ctx)
|
||||||
|
|
||||||
has_preheader_copy |= i == 0 && block.kind & block_kind_loop_header;
|
has_preheader_copy |= i == 0 && block.kind & block_kind_loop_header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (set.empty())
|
||||||
|
continue;
|
||||||
|
|
||||||
/* place the definition in dominance-order */
|
/* place the definition in dominance-order */
|
||||||
if (def.isTemp()) {
|
if (def.isTemp()) {
|
||||||
if (has_preheader_copy)
|
if (has_preheader_copy)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue