mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-07 08:40:11 +01:00
aco: end reduce tmp after control flow, when used within control flow
In the case of:
   v0 = start_linear_vgpr
   if (...) {
   } else {
      use_linear_vgpr(v0)
   }
   v0 = phi
We need a p_end_linear_vgpr to ensure that the phi does not use the same
VGPR as the linear VGPR.
fossil-db (gfx1100):
Totals from 3763 (2.80% of 134574) affected shaders:
MaxWaves: 90296 -> 90164 (-0.15%)
Instrs: 6857726 -> 6856608 (-0.02%); split: -0.03%, +0.01%
CodeSize: 35382188 -> 35377688 (-0.01%); split: -0.02%, +0.01%
VGPRs: 234864 -> 235692 (+0.35%); split: -0.01%, +0.36%
Latency: 47471923 -> 47474965 (+0.01%); split: -0.03%, +0.04%
InvThroughput: 5640320 -> 5639736 (-0.01%); split: -0.04%, +0.03%
VClause: 93098 -> 93107 (+0.01%); split: -0.01%, +0.02%
SClause: 214137 -> 214130 (-0.00%); split: -0.00%, +0.00%
Copies: 369895 -> 369305 (-0.16%); split: -0.31%, +0.15%
Branches: 164996 -> 164504 (-0.30%); split: -0.30%, +0.00%
PreVGPRs: 210655 -> 211438 (+0.37%)
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20621>
(cherry picked from commit 44fdd2ebcb)
This commit is contained in:
parent
1cafb71a19
commit
7c53e5748b
2 changed files with 25 additions and 22 deletions
|
|
@@ -265,7 +265,7 @@
|
|||
"description": "aco: end reduce tmp after control flow, when used within control flow",
|
||||
"nominated": true,
|
||||
"nomination_type": 0,
|
||||
"resolution": 0,
|
||||
"resolution": 1,
|
||||
"main_sha": null,
|
||||
"because_sha": null
|
||||
},
|
||||
|
|
|
|||
|
|
@@ -64,31 +64,36 @@ setup_reduce_temp(Program* program)
|
|||
Temp vtmp(0, RegClass(RegType::vgpr, maxSize).as_linear());
|
||||
int inserted_at = -1;
|
||||
int vtmp_inserted_at = -1;
|
||||
bool reduceTmp_in_loop = false;
|
||||
bool vtmp_in_loop = false;
|
||||
|
||||
for (Block& block : program->blocks) {
|
||||
|
||||
/* insert p_end_linear_vgpr after the outermost loop */
|
||||
if (reduceTmp_in_loop && block.loop_nest_depth == 0) {
|
||||
assert(inserted_at == (int)last_top_level_block_idx);
|
||||
|
||||
aco_ptr<Instruction> end{create_instruction<Instruction>(
|
||||
aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vtmp_in_loop ? 2 : 1, 0)};
|
||||
end->operands[0] = Operand(reduceTmp);
|
||||
if (vtmp_in_loop)
|
||||
end->operands[1] = Operand(vtmp);
|
||||
/* insert after the phis of the loop exit block */
|
||||
std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
|
||||
while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
|
||||
++it;
|
||||
block.instructions.insert(it, std::move(end));
|
||||
reduceTmp_in_loop = false;
|
||||
}
|
||||
|
||||
if (block.kind & block_kind_top_level)
|
||||
if (block.kind & block_kind_top_level) {
|
||||
last_top_level_block_idx = block.index;
|
||||
|
||||
/* TODO: this could be improved in this case:
|
||||
* start_linear_vgpr
|
||||
* if (...) {
|
||||
* use_linear_vgpr
|
||||
* }
|
||||
* end_linear_vgpr
|
||||
* Here, the linear vgpr is used before any phi copies, so this isn't necessary.
|
||||
*/
|
||||
if (inserted_at >= 0) {
|
||||
aco_ptr<Instruction> end{create_instruction<Instruction>(
|
||||
aco_opcode::p_end_linear_vgpr, Format::PSEUDO, vtmp_inserted_at >= 0 ? 2 : 1, 0)};
|
||||
end->operands[0] = Operand(reduceTmp);
|
||||
if (vtmp_inserted_at >= 0)
|
||||
end->operands[1] = Operand(vtmp);
|
||||
/* insert after the phis of the block */
|
||||
std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.begin();
|
||||
while ((*it)->opcode == aco_opcode::p_linear_phi || (*it)->opcode == aco_opcode::p_phi)
|
||||
++it;
|
||||
block.instructions.insert(it, std::move(end));
|
||||
inserted_at = vtmp_inserted_at = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasReductions[block.index])
|
||||
continue;
|
||||
|
||||
|
|
@@ -100,8 +105,6 @@ setup_reduce_temp(Program* program)
|
|||
instr->opcode != aco_opcode::p_bpermute_gfx11w64)
|
||||
continue;
|
||||
|
||||
reduceTmp_in_loop |= block.loop_nest_depth > 0;
|
||||
|
||||
if ((int)last_top_level_block_idx != inserted_at) {
|
||||
reduceTmp = program->allocateTmp(reduceTmp.regClass());
|
||||
aco_ptr<Pseudo_instruction> create{create_instruction<Pseudo_instruction>(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue