mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 06:58:05 +02:00
aco: relax condition to remove branches in case of few instructions
This patch relaxes the conditions under which we remove branch instructions. Totals from 27246 (20.20% of 134913) affected shaders: (GFX10.3) CodeSize: 193413312 -> 192924928 (-0.25%) Instrs: 36146788 -> 36024692 (-0.34%) Latency: 528374112 -> 528469044 (+0.02%); split: -0.01%, +0.02% InvThroughput: 106198759 -> 106216583 (+0.02%); split: -0.00%, +0.02% Branches: 1040640 -> 918543 (-11.73%) Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8647>
This commit is contained in:
parent
53ca85ac2a
commit
f030b75b7d
1 changed files with 55 additions and 12 deletions
|
|
@ -2196,35 +2196,77 @@ lower_to_hw_instr(Program* program)
|
||||||
}
|
}
|
||||||
} else if (instr->isBranch()) {
|
} else if (instr->isBranch()) {
|
||||||
Pseudo_branch_instruction* branch = &instr->branch();
|
Pseudo_branch_instruction* branch = &instr->branch();
|
||||||
uint32_t target = branch->target[0];
|
const uint32_t target = branch->target[0];
|
||||||
|
const bool uniform_branch = !(branch->opcode == aco_opcode::p_cbranch_z &&
|
||||||
|
branch->operands[0].physReg() == exec);
|
||||||
|
|
||||||
/* check if all blocks from current to target are empty */
|
/* Check if the branch instruction can be removed.
|
||||||
/* In case there are <= 4 SALU or <= 2 VALU instructions, remove the branch */
|
* This is beneficial when executing the next block with an empty exec mask
|
||||||
|
* is faster than the branch instruction itself.
|
||||||
|
*/
|
||||||
bool can_remove = block->index < target;
|
bool can_remove = block->index < target;
|
||||||
unsigned num_scalar = 0;
|
unsigned num_scalar = 0;
|
||||||
unsigned num_vector = 0;
|
unsigned num_vector = 0;
|
||||||
for (unsigned i = block->index + 1; can_remove && i < branch->target[0]; i++) {
|
bool has_sopp = false;
|
||||||
/* uniform branches must not be ignored if they
|
|
||||||
|
/* Check the instructions between branch and target */
|
||||||
|
for (unsigned i = block->index + 1; i < branch->target[0]; i++) {
|
||||||
|
/* Uniform conditional branches must not be ignored if they
|
||||||
* are about to jump over actual instructions */
|
* are about to jump over actual instructions */
|
||||||
if (!program->blocks[i].instructions.empty() &&
|
if (uniform_branch && !program->blocks[i].instructions.empty())
|
||||||
(branch->opcode != aco_opcode::p_cbranch_z ||
|
|
||||||
branch->operands[0].physReg() != exec)) {
|
|
||||||
can_remove = false;
|
can_remove = false;
|
||||||
|
|
||||||
|
if (!can_remove)
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
|
|
||||||
for (aco_ptr<Instruction>& inst : program->blocks[i].instructions) {
|
for (aco_ptr<Instruction>& inst : program->blocks[i].instructions) {
|
||||||
if (inst->isSOPP()) {
|
if (inst->isSOPP()) {
|
||||||
can_remove = false;
|
/* we allow at most one inner branch */
|
||||||
|
if (has_sopp)
|
||||||
|
can_remove = false;
|
||||||
|
|
||||||
|
/* These instructions must conditionally be jumped over */
|
||||||
|
if (inst->opcode == aco_opcode::s_endpgm ||
|
||||||
|
inst->opcode == aco_opcode::s_sendmsg ||
|
||||||
|
inst->opcode == aco_opcode::s_sendmsghalt ||
|
||||||
|
inst->opcode == aco_opcode::s_trap ||
|
||||||
|
inst->opcode == aco_opcode::s_barrier)
|
||||||
|
can_remove = false;
|
||||||
|
|
||||||
|
has_sopp = true;
|
||||||
} else if (inst->isSALU()) {
|
} else if (inst->isSALU()) {
|
||||||
num_scalar++;
|
num_scalar++;
|
||||||
} else if (inst->isVALU()) {
|
} else if (inst->isVALU() || inst->isVINTRP()) {
|
||||||
num_vector++;
|
num_vector++;
|
||||||
|
/* VALU which writes SGPRs are always executed on GFX10+ */
|
||||||
|
if (ctx.program->chip_class >= GFX10) {
|
||||||
|
for (Definition& def : inst->definitions) {
|
||||||
|
if (def.regClass().type() == RegType::sgpr)
|
||||||
|
num_scalar++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (inst->isVMEM() || inst->isFlatLike() || inst->isDS() ||
|
||||||
|
inst->isEXP()) {
|
||||||
|
// TODO: GFX6-9 can use vskip
|
||||||
|
can_remove = false;
|
||||||
|
} else if (inst->isSMEM()) {
|
||||||
|
/* SMEM are at least as expensive as branches */
|
||||||
|
can_remove = false;
|
||||||
|
} else if (inst->isBarrier()) {
|
||||||
|
can_remove = false;
|
||||||
} else {
|
} else {
|
||||||
can_remove = false;
|
can_remove = false;
|
||||||
|
assert(false && "Pseudo instructions should be lowered by this point.");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (num_scalar + num_vector * 2 > 4)
|
/* Under these conditions, we shouldn't remove the branch */
|
||||||
|
unsigned est_cycles;
|
||||||
|
if (ctx.program->chip_class >= GFX10)
|
||||||
|
est_cycles = num_scalar * 2 + num_vector;
|
||||||
|
else
|
||||||
|
est_cycles = num_scalar * 4 + num_vector * 4;
|
||||||
|
|
||||||
|
if (est_cycles > 16)
|
||||||
can_remove = false;
|
can_remove = false;
|
||||||
|
|
||||||
if (!can_remove)
|
if (!can_remove)
|
||||||
|
|
@ -2235,6 +2277,7 @@ lower_to_hw_instr(Program* program)
|
||||||
if (can_remove)
|
if (can_remove)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/* emit branch instruction */
|
||||||
switch (instr->opcode) {
|
switch (instr->opcode) {
|
||||||
case aco_opcode::p_branch:
|
case aco_opcode::p_branch:
|
||||||
assert(block->linear_succs[0] == target);
|
assert(block->linear_succs[0] == target);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue