mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-21 20:10:14 +01:00
aco: make all exec accesses non-temporaries
So that they are not counted into the register demand. Totals from 107336 (77.00% of 139391) affected shaders (Navi10): VGPRs: 4023452 -> 4023248 (-0.01%); split: -0.01%, +0.01% SpillSGPRs: 14088 -> 12571 (-10.77%); split: -11.03%, +0.26% CodeSize: 266816164 -> 266765528 (-0.02%); split: -0.04%, +0.02% MaxWaves: 1553339 -> 1553374 (+0.00%); split: +0.00%, -0.00% Instrs: 50977701 -> 50973093 (-0.01%); split: -0.02%, +0.01% Cycles: 1733911128 -> 1733605320 (-0.02%); split: -0.05%, +0.03% VMEM: 40867650 -> 40900204 (+0.08%); split: +0.13%, -0.05% SMEM: 6835980 -> 6829073 (-0.10%); split: +0.10%, -0.20% VClause: 1032783 -> 1032788 (+0.00%); split: -0.01%, +0.01% SClause: 2103705 -> 2104115 (+0.02%); split: -0.09%, +0.11% Copies: 3195658 -> 3193656 (-0.06%); split: -0.30%, +0.24% Branches: 1140213 -> 1140120 (-0.01%); split: -0.05%, +0.04% PreSGPRs: 3603785 -> 3437064 (-4.63%); split: -5.13%, +0.50% PreVGPRs: 3321996 -> 3321850 (-0.00%) Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8870>
This commit is contained in:
parent
5d7b3bf1a7
commit
a56ddca4e8
1 changed files with 76 additions and 82 deletions
|
|
@ -311,8 +311,9 @@ void transition_to_WQM(exec_ctx& ctx, Builder bld, unsigned idx)
|
|||
if (ctx.info[idx].exec.back().second & mask_type_wqm)
|
||||
return;
|
||||
if (ctx.info[idx].exec.back().second & mask_type_global) {
|
||||
Temp exec_mask = ctx.info[idx].exec.back().first;
|
||||
exec_mask = bld.sop1(Builder::s_wqm, bld.def(bld.lm, exec), bld.def(s1, scc), bld.exec(exec_mask));
|
||||
Temp exec_mask = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm), Operand(exec, bld.lm));
|
||||
ctx.info[idx].exec.back().first = exec_mask;
|
||||
exec_mask = bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc), Operand(exec, bld.lm));
|
||||
ctx.info[idx].exec.emplace_back(exec_mask, mask_type_global | mask_type_wqm);
|
||||
return;
|
||||
}
|
||||
|
|
@ -320,7 +321,8 @@ void transition_to_WQM(exec_ctx& ctx, Builder bld, unsigned idx)
|
|||
ctx.info[idx].exec.pop_back();
|
||||
assert(ctx.info[idx].exec.back().second & mask_type_wqm);
|
||||
assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
|
||||
assert(ctx.info[idx].exec.back().first.id());
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
|
||||
ctx.info[idx].exec.back().first);
|
||||
}
|
||||
|
||||
|
|
@ -336,17 +338,24 @@ void transition_to_Exact(exec_ctx& ctx, Builder bld, unsigned idx)
|
|||
ctx.info[idx].exec.pop_back();
|
||||
assert(ctx.info[idx].exec.back().second & mask_type_exact);
|
||||
assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
|
||||
assert(ctx.info[idx].exec.back().first.id());
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
|
||||
ctx.info[idx].exec.back().first);
|
||||
return;
|
||||
}
|
||||
/* otherwise, we create an exact mask and push to the stack */
|
||||
Temp wqm = ctx.info[idx].exec.back().first;
|
||||
Temp exact = bld.tmp(bld.lm);
|
||||
wqm = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.exec(Definition(exact)), ctx.info[idx].exec[0].first, bld.exec(wqm));
|
||||
Temp wqm = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
|
||||
Definition(exec, bld.lm), ctx.info[idx].exec[0].first, Operand(exec, bld.lm));
|
||||
ctx.info[idx].exec.back().first = wqm;
|
||||
ctx.info[idx].exec.emplace_back(exact, mask_type_exact);
|
||||
ctx.info[idx].exec.emplace_back(Temp(0, bld.lm), mask_type_exact);
|
||||
}
|
||||
|
||||
/* Turn an entry of the exec mask stack into an instruction operand.
 *
 * A Temp that compares equal to a default-constructed Temp() is mapped to an
 * operand fixed to the exec register, keeping the register class carried by
 * the given Temp. Any other Temp is wrapped as an ordinary temporary operand.
 *
 * NOTE(review): the pass pushes Temp(0, bld.lm) onto the stack where the mask
 * is written straight to exec; presumably those entries are the ones that hit
 * the first case -- confirm against Temp::operator==.
 */
Operand get_exec_op(Temp t)
{
   const bool lives_in_exec = (t == Temp());
   return lives_in_exec ? Operand(exec, t.regClass()) : Operand(t);
}
|
||||
|
||||
unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
||||
|
|
@ -360,29 +369,27 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
if (idx == 0) {
|
||||
aco_ptr<Instruction>& startpgm = block->instructions[0];
|
||||
assert(startpgm->opcode == aco_opcode::p_startpgm);
|
||||
Temp exec_mask = startpgm->definitions.back().getTemp();
|
||||
bld.insert(std::move(startpgm));
|
||||
|
||||
/* exec seems to need to be manually initialized with combined shaders */
|
||||
if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
|
||||
bld.copy(bld.exec(Definition(exec_mask)), Operand(UINT32_MAX, bld.lm == s2));
|
||||
instructions[0]->definitions.pop_back();
|
||||
bld.copy(Definition(exec, bld.lm), Operand(UINT32_MAX, bld.lm == s2));
|
||||
}
|
||||
|
||||
if (ctx.handle_wqm) {
|
||||
ctx.info[0].exec.emplace_back(exec_mask, mask_type_global | mask_type_exact | mask_type_initial);
|
||||
ctx.info[0].exec.emplace_back(Temp(0, bld.lm), mask_type_global | mask_type_exact | mask_type_initial);
|
||||
/* if this block only needs WQM, initialize already */
|
||||
if (ctx.info[0].block_needs == WQM)
|
||||
transition_to_WQM(ctx, bld, 0);
|
||||
} else {
|
||||
uint8_t mask = mask_type_global;
|
||||
if (ctx.program->needs_wqm) {
|
||||
exec_mask = bld.sop1(Builder::s_wqm, bld.def(bld.lm, exec), bld.def(s1, scc), bld.exec(exec_mask));
|
||||
bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc), Operand(exec, bld.lm));
|
||||
mask |= mask_type_wqm;
|
||||
} else {
|
||||
mask |= mask_type_exact;
|
||||
}
|
||||
ctx.info[0].exec.emplace_back(exec_mask, mask);
|
||||
ctx.info[0].exec.emplace_back(Temp(0, bld.lm), mask);
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
@ -402,7 +409,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
for (int i = 0; i < info.num_exec_masks - 1; i++) {
|
||||
phi.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1));
|
||||
phi->definitions[0] = bld.def(bld.lm);
|
||||
phi->operands[0] = Operand(ctx.info[preds[0]].exec[i].first);
|
||||
phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec[i].first);
|
||||
ctx.info[idx].exec[i].first = bld.insert(std::move(phi));
|
||||
}
|
||||
}
|
||||
|
|
@ -412,7 +419,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
/* this phi might be trivial but ensures a parallelcopy on the loop header */
|
||||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
||||
phi->definitions[0] = bld.def(bld.lm);
|
||||
phi->operands[0] = Operand(ctx.info[preds[0]].exec[info.num_exec_masks - 1].first);
|
||||
phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec[info.num_exec_masks - 1].first);
|
||||
ctx.info[idx].exec.back().first = bld.insert(std::move(phi));
|
||||
}
|
||||
|
||||
|
|
@ -421,8 +428,8 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
if (info.has_divergent_continue)
|
||||
phi->definitions[0] = bld.def(bld.lm);
|
||||
else
|
||||
phi->definitions[0] = bld.def(bld.lm, exec);
|
||||
phi->operands[0] = Operand(ctx.info[preds[0]].exec.back().first);
|
||||
phi->definitions[0] = Definition(exec, bld.lm);
|
||||
phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first);
|
||||
Temp loop_active = bld.insert(std::move(phi));
|
||||
|
||||
if (info.has_divergent_break) {
|
||||
|
|
@ -442,7 +449,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
}
|
||||
uint8_t mask_type = ctx.info[idx].exec.back().second & (mask_type_wqm | mask_type_exact);
|
||||
assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
|
||||
ctx.info[idx].exec.emplace_back(bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
|
||||
ctx.info[idx].exec.emplace_back(bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
|
||||
ctx.info[idx].exec.back().first), mask_type);
|
||||
}
|
||||
|
||||
|
|
@ -465,7 +472,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
aco_ptr<Instruction>& phi = header->instructions[instr_idx];
|
||||
assert(phi->opcode == aco_opcode::p_linear_phi);
|
||||
for (unsigned i = 1; i < phi->operands.size(); i++)
|
||||
phi->operands[i] = Operand(ctx.info[header_preds[i]].exec[instr_idx].first);
|
||||
phi->operands[i] = get_exec_op(ctx.info[header_preds[i]].exec[instr_idx].first);
|
||||
instr_idx++;
|
||||
}
|
||||
}
|
||||
|
|
@ -474,14 +481,14 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
aco_ptr<Instruction>& phi = header->instructions[instr_idx++];
|
||||
assert(phi->opcode == aco_opcode::p_linear_phi);
|
||||
for (unsigned i = 1; i < phi->operands.size(); i++)
|
||||
phi->operands[i] = Operand(ctx.info[header_preds[i]].exec[info.num_exec_masks - 1].first);
|
||||
phi->operands[i] = get_exec_op(ctx.info[header_preds[i]].exec[info.num_exec_masks - 1].first);
|
||||
}
|
||||
|
||||
if (info.has_divergent_break) {
|
||||
aco_ptr<Instruction>& phi = header->instructions[instr_idx];
|
||||
assert(phi->opcode == aco_opcode::p_linear_phi);
|
||||
for (unsigned i = 1; i < phi->operands.size(); i++)
|
||||
phi->operands[i] = Operand(ctx.info[header_preds[i]].exec[info.num_exec_masks].first);
|
||||
phi->operands[i] = get_exec_op(ctx.info[header_preds[i]].exec[info.num_exec_masks].first);
|
||||
}
|
||||
|
||||
assert(!(block->kind & block_kind_top_level) || info.num_exec_masks <= 2);
|
||||
|
|
@ -520,11 +527,11 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
|
||||
phi->definitions[0] = bld.def(bld.lm);
|
||||
if (exec_idx == info.num_exec_masks - 1u) {
|
||||
phi->definitions[0].setFixed(exec);
|
||||
phi->definitions[0] = Definition(exec, bld.lm);
|
||||
need_parallelcopy = false;
|
||||
}
|
||||
for (unsigned i = 0; i < phi->operands.size(); i++)
|
||||
phi->operands[i] = Operand(ctx.info[preds[i]].exec[exec_idx].first);
|
||||
phi->operands[i] = get_exec_op(ctx.info[preds[i]].exec[exec_idx].first);
|
||||
ctx.info[idx].exec.emplace_back(bld.insert(std::move(phi)), type);
|
||||
}
|
||||
}
|
||||
|
|
@ -553,13 +560,13 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
}
|
||||
|
||||
assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
|
||||
if (need_parallelcopy) {
|
||||
if (need_parallelcopy && get_exec_op(ctx.info[idx].exec.back().first).isTemp()) {
|
||||
/* only create this parallelcopy if needed, since the operand isn't
|
||||
* fixed to exec which causes the spiller to miscalculate register demand */
|
||||
/* TODO: Fix register_demand calculation for spilling on loop exits.
|
||||
* The problem is only mitigated because the register demand could be
|
||||
* higher if the exec phi doesn't get assigned to exec. */
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
|
||||
ctx.info[idx].exec.back().first);
|
||||
}
|
||||
|
||||
|
|
@ -582,16 +589,17 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
|
||||
/* create phis for diverged exec masks */
|
||||
for (unsigned i = 0; i < num_exec_masks; i++) {
|
||||
bool in_exec = i == num_exec_masks - 1 && !(block->kind & block_kind_merge);
|
||||
if (!in_exec && ctx.info[preds[0]].exec[i].first == ctx.info[preds[1]].exec[i].first) {
|
||||
/* skip trivial phis */
|
||||
if (ctx.info[preds[0]].exec[i].first == ctx.info[preds[1]].exec[i].first) {
|
||||
assert(ctx.info[preds[0]].exec[i].second == ctx.info[preds[1]].exec[i].second);
|
||||
ctx.info[idx].exec.emplace_back(ctx.info[preds[0]].exec[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
Temp phi = bld.pseudo(aco_opcode::p_linear_phi, in_exec ? bld.def(bld.lm, exec) : bld.def(bld.lm),
|
||||
ctx.info[preds[0]].exec[i].first,
|
||||
ctx.info[preds[1]].exec[i].first);
|
||||
bool in_exec = i == num_exec_masks - 1 && !(block->kind & block_kind_merge);
|
||||
Temp phi = bld.pseudo(aco_opcode::p_linear_phi, in_exec ? Definition(exec, bld.lm) : bld.def(bld.lm),
|
||||
get_exec_op(ctx.info[preds[0]].exec[i].first),
|
||||
get_exec_op(ctx.info[preds[1]].exec[i].first));
|
||||
uint8_t mask_type = ctx.info[preds[0]].exec[i].second & ctx.info[preds[1]].exec[i].second;
|
||||
ctx.info[idx].exec.emplace_back(phi, mask_type);
|
||||
}
|
||||
|
|
@ -620,10 +628,10 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
transition_to_Exact(ctx, bld, idx);
|
||||
}
|
||||
|
||||
if (block->kind & block_kind_merge) {
|
||||
if (block->kind & block_kind_merge && ctx.info[idx].exec.back().first != Temp()) {
|
||||
Temp restore = ctx.info[idx].exec.back().first;
|
||||
assert(restore.size() == bld.lm.size());
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec), restore);
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), restore);
|
||||
}
|
||||
|
||||
return i;
|
||||
|
|
@ -671,20 +679,22 @@ void process_instructions(exec_ctx& ctx, Block* block,
|
|||
}
|
||||
int num = ctx.info[block->index].exec.size();
|
||||
assert(num);
|
||||
Operand cond = instr->operands[0];
|
||||
for (int i = num - 1; i >= 0; i--) {
|
||||
|
||||
/* discard from current exec */
|
||||
const Operand cond = instr->operands[0];
|
||||
Temp exit_cond = bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc),
|
||||
Operand(exec, bld.lm), cond).def(1).getTemp();
|
||||
|
||||
/* discard from inner to outer exec mask on stack */
|
||||
for (int i = num - 2; i >= 0; i--) {
|
||||
Instruction *andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
|
||||
ctx.info[block->index].exec[i].first, cond);
|
||||
if (i == num - 1) {
|
||||
andn2->operands[0].setFixed(exec);
|
||||
andn2->definitions[0].setFixed(exec);
|
||||
}
|
||||
if (i == 0) {
|
||||
instr->opcode = aco_opcode::p_exit_early_if;
|
||||
instr->operands[0] = bld.scc(andn2->definitions[1].getTemp());
|
||||
}
|
||||
ctx.info[block->index].exec[i].first = andn2->definitions[0].getTemp();
|
||||
exit_cond = andn2->definitions[1].getTemp();
|
||||
}
|
||||
|
||||
instr->opcode = aco_opcode::p_exit_early_if;
|
||||
instr->operands[0] = bld.scc(exit_cond);
|
||||
assert(!ctx.handle_wqm || (ctx.info[block->index].exec[0].second & mask_type_wqm) == 0);
|
||||
|
||||
} else if (needs == WQM && state != WQM) {
|
||||
|
|
@ -720,7 +730,7 @@ void process_instructions(exec_ctx& ctx, Block* block,
|
|||
assert(exact_mask.second & mask_type_exact);
|
||||
|
||||
instr.reset(create_instruction<SOP2_instruction>(bld.w64or32(Builder::s_andn2), Format::SOP2, 2, 2));
|
||||
instr->operands[0] = Operand(ctx.info[block->index].exec.back().first); /* current exec */
|
||||
instr->operands[0] = Operand(exec, bld.lm); /* current exec */
|
||||
instr->operands[1] = Operand(exact_mask.first);
|
||||
instr->definitions[0] = dst;
|
||||
instr->definitions[1] = bld.def(s1, scc);
|
||||
|
|
@ -735,18 +745,14 @@ void process_instructions(exec_ctx& ctx, Block* block,
|
|||
if (instr->operands[0].isConstant()) {
|
||||
assert(instr->operands[0].constantValue() == -1u);
|
||||
/* transition to exact and set exec to zero */
|
||||
Temp old_exec = ctx.info[block->index].exec.back().first;
|
||||
Temp new_exec = bld.tmp(bld.lm);
|
||||
exit_cond = bld.tmp(s1);
|
||||
cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.scc(Definition(exit_cond)),
|
||||
bld.exec(Definition(new_exec)), Operand(0u), bld.exec(old_exec));
|
||||
Definition(exec, bld.lm), Operand(0u), Operand(exec, bld.lm));
|
||||
|
||||
num = ctx.info[block->index].exec.size() - 2;
|
||||
if (ctx.info[block->index].exec.back().second & mask_type_exact) {
|
||||
ctx.info[block->index].exec.back().first = new_exec;
|
||||
} else {
|
||||
if (!(ctx.info[block->index].exec.back().second & mask_type_exact)) {
|
||||
ctx.info[block->index].exec.back().first = cond;
|
||||
ctx.info[block->index].exec.emplace_back(new_exec, mask_type_exact);
|
||||
ctx.info[block->index].exec.emplace_back(Temp(0, bld.lm), mask_type_exact);
|
||||
}
|
||||
} else {
|
||||
/* demote_if: transition to exact */
|
||||
|
|
@ -761,8 +767,8 @@ void process_instructions(exec_ctx& ctx, Block* block,
|
|||
Instruction *andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
|
||||
ctx.info[block->index].exec[i].first, cond);
|
||||
if (i == (int)ctx.info[block->index].exec.size() - 1) {
|
||||
andn2->operands[0].setFixed(exec);
|
||||
andn2->definitions[0].setFixed(exec);
|
||||
andn2->operands[0] = Operand(exec, bld.lm);
|
||||
andn2->definitions[0] = Definition(exec, bld.lm);
|
||||
}
|
||||
|
||||
ctx.info[block->index].exec[i].first = andn2->definitions[0].getTemp();
|
||||
|
|
@ -873,7 +879,7 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
/* For normal breaks, this is the exec mask. For discard+break, it's the
|
||||
* old exec mask before it was zero'd.
|
||||
*/
|
||||
Operand break_cond = bld.exec(ctx.info[idx].exec.back().first);
|
||||
Operand break_cond = Operand(exec, bld.lm);
|
||||
|
||||
if (block->kind & block_kind_discard) {
|
||||
|
||||
|
|
@ -890,17 +896,14 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
num = ctx.info[idx].exec.size() - 1;
|
||||
}
|
||||
|
||||
Temp old_exec = ctx.info[idx].exec.back().first;
|
||||
Temp new_exec = bld.tmp(bld.lm);
|
||||
Temp cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.exec(Definition(new_exec)), Operand(0u), bld.exec(old_exec));
|
||||
ctx.info[idx].exec.back().first = new_exec;
|
||||
Definition(exec, bld.lm), Operand(0u), Operand(exec, bld.lm));
|
||||
|
||||
for (int i = num - 1; i >= 0; i--) {
|
||||
Instruction *andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
|
||||
ctx.info[block->index].exec[i].first, cond);
|
||||
get_exec_op(ctx.info[block->index].exec[i].first), cond);
|
||||
if (i == (int)ctx.info[idx].exec.size() - 1)
|
||||
andn2->definitions[0].setFixed(exec);
|
||||
andn2->definitions[0] = Definition(exec, bld.lm);
|
||||
if (i == 0)
|
||||
bld.pseudo(aco_opcode::p_exit_early_if, bld.scc(andn2->definitions[1].getTemp()));
|
||||
ctx.info[block->index].exec[i].first = andn2->definitions[0].getTemp();
|
||||
|
|
@ -925,8 +928,8 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
}
|
||||
|
||||
if (need_parallelcopy)
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec), ctx.info[idx].exec.back().first);
|
||||
bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.exec(ctx.info[idx].exec.back().first), block->linear_succs[1], block->linear_succs[0]);
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), ctx.info[idx].exec.back().first);
|
||||
bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -961,19 +964,17 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
if (ctx.info[idx].block_needs & Exact_Branch)
|
||||
transition_to_Exact(ctx, bld, idx);
|
||||
|
||||
Temp current_exec = ctx.info[idx].exec.back().first;
|
||||
uint8_t mask_type = ctx.info[idx].exec.back().second & (mask_type_wqm | mask_type_exact);
|
||||
|
||||
Temp then_mask = bld.tmp(bld.lm);
|
||||
Temp old_exec = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
|
||||
bld.exec(Definition(then_mask)), cond, bld.exec(current_exec));
|
||||
Definition(exec, bld.lm), cond, Operand(exec, bld.lm));
|
||||
|
||||
ctx.info[idx].exec.back().first = old_exec;
|
||||
|
||||
/* add next current exec to the stack */
|
||||
ctx.info[idx].exec.emplace_back(then_mask, mask_type);
|
||||
ctx.info[idx].exec.emplace_back(Temp(0, bld.lm), mask_type);
|
||||
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), bld.exec(then_mask), block->linear_succs[1], block->linear_succs[0]);
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -981,17 +982,11 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
// exec = s_andn2_b64 (original_exec, exec)
|
||||
assert(block->instructions.back()->opcode == aco_opcode::p_cbranch_nz);
|
||||
block->instructions.pop_back();
|
||||
Temp then_mask = ctx.info[idx].exec.back().first;
|
||||
uint8_t mask_type = ctx.info[idx].exec.back().second;
|
||||
ctx.info[idx].exec.pop_back();
|
||||
Temp orig_exec = ctx.info[idx].exec.back().first;
|
||||
Temp else_mask = bld.sop2(Builder::s_andn2, bld.def(bld.lm, exec),
|
||||
bld.def(s1, scc), orig_exec, bld.exec(then_mask));
|
||||
assert(ctx.info[idx].exec.size() >= 2);
|
||||
Temp orig_exec = ctx.info[idx].exec[ctx.info[idx].exec.size() - 2].first;
|
||||
bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc), orig_exec, Operand(exec, bld.lm));
|
||||
|
||||
/* add next current exec to the stack */
|
||||
ctx.info[idx].exec.emplace_back(else_mask, mask_type);
|
||||
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), bld.exec(else_mask), block->linear_succs[1], block->linear_succs[0]);
|
||||
bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -1016,7 +1011,7 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
|
||||
Block& succ = ctx.program->blocks[succ_idx];
|
||||
if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
|
||||
ctx.info[idx].exec.back().first = bld.copy(bld.def(bld.lm, exec), Operand(0u, bld.lm == s2));
|
||||
bld.copy(Definition(exec, bld.lm), Operand(0u, bld.lm == s2));
|
||||
}
|
||||
|
||||
bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond), block->linear_succs[1], block->linear_succs[0]);
|
||||
|
|
@ -1027,7 +1022,6 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
assert(block->instructions.back()->opcode == aco_opcode::p_branch);
|
||||
block->instructions.pop_back();
|
||||
|
||||
Temp current_exec = ctx.info[idx].exec.back().first;
|
||||
Temp cond = Temp();
|
||||
for (int exec_idx = ctx.info[idx].exec.size() - 2; exec_idx >= 0; exec_idx--) {
|
||||
if (ctx.info[idx].exec[exec_idx].second & mask_type_loop)
|
||||
|
|
@ -1035,7 +1029,7 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
cond = bld.tmp(s1);
|
||||
Temp exec_mask = ctx.info[idx].exec[exec_idx].first;
|
||||
exec_mask = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.scc(Definition(cond)),
|
||||
exec_mask, bld.exec(current_exec));
|
||||
exec_mask, Operand(exec, bld.lm));
|
||||
ctx.info[idx].exec[exec_idx].first = exec_mask;
|
||||
}
|
||||
assert(cond != Temp());
|
||||
|
|
@ -1045,7 +1039,7 @@ void add_branch_code(exec_ctx& ctx, Block* block)
|
|||
unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
|
||||
Block& succ = ctx.program->blocks[succ_idx];
|
||||
if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
|
||||
ctx.info[idx].exec.back().first = bld.copy(bld.def(bld.lm, exec), Operand(0u, bld.lm == s2));
|
||||
bld.copy(Definition(exec, bld.lm), Operand(0u, bld.lm == s2));
|
||||
}
|
||||
|
||||
bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond), block->linear_succs[1], block->linear_succs[0]);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue