mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 17:20:10 +01:00
aco: create better code for boolean phis with constant operands
fossil-db (Navi):
Totals from 6394 (4.70% of 135946) affected shaders:
SGPRs: 651408 -> 651344 (-0.01%)
SpillSGPRs: 52102 -> 52019 (-0.16%)
CodeSize: 68369664 -> 68229180 (-0.21%); split: -0.21%, +0.00%
Instrs: 13236611 -> 13202126 (-0.26%); split: -0.26%, +0.00%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3388>
This commit is contained in:
parent
47b0653d5d
commit
f622e80494
2 changed files with 62 additions and 20 deletions
|
|
@ -8953,13 +8953,19 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
|
|||
}
|
||||
|
||||
|
||||
// NOTE(review): this span reads like a rendered diff hunk -- the pre-change and
// post-change lines of get_phi_operand appear interleaved without +/- markers
// (two signatures, two `undef` checks, a bare `else` next to `} else if`), and
// the `|` / `||||` lines look like table residue. Confirm against the real file.
//
// get_phi_operand: picks the Operand used for one phi source `ssa`.
//   - parent instruction is nir_instr_type_ssa_undef -> Operand(rc)
//   - `logical` set, bit_size == 1 and parent is a load_const -> a constant
//     Operand: UINT64_MAX / 0 when ctx->program->wave_size == 64, otherwise
//     UINT32_MAX / 0, selected by the load_const's value[0].b
//   - anything else -> Operand(tmp), tmp being get_ssa_temp(ctx, ssa)
Operand get_phi_operand(isel_context *ctx, nir_ssa_def *ssa, RegClass rc)
|
||||
Operand get_phi_operand(isel_context *ctx, nir_ssa_def *ssa, RegClass rc, bool logical)
|
||||
{
|
||||
// tmp is looked up unconditionally, even on the paths that do not return it.
Temp tmp = get_ssa_temp(ctx, ssa);
|
||||
if (ssa->parent_instr->type == nir_instr_type_ssa_undef)
|
||||
if (ssa->parent_instr->type == nir_instr_type_ssa_undef) {
|
||||
return Operand(rc);
|
||||
else
|
||||
// Only 1-bit load_const sources of phis flagged `logical` take this path.
} else if (logical && ssa->bit_size == 1 && ssa->parent_instr->type == nir_instr_type_load_const) {
|
||||
// The width of the all-ones constant follows the program's wave size.
if (ctx->program->wave_size == 64)
|
||||
return Operand(nir_instr_as_load_const(ssa->parent_instr)->value[0].b ? UINT64_MAX : 0u);
|
||||
else
|
||||
return Operand(nir_instr_as_load_const(ssa->parent_instr)->value[0].b ? UINT32_MAX : 0u);
|
||||
} else {
|
||||
return Operand(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
void visit_phi(isel_context *ctx, nir_phi_instr *instr)
|
||||
|
|
@ -9002,7 +9008,7 @@ void visit_phi(isel_context *ctx, nir_phi_instr *instr)
|
|||
if (!(ctx->block->kind & block_kind_loop_header) && cur_pred_idx >= preds.size())
|
||||
continue;
|
||||
cur_pred_idx++;
|
||||
Operand op = get_phi_operand(ctx, src.second, dst.regClass());
|
||||
Operand op = get_phi_operand(ctx, src.second, dst.regClass(), logical);
|
||||
operands[num_operands++] = op;
|
||||
num_defined += !op.isUndefined();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,9 +96,56 @@ void insert_before_logical_end(Block *block, aco_ptr<Instruction> instr)
|
|||
if (it == block->instructions.crend()) {
|
||||
assert(block->instructions.back()->format == Format::PSEUDO_BRANCH);
|
||||
block->instructions.insert(std::prev(block->instructions.end()), std::move(instr));
|
||||
}
|
||||
else
|
||||
} else {
|
||||
block->instructions.insert(std::prev(it.base()), std::move(instr));
|
||||
}
|
||||
}
|
||||
|
||||
/* Emits into `block`, at its last p_logical_end instruction, the scalar code
 * that writes `dst` with the merge of `prev` and `cur`.
 *
 * When `prev` is undefined the result is a plain s_mov of `cur`. Otherwise the
 * instruction sequence depends on whether each operand is one of the two
 * boolean constants (0 or all-ones after sign extension):
 *
 *   prev \ cur  | non-constant                 | all-ones       | zero
 *   ------------+------------------------------+----------------+----------------
 *   non-const   | s_andn2 + s_and + s_or       | s_or  prev,exec| s_andn2 prev,exec
 *   all-ones    | s_orn2 cur,exec              | s_mov -1       | s_not exec
 *   zero        | s_and  cur,exec              | s_mov exec     | s_mov 0
 *
 * NOTE(review): read with the usual SALU meaning (andn2 = a & ~b,
 * orn2 = a | ~b) this keeps `prev` where exec is clear and takes `cur` where
 * exec is set -- confirm against the ISA documentation.
 */
void build_merge_code(Program *program, Block *block, Definition dst, Operand prev, Operand cur)
{
   Builder bld(program);

   /* Searching in reverse yields the last p_logical_end of the block; the
    * builder is then pointed at that instruction. */
   auto logical_end = std::find_if(block->instructions.rbegin(), block->instructions.rend(),
                                   [](const aco_ptr<Instruction>& candidate) -> bool {
                                      return candidate->opcode == aco_opcode::p_logical_end;
                                   });
   assert(logical_end != block->instructions.rend());
   bld.reset(&block->instructions, std::prev(logical_end.base()));

   /* Nothing to merge with: just copy the incoming value. */
   if (prev.isUndefined()) {
      bld.sop1(Builder::s_mov, dst, cur);
      return;
   }

   /* True only for constants whose sign-extended value is 0 or -1: adding one
    * wraps -1 to 0 and maps 0 to 1, both of which are below 2. */
   auto is_bool_constant = [](Operand op) -> bool {
      return op.isConstant() && op.constantValue64(true) + 1u < 2u;
   };

   const bool prev_const = is_bool_constant(prev);
   const bool cur_const = is_bool_constant(cur);
   const bool prev_true = prev_const && prev.constantValue64(true);
   const bool cur_true = cur_const && cur.constantValue64(true);

   const Operand exec_mask(exec, bld.lm);

   /* General case: neither side is a known constant. */
   if (!prev_const && !cur_const) {
      Temp kept = bld.tmp(bld.lm);
      Temp incoming = bld.tmp(bld.lm);
      bld.sop2(Builder::s_andn2, Definition(kept), bld.def(s1, scc), prev, exec_mask);
      bld.sop2(Builder::s_and, Definition(incoming), bld.def(s1, scc), cur, exec_mask);
      bld.sop2(Builder::s_or, dst, bld.def(s1, scc), kept, incoming);
      return;
   }

   /* Only `cur` is constant: a single instruction on `prev` and exec. */
   if (!prev_const) {
      if (cur_true)
         bld.sop2(Builder::s_or, dst, bld.def(s1, scc), prev, exec_mask);
      else
         bld.sop2(Builder::s_andn2, dst, bld.def(s1, scc), prev, exec_mask);
      return;
   }

   /* Only `prev` is constant: a single instruction on `cur` and exec. */
   if (!cur_const) {
      if (prev_true)
         bld.sop2(Builder::s_orn2, dst, bld.def(s1, scc), cur, exec_mask);
      else
         bld.sop2(Builder::s_and, dst, bld.def(s1, scc), cur, exec_mask);
      return;
   }

   /* Both sides are constants: the result depends on exec alone, or on nothing. */
   if (prev_true) {
      if (cur_true)
         bld.sop1(Builder::s_mov, dst, program->wave_size == 64 ? Operand(UINT64_MAX) : Operand(UINT32_MAX));
      else
         bld.sop1(Builder::s_not, dst, bld.def(s1, scc), exec_mask);
   } else {
      if (cur_true)
         bld.sop1(Builder::s_mov, dst, exec_mask);
      else
         bld.sop1(Builder::s_mov, dst, program->wave_size == 64 ? Operand((uint64_t)0u) : Operand(0u));
   }
}
|
||||
|
||||
void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block, aco_ptr<Instruction>& phi)
|
||||
|
|
@ -144,20 +191,9 @@ void lower_divergent_bool_phi(Program *program, ssa_state *state, Block *block,
|
|||
Temp new_cur = {state->writes.at(pred->index), program->lane_mask};
|
||||
assert(new_cur.regClass() == bld.lm);
|
||||
|
||||
if (cur.isUndefined()) {
|
||||
insert_before_logical_end(pred, bld.sop1(aco_opcode::s_mov_b64, Definition(new_cur), phi->operands[i]).get_ptr());
|
||||
} else {
|
||||
Temp tmp1 = bld.tmp(bld.lm), tmp2 = bld.tmp(bld.lm);
|
||||
insert_before_logical_end(pred,
|
||||
bld.sop2(Builder::s_andn2, Definition(tmp1), bld.def(s1, scc),
|
||||
cur, Operand(exec, bld.lm)).get_ptr());
|
||||
insert_before_logical_end(pred,
|
||||
bld.sop2(Builder::s_and, Definition(tmp2), bld.def(s1, scc),
|
||||
phi->operands[i].getTemp(), Operand(exec, bld.lm)).get_ptr());
|
||||
insert_before_logical_end(pred,
|
||||
bld.sop2(Builder::s_or, Definition(new_cur), bld.def(s1, scc),
|
||||
tmp1, tmp2).get_ptr());
|
||||
}
|
||||
if (i == 1 && (block->kind & block_kind_merge) && phi->operands[0].isConstant())
|
||||
cur = phi->operands[0];
|
||||
build_merge_code(program, pred, Definition(new_cur), cur, phi->operands[i]);
|
||||
}
|
||||
|
||||
unsigned num_preds = block->linear_preds.size();
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue