mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 00:58:05 +02:00
aco: Use v_cmpx pre GFX10.
Foz-DB Vega10:
Totals from 29508 (21.85% of 135041) affected shaders:
CodeSize: 184345656 -> 184345820 (+0.00%)
Instrs: 35906154 -> 35906195 (+0.00%)
Latency: 581696114 -> 581530021 (-0.03%); split: -0.03%, +0.00%
InvThroughput: 245625572 -> 245561351 (-0.03%); split: -0.03%, +0.00%
Copies: 3134925 -> 3278672 (+4.59%)

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18049>
This commit is contained in:
parent
393e577435
commit
7b9d3ebe42
1 changed files with 50 additions and 13 deletions
|
|
@@ -355,24 +355,24 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
|
|||
if (exec_val->definitions.size() > 1)
|
||||
return;
|
||||
|
||||
/* Check if a suitable v_cmpx opcode exists. */
|
||||
const aco_opcode v_cmpx_op =
|
||||
exec_val->isVOPC() ? get_vcmpx(exec_val->opcode) : aco_opcode::num_opcodes;
|
||||
const bool vopc = v_cmpx_op != aco_opcode::num_opcodes;
|
||||
|
||||
/* If s_and_saveexec is used, we'll need to insert a new instruction to save the old exec. */
|
||||
const bool save_original_exec = exec_copy->opcode == aco_opcode::s_and_saveexec_b32 ||
|
||||
exec_copy->opcode == aco_opcode::s_and_saveexec_b64;
|
||||
/* Position where the original exec mask copy should be inserted. */
|
||||
const int save_original_exec_idx = exec_val_idx;
|
||||
/* The copy can be removed when it kills its operand. */
|
||||
const bool can_remove_copy = exec_copy->operands[0].isKill();
|
||||
/* The copy can be removed when it kills its operand.
|
||||
* v_cmpx also writes the original destination pre GFX10.
|
||||
*/
|
||||
const bool can_remove_copy =
|
||||
exec_copy->operands[0].isKill() || (vopc && ctx.program->gfx_level < GFX10);
|
||||
/* Whether exec_val and exec_copy are adjacent (with p_logical_end inbetween). */
|
||||
const bool val_and_copy_adjacent = exec_val_idx == exec_copy_idx - 2;
|
||||
|
||||
/* Only use v_cmpx on GFX10+ where it doesn't always clobber the VCC.
|
||||
* Also check if a suitable v_cmpx opcode exists.
|
||||
*/
|
||||
const aco_opcode v_cmpx_op =
|
||||
exec_val->isVOPC() ? get_vcmpx(exec_val->opcode) : aco_opcode::num_opcodes;
|
||||
const bool usable_vcmpx = ctx.program->gfx_level >= GFX10 && v_cmpx_op != aco_opcode::num_opcodes;
|
||||
const bool vopc = exec_val->isVOPC() && usable_vcmpx;
|
||||
|
||||
/* Always allow reassigning when the value is written by (usable) VOPC.
|
||||
* Note, VOPC implicitly contains "& exec" because it yields zero on inactive lanes.
|
||||
* Additionally, when value is copied as-is, also allow SALU and parallelcopies.
|
||||
|
|
@@ -422,16 +422,53 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
|
|||
for (const Operand& op : exec_val->operands)
|
||||
if (regs_intersect(exec_copy_def, op))
|
||||
return;
|
||||
/* We would write over the saved exec value in this case. */
|
||||
if (((vopc && ctx.program->gfx_level < GFX10) || !can_remove_copy) &&
|
||||
regs_intersect(exec_copy_def, exec_wr_def))
|
||||
return;
|
||||
}
|
||||
|
||||
/* Reassign the instruction to write exec directly. */
|
||||
exec_val->definitions[0] = Definition(exec, ctx.program->lane_mask);
|
||||
|
||||
if (vopc) {
|
||||
/* Add one extra definition for exec and copy the VOP3-specific fields if present. */
|
||||
if (ctx.program->gfx_level < GFX10) {
|
||||
if (exec_val->isSDWA() || exec_val->isDPP()) {
|
||||
/* This might work but it needs testing and more code to copy the instruction. */
|
||||
return;
|
||||
}
|
||||
else if (!exec_val->isVOP3()) {
|
||||
aco_ptr<Instruction> tmp = std::move(exec_val);
|
||||
exec_val.reset(create_instruction<VOPC_instruction>(
|
||||
tmp->opcode, tmp->format, tmp->operands.size(), tmp->definitions.size() + 1));
|
||||
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), exec_val->operands.begin());
|
||||
std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(),
|
||||
exec_val->definitions.begin());
|
||||
} else {
|
||||
aco_ptr<Instruction> tmp = std::move(exec_val);
|
||||
exec_val.reset(create_instruction<VOP3_instruction>(
|
||||
tmp->opcode, tmp->format, tmp->operands.size(), tmp->definitions.size() + 1));
|
||||
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), exec_val->operands.begin());
|
||||
std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(),
|
||||
exec_val->definitions.begin());
|
||||
|
||||
VOP3_instruction& src = tmp->vop3();
|
||||
VOP3_instruction& dst = exec_val->vop3();
|
||||
dst.opsel = src.opsel;
|
||||
dst.omod = src.omod;
|
||||
dst.clamp = src.clamp;
|
||||
std::copy(std::cbegin(src.abs), std::cend(src.abs), std::begin(dst.abs));
|
||||
std::copy(std::cbegin(src.neg), std::cend(src.neg), std::begin(dst.neg));
|
||||
}
|
||||
}
|
||||
|
||||
/* Set v_cmpx opcode. */
|
||||
exec_val->opcode = v_cmpx_op;
|
||||
|
||||
*exec_val->definitions.rbegin() = Definition(exec, ctx.program->lane_mask);
|
||||
|
||||
/* TODO: change instruction from VOP3 to plain VOPC when possible. */
|
||||
} else {
|
||||
/* Reassign the instruction to write exec directly. */
|
||||
exec_val->definitions[0] = Definition(exec, ctx.program->lane_mask);
|
||||
}
|
||||
|
||||
if (!val_and_copy_adjacent) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue