aco: combine DPP into VALU after RA

Mostly helps a bunch of Cyberpunk 2077 shaders.

fossil-db (Sienna Cichlid):
Totals from 26 (0.02% of 150170) affected shaders:
CodeSize: 83208 -> 81528 (-2.02%)
Instrs: 14728 -> 14308 (-2.85%)
Latency: 48041 -> 47793 (-0.52%)
InvThroughput: 10836 -> 10578 (-2.38%)

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11924>
This commit is contained in:
Rhys Perry 2020-06-30 15:33:18 +01:00 committed by Marge Bot
parent 2e6834d4f6
commit 4ac47ad1cd

View file

@ -22,6 +22,7 @@
*
*/
#include "aco_builder.h"
#include "aco_ir.h"
#include <algorithm>
@ -338,6 +339,58 @@ try_optimize_scc_nocompare(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
}
/* Fold a DPP v_mov_b32 into the VALU instruction that consumes its result.
 *
 * Pattern being matched:
 *
 *    v_mov_b32_dpp vA, vB, ...   ; mov
 *    v_xxx         vC, vA, ...   ; instr
 *
 * which is rewritten so that instr itself becomes a DPP instruction reading the mov's source:
 *
 *    v_xxx_dpp     vC, vB, ...
 *
 * Only the use counts in ctx.uses and instr are modified here; the mov is left in place.
 * At most one operand is combined per call.
 */
void
try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
   /* Candidates are VALU instructions that are not DPP yet and that can_use_DPP() accepts. */
   if (!instr->isVALU())
      return;
   if (instr->isDPP())
      return;
   if (!can_use_DPP(instr, false))
      return;

   /* Only the first two operands are looked at. */
   for (unsigned op_idx = 0; op_idx < MIN2(2, instr->operands.size()); op_idx++) {
      Idx writer_idx = last_writer_idx(ctx, instr->operands[op_idx]);
      if (!writer_idx.found())
         continue;

      Instruction* mov = ctx.get(writer_idx);
      const bool is_dpp_mov = mov->opcode == aco_opcode::v_mov_b32 && mov->isDPP();
      if (!is_dpp_mov)
         continue;

      /* A mov whose destination register equals its source register overwrites its own input.
       * Unless the mov is going to become unused (its result has a temp id and this instruction
       * is the only remaining user), the original source value is gone by the time we read it.
       */
      if (mov->definitions[0].physReg() == mov->operands[0].physReg()) {
         const auto mov_def_id = mov->definitions[0].tempId();
         if (mov_def_id == 0 || ctx.uses[mov_def_id] > 1)
            continue;
      }

      /* Give up when is_instr_after() reports the last writer of the mov's source as coming
       * after the mov (presumably: the source register was overwritten in between).
       */
      Idx src_writer_idx = last_writer_idx(ctx, mov->operands[0]);
      if (is_instr_after(src_writer_idx, writer_idx))
         continue;

      /* The combined source has to sit in operand 0, so operand 1 needs a swap first. */
      if (op_idx != 0 && !can_swap_operands(instr, &instr->opcode))
         continue;

      /* anything else doesn't make sense in SSA */
      assert(mov->dpp().row_mask == 0xf && mov->dpp().bank_mask == 0xf);

      /* This instruction no longer reads the mov's result. If the mov still has other users,
       * its source gains one additional user (this instruction).
       */
      const bool mov_still_used = --ctx.uses[mov->definitions[0].tempId()] != 0;
      if (mov_still_used)
         ctx.uses[mov->operands[0].tempId()]++;

      convert_to_DPP(instr);

      DPP_instruction& combined = instr->dpp();
      if (op_idx != 0) {
         std::swap(combined.operands[0], combined.operands[1]);
         std::swap(combined.neg[0], combined.neg[1]);
         std::swap(combined.abs[0], combined.abs[1]);
      }

      /* Read the mov's source directly and take over its DPP control. */
      combined.operands[0] = mov->operands[0];
      combined.dpp_ctrl = mov->dpp().dpp_ctrl;
      combined.bound_ctrl = true;

      /* Merge input modifiers: the mov's neg is irrelevant when this instruction already takes
       * the absolute value of the operand; abs flags accumulate.
       */
      combined.neg[0] ^= mov->dpp().neg[0] && !combined.abs[0];
      combined.abs[0] |= mov->dpp().abs[0];
      return;
   }
}
void
process_instruction(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
@ -345,6 +398,8 @@ process_instruction(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
try_optimize_scc_nocompare(ctx, instr);
try_combine_dpp(ctx, instr);
if (instr)
save_reg_writes(ctx, instr);