mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-29 16:40:13 +01:00
aco: combine DPP into VALU after RA
Mostly helps a bunch of Cyberpunk 2077 shaders. fossil-db (Sienna Cichlid): Totals from 26 (0.02% of 150170) affected shaders: CodeSize: 83208 -> 81528 (-2.02%) Instrs: 14728 -> 14308 (-2.85%) Latency: 48041 -> 47793 (-0.52%) InvThroughput: 10836 -> 10578 (-2.38%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11924>
This commit is contained in:
parent
2e6834d4f6
commit
4ac47ad1cd
1 changed files with 55 additions and 0 deletions
|
|
@ -22,6 +22,7 @@
|
|||
*
|
||||
*/
|
||||
|
||||
#include "aco_builder.h"
|
||||
#include "aco_ir.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
|
@ -338,6 +339,58 @@ try_optimize_scc_nocompare(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
try_combine_dpp(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (!instr->isVALU() || instr->isDPP() || !can_use_DPP(instr, false))
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < MIN2(2, instr->operands.size()); i++) {
|
||||
Idx op_instr_idx = last_writer_idx(ctx, instr->operands[i]);
|
||||
if (!op_instr_idx.found())
|
||||
continue;
|
||||
|
||||
Instruction* mov = ctx.get(op_instr_idx);
|
||||
if (mov->opcode != aco_opcode::v_mov_b32 || !mov->isDPP())
|
||||
continue;
|
||||
|
||||
/* If we aren't going to remove the v_mov_b32, we have to ensure that it doesn't overwrite
|
||||
* it's own operand before we use it.
|
||||
*/
|
||||
if (mov->definitions[0].physReg() == mov->operands[0].physReg() &&
|
||||
(!mov->definitions[0].tempId() || ctx.uses[mov->definitions[0].tempId()] > 1))
|
||||
continue;
|
||||
|
||||
Idx mov_src_idx = last_writer_idx(ctx, mov->operands[0]);
|
||||
if (is_instr_after(mov_src_idx, op_instr_idx))
|
||||
continue;
|
||||
|
||||
if (i && !can_swap_operands(instr, &instr->opcode))
|
||||
continue;
|
||||
|
||||
/* anything else doesn't make sense in SSA */
|
||||
assert(mov->dpp().row_mask == 0xf && mov->dpp().bank_mask == 0xf);
|
||||
|
||||
if (--ctx.uses[mov->definitions[0].tempId()])
|
||||
ctx.uses[mov->operands[0].tempId()]++;
|
||||
|
||||
convert_to_DPP(instr);
|
||||
|
||||
DPP_instruction* dpp = &instr->dpp();
|
||||
if (i) {
|
||||
std::swap(dpp->operands[0], dpp->operands[1]);
|
||||
std::swap(dpp->neg[0], dpp->neg[1]);
|
||||
std::swap(dpp->abs[0], dpp->abs[1]);
|
||||
}
|
||||
dpp->operands[0] = mov->operands[0];
|
||||
dpp->dpp_ctrl = mov->dpp().dpp_ctrl;
|
||||
dpp->bound_ctrl = true;
|
||||
dpp->neg[0] ^= mov->dpp().neg[0] && !dpp->abs[0];
|
||||
dpp->abs[0] |= mov->dpp().abs[0];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
process_instruction(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
|
|
@ -345,6 +398,8 @@ process_instruction(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
|
||||
try_optimize_scc_nocompare(ctx, instr);
|
||||
|
||||
try_combine_dpp(ctx, instr);
|
||||
|
||||
if (instr)
|
||||
save_reg_writes(ctx, instr);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue