mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 19:40:10 +01:00
aco/optimizer: optimize extract(extract())
Totals from 53 (0.04% of 134572) affected shaders: (GFX10.3)
SpillVGPRs: 1780 -> 1776 (-0.22%); split: -0.34%, +0.11%
CodeSize: 968352 -> 963196 (-0.53%); split: -0.55%, +0.02%
Scratch: 180224 -> 178176 (-1.14%)
Instrs: 169800 -> 169158 (-0.38%); split: -0.39%, +0.01%
Latency: 6186064 -> 6141408 (-0.72%); split: -1.16%, +0.44%
InvThroughput: 2605044 -> 2582967 (-0.85%); split: -1.37%, +0.52%
VClause: 4851 -> 4866 (+0.31%); split: -0.16%, +0.47%
SClause: 1744 -> 1746 (+0.11%)
Copies: 42874 -> 42325 (-1.28%); split: -1.40%, +0.12%
Branches: 5762 -> 5765 (+0.05%); split: -0.02%, +0.07%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13576>
This commit is contained in:
parent
5ad9c20d4a
commit
fb622775b5
1 changed file with 37 additions and 3 deletions
|
|
@@ -527,6 +527,7 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
|
|||
return false;
|
||||
break;
|
||||
case aco_opcode::p_extract_vector:
|
||||
case aco_opcode::p_extract:
|
||||
if (temp.type() == RegType::sgpr && !can_accept_sgpr)
|
||||
return false;
|
||||
break;
|
||||
|
|
@@ -991,9 +992,21 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
|
|||
can_use_opsel(ctx.program->chip_class, instr->opcode, idx, sel.offset()) &&
|
||||
!(instr->vop3().opsel & (1 << idx))) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
} else if (instr->opcode == aco_opcode::p_extract) {
|
||||
SubdwordSel instrSel = parse_extract(instr.get());
|
||||
|
||||
/* the outer offset must be within extracted range */
|
||||
if (instrSel.offset() >= sel.size())
|
||||
return false;
|
||||
|
||||
/* don't remove the sign-extension when increasing the size further */
|
||||
if (instrSel.size() > sel.size() && !instrSel.sign_extend() && sel.sign_extend())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Combine a p_extract (or p_insert, in some cases) instruction with instr.
|
||||
|
|
@@ -1033,6 +1046,18 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
|
|||
} else if (instr->isVOP3()) {
|
||||
if (sel.offset())
|
||||
instr->vop3().opsel |= 1 << idx;
|
||||
} else if (instr->opcode == aco_opcode::p_extract) {
|
||||
SubdwordSel instrSel = parse_extract(instr.get());
|
||||
|
||||
unsigned size = std::min(sel.size(), instrSel.size());
|
||||
unsigned offset = sel.offset() + instrSel.offset();
|
||||
unsigned sign_extend =
|
||||
instrSel.sign_extend() && (sel.sign_extend() || instrSel.size() <= sel.size());
|
||||
|
||||
instr->operands[1] = Operand::c32(offset / size);
|
||||
instr->operands[2] = Operand::c32(size * 8u);
|
||||
instr->operands[3] = Operand::c32(sign_extend);
|
||||
return;
|
||||
}
|
||||
|
||||
/* output modifier and label_vopc seem to be the only one worth keeping at the moment */
|
||||
|
|
@@ -3406,8 +3431,17 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
if (instr->isSDWA() || instr->isDPP())
|
||||
return;
|
||||
|
||||
if (instr->opcode == aco_opcode::p_extract)
|
||||
if (instr->opcode == aco_opcode::p_extract) {
|
||||
ssa_info& info = ctx.info[instr->operands[0].tempId()];
|
||||
if (info.is_extract() && can_apply_extract(ctx, instr, 0, info)) {
|
||||
apply_extract(ctx, instr, 0, info);
|
||||
if (--ctx.uses[instr->operands[0].tempId()])
|
||||
ctx.uses[info.instr->operands[0].tempId()]++;
|
||||
instr->operands[0].setTemp(info.instr->operands[0].getTemp());
|
||||
}
|
||||
|
||||
apply_ds_extract(ctx, instr);
|
||||
}
|
||||
|
||||
/* TODO: There are still some peephole optimizations that could be done:
|
||||
* - abs(a - b) -> s_absdiff_i32
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue