mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 15:20:17 +01:00
aco/optimizer: apply f2f16 conversion with the new helpers
Foz-DB Navi21:
Totals from 183 (0.23% of 79789) affected shaders:
Instrs: 158014 -> 157170 (-0.53%); split: -0.54%, +0.01%
CodeSize: 836444 -> 830148 (-0.75%); split: -0.76%, +0.01%
Latency: 593790 -> 592580 (-0.20%); split: -0.39%, +0.19%
InvThroughput: 150243 -> 148783 (-0.97%); split: -0.98%, +0.00%
VClause: 1301 -> 1312 (+0.85%); split: -0.31%, +1.15%
SClause: 2608 -> 2606 (-0.08%)
PreVGPRs: 8706 -> 8674 (-0.37%)
VALU: 102042 -> 101067 (-0.96%); split: -0.97%, +0.01%

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35272>
This commit is contained in:
parent
2572528d31
commit
2d410cf18e
1 changed file with 34 additions and 64 deletions
|
|
@ -2064,6 +2064,32 @@ parse_operand(opt_ctx& ctx, Temp tmp, alu_opt_op& op_info, aco_type& type)
|
|||
return true;
|
||||
}
|
||||
|
||||
if (info.parent_instr->opcode == aco_opcode::v_cvt_f32_f16 ||
|
||||
info.parent_instr->opcode == aco_opcode::s_cvt_f32_f16 ||
|
||||
info.parent_instr->opcode == aco_opcode::s_cvt_hi_f32_f16) {
|
||||
Instruction* instr = info.parent_instr;
|
||||
if (instr->isVALU() && (instr->valu().clamp || instr->valu().omod))
|
||||
return false;
|
||||
if (instr->isDPP() || (instr->isSDWA() && instr->sdwa().dst_sel.size() != 4))
|
||||
return false;
|
||||
|
||||
if (instr->isVALU() && instr->valu().abs[0])
|
||||
op_info.abs[0] = true;
|
||||
if (instr->isVALU() && instr->valu().neg[0])
|
||||
op_info.neg[0] = true;
|
||||
|
||||
if (instr->isSDWA())
|
||||
op_info.extract[0] = instr->sdwa().sel[0];
|
||||
else if (instr->isVALU() && instr->valu().opsel[0])
|
||||
op_info.extract[0] = SubdwordSel::uword1;
|
||||
else if (info.parent_instr->opcode == aco_opcode::s_cvt_hi_f32_f16)
|
||||
op_info.extract[0] = SubdwordSel::uword1;
|
||||
|
||||
op_info.f16_to_f32 = true;
|
||||
op_info.op = instr->operands[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
if (info.is_temp() || info.is_fcanonicalize() || info.is_abs() || info.is_neg()) {
|
||||
op_info.op = Operand(info.temp);
|
||||
if (info.is_abs())
|
||||
|
|
@ -2094,6 +2120,12 @@ combine_operand(opt_ctx& ctx, alu_opt_op& inner, const aco_type& inner_type,
|
|||
if (has_imod && outer_type.bit_size != inner_type.bit_size)
|
||||
return false;
|
||||
|
||||
if (outer.f16_to_f32) {
|
||||
if (inner_type.num_components != 1 || inner.extract[0].size() != 4 || inner.f16_to_f32)
|
||||
return false;
|
||||
inner.f16_to_f32 = true;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < inner_type.num_components; i++) {
|
||||
unsigned offset = inner.extract[i].offset() * 8;
|
||||
unsigned size = MIN2(inner.extract[i].size() * 8, inner_type.bit_size);
|
||||
|
|
@ -2208,7 +2240,8 @@ alu_propagate_temp_const(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool uses_va
|
|||
|
||||
alu_opt_op outer;
|
||||
aco_type outer_type;
|
||||
if (!parse_operand(ctx, info.operands[i].op.getTemp(), outer, outer_type)) {
|
||||
if (!parse_operand(ctx, info.operands[i].op.getTemp(), outer, outer_type) ||
|
||||
(!uses_valid && outer.f16_to_f32)) {
|
||||
operand_mask &= ~BITFIELD_BIT(i);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -4360,68 +4393,6 @@ combine_output_conversion(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
return true;
|
||||
}
|
||||
|
||||
// Folds v_cvt_f32_f16 producers into the operands of `instr`.
//
// For every temp operand of `instr` whose defining instruction
// (ctx.info[...].parent_instr) is a plain v_cvt_f32_f16, the operand is
// rewired to read the conversion's source directly, and the conversion's
// opsel/neg/abs state is carried over via opsel_lo/opsel_hi/neg/abs on `instr`.
// If `instr` is not already VOP3P it is first converted with to_mad_mix().
//
// ctx   - optimizer context; ctx.info is read and ctx.uses is updated.
// instr - instruction to modify in place; nothing happens unless
//         can_use_mad_mix(ctx, instr) holds.
void
|
||||
combine_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (!can_use_mad_mix(ctx, instr))
|
||||
return;
|
||||
|
||||
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||
if (!instr->operands[i].isTemp())
|
||||
continue;
|
||||
Temp tmp = instr->operands[i].getTemp();
|
||||
|
||||
// Only fold conversions that read a temp and have no clamp/omod; SDWA is
// accepted only with a 4-byte dst_sel and a 2-byte source select, and DPP
// conversions are never folded.
Instruction* conv = ctx.info[tmp.id()].parent_instr;
|
||||
if (conv->opcode != aco_opcode::v_cvt_f32_f16 || !conv->operands[0].isTemp() ||
|
||||
conv->valu().clamp || conv->valu().omod) {
|
||||
continue;
|
||||
} else if (conv->isSDWA() &&
|
||||
(conv->sdwa().dst_sel.size() != 4 || conv->sdwa().sel[0].size() != 2)) {
|
||||
continue;
|
||||
} else if (conv->isDPP()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// The operand slot being replaced must itself be 32 bits wide.
if (get_operand_type(instr, i).bit_size != 32)
|
||||
continue;
|
||||
|
||||
/* Conversion to VOP3P will add inline constant operands, but that shouldn't affect
|
||||
* check_vop3_operands(). */
|
||||
// Trial operand list: a copy of the current operands with slot i replaced
// by the conversion's source, validated before anything is modified.
// NOTE(review): op[] has room for 3 entries; presumably can_use_mad_mix()
// guarantees operands.size() <= 3 - confirm.
Operand op[3];
|
||||
for (unsigned j = 0; j < instr->operands.size(); j++)
|
||||
op[j] = instr->operands[j];
|
||||
op[i] = conv->operands[0];
|
||||
if (!check_vop3_operands(ctx, instr->operands.size(), op))
|
||||
continue;
|
||||
// A DPP instruction is only rewired when the conversion's source is a VGPR.
if (!conv->operands[0].isOfType(RegType::vgpr) && instr->isDPP())
|
||||
continue;
|
||||
|
||||
if (!instr->isVOP3P()) {
|
||||
// is_add is true for every opcode other than v_mul_f32 / v_fma_f32.
// NOTE(review): the index bump below presumably compensates for
// to_mad_mix() shifting the operands of add-like instructions by one
// slot - confirm against to_mad_mix().
bool is_add =
|
||||
instr->opcode != aco_opcode::v_mul_f32 && instr->opcode != aco_opcode::v_fma_f32;
|
||||
to_mad_mix(ctx, instr);
|
||||
i += is_add;
|
||||
}
|
||||
|
||||
// Drop this instruction's use of the converted temp. If the conversion
// still has other users, its source gains one additional use from `instr`.
if (--ctx.uses[tmp.id()])
|
||||
ctx.uses[conv->operands[0].tempId()]++;
|
||||
instr->operands[i].setTemp(conv->operands[0].getTemp());
|
||||
// Preserve the precise flag of the folded conversion on the result.
if (conv->definitions[0].isPrecise())
|
||||
instr->definitions[0].setPrecise(true);
|
||||
// opsel_hi is always set for the folded operand; opsel_lo is set when the
// SDWA source select has byte offset 2, otherwise copied from conv's opsel[0].
// NOTE(review): presumably opsel_hi marks the operand as f16 and opsel_lo
// picks the upper half - confirm against the ISA documentation.
instr->valu().opsel_hi[i] = true;
|
||||
if (conv->isSDWA() && conv->sdwa().sel[0].offset() == 2)
|
||||
instr->valu().opsel_lo[i] = true;
|
||||
else
|
||||
instr->valu().opsel_lo[i] = conv->valu().opsel[0];
|
||||
// Merge the conversion's input modifiers: when `instr` already applies abs
// to this operand, conv's neg/abs are left out; otherwise neg is XOR-combined
// and abs is taken from the conversion.
bool neg = conv->valu().neg[0];
|
||||
bool abs = conv->valu().abs[0];
|
||||
if (!instr->valu().abs[i]) {
|
||||
instr->valu().neg[i] ^= neg;
|
||||
instr->valu().abs[i] = abs;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: we could possibly move the whole label_instruction pass to combine_instruction:
|
||||
// this would mean that we'd have to fix the instruction uses while value propagation
|
||||
|
||||
|
|
@ -4486,7 +4457,6 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
}
|
||||
|
||||
if (instr->isVALU()) {
|
||||
combine_mad_mix(ctx, instr);
|
||||
while (apply_omod_clamp(ctx, instr) || combine_output_conversion(ctx, instr))
|
||||
;
|
||||
apply_insert(ctx, instr);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue