aco: Combine v_cvt_u32_f32 with insert to v_cvt_pk_u8_f32.

No Foz-DB difference on Navi21.

Foz-DB GFX11:
Totals from 746 (0.55% of 134913) affected shaders:
CodeSize: 8430248 -> 8416128 (-0.17%); split: -0.17%, +0.00%
Instrs: 1617202 -> 1614707 (-0.15%)
Latency: 13943398 -> 13934161 (-0.07%); split: -0.07%, +0.00%
InvThroughput: 2601620 -> 2596624 (-0.19%); split: -0.20%, +0.01%
Copies: 114346 -> 114334 (-0.01%); split: -0.01%, +0.00%
PreVGPRs: 48314 -> 48312 (-0.00%)

Signed-off-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18492>
This commit is contained in:
Georg Lehmann 2022-09-08 11:24:27 +02:00 committed by Marge Bot
parent 5a3cc2d453
commit 6d02054047

View file

@@ -609,15 +609,15 @@ can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
void
to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned add_operands = 0)
{
if (instr->isVOP3())
return;
aco_ptr<Instruction> tmp = std::move(instr);
Format format = asVOP3(tmp->format);
instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(),
tmp->definitions.size()));
instr.reset(create_instruction<VOP3_instruction>(
tmp->opcode, format, tmp->operands.size() + add_operands, tmp->definitions.size()));
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
for (unsigned i = 0; i < instr->definitions.size(); i++) {
instr->definitions[i] = tmp->definitions[i];
@@ -3200,13 +3200,22 @@ apply_insert(opt_ctx& ctx, aco_ptr<Instruction>& instr)
SubdwordSel sel = parse_insert(def_info.instr);
assert(sel);
if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
return false;
if (instr->opcode == aco_opcode::v_cvt_u32_f32 && instr->format == Format::VOP1 &&
!sel.sign_extend() && sel.size() == 1) {
to_VOP3(ctx, instr, 2);
instr->format = Format::VOP3;
instr->opcode = aco_opcode::v_cvt_pk_u8_f32;
instr->operands[1] = Operand::c32(sel.offset());
instr->operands[2] = Operand::zero();
} else {
if (!can_use_SDWA(ctx.program->gfx_level, instr, true))
return false;
to_SDWA(ctx, instr);
if (instr->sdwa().dst_sel.size() != 4)
return false;
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
to_SDWA(ctx, instr);
if (instr->sdwa().dst_sel.size() != 4)
return false;
static_cast<SDWA_instruction*>(instr.get())->dst_sel = sel;
}
instr->definitions[0].swapTemp(def_info.instr->definitions[0]);
ctx.info[instr->definitions[0].tempId()].label = 0;