From 6d020540474b43c7e9a3175638bb04be68d80dbb Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Thu, 8 Sep 2022 11:24:27 +0200 Subject: [PATCH] aco: Combine v_cvt_u32_f32 with insert to v_cvt_pk_u8_f32. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No Foz-DB difference on Navi21. Foz-DB GFX11: Totals from 746 (0.55% of 134913) affected shaders: CodeSize: 8430248 -> 8416128 (-0.17%); split: -0.17%, +0.00% Instrs: 1617202 -> 1614707 (-0.15%) Latency: 13943398 -> 13934161 (-0.07%); split: -0.07%, +0.00% InvThroughput: 2601620 -> 2596624 (-0.19%); split: -0.20%, +0.01% Copies: 114346 -> 114334 (-0.01%); split: -0.01%, +0.00% PreVGPRs: 48314 -> 48312 (-0.00%) Signed-off-by: Georg Lehmann Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 0c128dbec88..f4c219aca2a 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -609,15 +609,15 @@ can_apply_sgprs(opt_ctx& ctx, aco_ptr& instr) } void -to_VOP3(opt_ctx& ctx, aco_ptr& instr) +to_VOP3(opt_ctx& ctx, aco_ptr& instr, unsigned add_operands = 0) { if (instr->isVOP3()) return; aco_ptr tmp = std::move(instr); Format format = asVOP3(tmp->format); - instr.reset(create_instruction(tmp->opcode, format, tmp->operands.size(), - tmp->definitions.size())); + instr.reset(create_instruction( + tmp->opcode, format, tmp->operands.size() + add_operands, tmp->definitions.size())); std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin()); for (unsigned i = 0; i < instr->definitions.size(); i++) { instr->definitions[i] = tmp->definitions[i]; @@ -3200,13 +3200,22 @@ apply_insert(opt_ctx& ctx, aco_ptr& instr) SubdwordSel sel = parse_insert(def_info.instr); assert(sel); - if (!can_use_SDWA(ctx.program->gfx_level, instr, true)) - return false; + if (instr->opcode == aco_opcode::v_cvt_u32_f32 && instr->format == Format::VOP1 && + !sel.sign_extend() && sel.size() == 1) { + to_VOP3(ctx, instr, 2); + instr->format = Format::VOP3; + instr->opcode = aco_opcode::v_cvt_pk_u8_f32; + instr->operands[1] = Operand::c32(sel.offset()); + instr->operands[2] = Operand::zero(); + } else { + if (!can_use_SDWA(ctx.program->gfx_level, instr, true)) + return false; - to_SDWA(ctx, instr); - if (instr->sdwa().dst_sel.size() != 4) - return false; - static_cast(instr.get())->dst_sel = sel; + to_SDWA(ctx, instr); + if (instr->sdwa().dst_sel.size() != 4) + return false; + static_cast(instr.get())->dst_sel = sel; + } instr->definitions[0].swapTemp(def_info.instr->definitions[0]); ctx.info[instr->definitions[0].tempId()].label = 0;