diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 0c128dbec88..f4c219aca2a 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -609,15 +609,15 @@ can_apply_sgprs(opt_ctx& ctx, aco_ptr& instr) } void -to_VOP3(opt_ctx& ctx, aco_ptr& instr) +to_VOP3(opt_ctx& ctx, aco_ptr& instr, unsigned add_operands = 0) { if (instr->isVOP3()) return; aco_ptr tmp = std::move(instr); Format format = asVOP3(tmp->format); - instr.reset(create_instruction(tmp->opcode, format, tmp->operands.size(), - tmp->definitions.size())); + instr.reset(create_instruction( + tmp->opcode, format, tmp->operands.size() + add_operands, tmp->definitions.size())); std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin()); for (unsigned i = 0; i < instr->definitions.size(); i++) { instr->definitions[i] = tmp->definitions[i]; @@ -3200,13 +3200,22 @@ apply_insert(opt_ctx& ctx, aco_ptr& instr) SubdwordSel sel = parse_insert(def_info.instr); assert(sel); - if (!can_use_SDWA(ctx.program->gfx_level, instr, true)) - return false; + if (instr->opcode == aco_opcode::v_cvt_u32_f32 && instr->format == Format::VOP1 && + !sel.sign_extend() && sel.size() == 1) { + to_VOP3(ctx, instr, 2); + instr->format = Format::VOP3; + instr->opcode = aco_opcode::v_cvt_pk_u8_f32; + instr->operands[1] = Operand::c32(sel.offset()); + instr->operands[2] = Operand::zero(); + } else { + if (!can_use_SDWA(ctx.program->gfx_level, instr, true)) + return false; - to_SDWA(ctx, instr); - if (instr->sdwa().dst_sel.size() != 4) - return false; - static_cast(instr.get())->dst_sel = sel; + to_SDWA(ctx, instr); + if (instr->sdwa().dst_sel.size() != 4) + return false; + static_cast(instr.get())->dst_sel = sel; + } instr->definitions[0].swapTemp(def_info.instr->definitions[0]); ctx.info[instr->definitions[0].tempId()].label = 0;