diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index cddc3f194b1..104dea4041f 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -123,6 +123,7 @@ enum Label { label_dpp8 = 1ull << 36, label_f2f32 = 1ull << 37, label_f2f16 = 1ull << 38, + label_split = 1ull << 39, }; static constexpr uint64_t instr_usedef_labels = @@ -132,7 +133,7 @@ static constexpr uint64_t instr_usedef_labels = static constexpr uint64_t instr_mod_labels = label_omod2 | label_omod4 | label_omod5 | label_clamp | label_insert | label_f2f16; -static constexpr uint64_t instr_labels = instr_usedef_labels | instr_mod_labels; +static constexpr uint64_t instr_labels = instr_usedef_labels | instr_mod_labels | label_split; static constexpr uint64_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool | label_scc_invert | label_b2i | label_fcanonicalize; @@ -485,6 +486,14 @@ struct ssa_info { bool is_dpp() { return label & (label_dpp16 | label_dpp8); } bool is_dpp16() { return label & label_dpp16; } bool is_dpp8() { return label & label_dpp8; } + + void set_split(Instruction* split) + { + add_label(label_split); + instr = split; + } + + bool is_split() { return label & label_split; } }; struct opt_ctx { @@ -1502,6 +1511,16 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) } } ctx.info[instr->definitions[0].tempId()].set_vec(instr.get()); + + if (instr->operands.size() == 2) { + /* check if this is created from split_vector */ + if (instr->operands[1].isTemp() && ctx.info[instr->operands[1].tempId()].is_split()) { + Instruction* split = ctx.info[instr->operands[1].tempId()].instr; + if (instr->operands[0].isTemp() && + instr->operands[0].getTemp() == split->definitions[0].getTemp()) + ctx.info[instr->definitions[0].tempId()].set_temp(split->operands[0].getTemp()); + } + } break; } case aco_opcode::p_split_vector: { @@ -1516,11 +1535,14 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) } break; } else if (!info.is_vec()) { - if (instr->operands[0].bytes() == 4 && instr->definitions.size() == 2) { - /* D16 subdword split */ - ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); - if (instr->definitions[1].bytes() == 2) + if (instr->definitions.size() == 2 && instr->operands[0].isTemp() && + instr->definitions[0].bytes() == instr->definitions[1].bytes()) { + ctx.info[instr->definitions[1].tempId()].set_split(instr.get()); + if (instr->operands[0].bytes() == 4) { + /* D16 subdword split */ + ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); ctx.info[instr->definitions[1].tempId()].set_extract(instr.get()); + } } break; }