From d70688492c565ad89b9c51c1a2c5fda57f7c49c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= Date: Wed, 16 Mar 2022 15:14:29 +0100 Subject: [PATCH] aco/optimizer: re-combine and copy-propagate p_create_vector(p_split_vector) Totals from 309 (0.23% of 134913) affected shaders: (GFX10.3) CodeSize: 1853812 -> 1857732 (+0.21%); split: -0.05%, +0.27% Instrs: 340810 -> 341789 (+0.29%); split: -0.07%, +0.36% Latency: 3301814 -> 3301774 (-0.00%); split: -0.02%, +0.02% InvThroughput: 590473 -> 590914 (+0.07%); split: -0.00%, +0.08% Copies: 28751 -> 29731 (+3.41%); split: -0.87%, +4.28% PreSGPRs: 14010 -> 14028 (+0.13%); split: -0.01%, +0.14% Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_optimizer.cpp | 32 +++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index cddc3f194b1..104dea4041f 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -123,6 +123,7 @@ enum Label { label_dpp8 = 1ull << 36, label_f2f32 = 1ull << 37, label_f2f16 = 1ull << 38, + label_split = 1ull << 39, }; static constexpr uint64_t instr_usedef_labels = @@ -132,7 +133,7 @@ static constexpr uint64_t instr_usedef_labels = static constexpr uint64_t instr_mod_labels = label_omod2 | label_omod4 | label_omod5 | label_clamp | label_insert | label_f2f16; -static constexpr uint64_t instr_labels = instr_usedef_labels | instr_mod_labels; +static constexpr uint64_t instr_labels = instr_usedef_labels | instr_mod_labels | label_split; static constexpr uint64_t temp_labels = label_abs | label_neg | label_temp | label_vcc | label_b2f | label_uniform_bool | label_scc_invert | label_b2i | label_fcanonicalize; @@ -485,6 +486,14 @@ struct ssa_info { bool is_dpp() { return label & (label_dpp16 | label_dpp8); } bool is_dpp16() { return label & label_dpp16; } bool is_dpp8() { return label & label_dpp8; } + + void set_split(Instruction* split) + { + add_label(label_split); + instr = split; + } + + bool is_split() { return label & label_split; } }; struct opt_ctx { @@ -1502,6 +1511,16 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) } } ctx.info[instr->definitions[0].tempId()].set_vec(instr.get()); + + if (instr->operands.size() == 2) { + /* check if this is created from split_vector */ + if (instr->operands[1].isTemp() && ctx.info[instr->operands[1].tempId()].is_split()) { + Instruction* split = ctx.info[instr->operands[1].tempId()].instr; + if (instr->operands[0].isTemp() && + instr->operands[0].getTemp() == split->definitions[0].getTemp()) + ctx.info[instr->definitions[0].tempId()].set_temp(split->operands[0].getTemp()); + } + } break; } case aco_opcode::p_split_vector: { @@ -1516,11 +1535,14 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) } break; } else if (!info.is_vec()) { - if (instr->operands[0].bytes() == 4 && instr->definitions.size() == 2) { - /* D16 subdword split */ - ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); - if (instr->definitions[1].bytes() == 2) + if (instr->definitions.size() == 2 && instr->operands[0].isTemp() && + instr->definitions[0].bytes() == instr->definitions[1].bytes()) { + ctx.info[instr->definitions[1].tempId()].set_split(instr.get()); + if (instr->operands[0].bytes() == 4) { + /* D16 subdword split */ + ctx.info[instr->definitions[0].tempId()].set_temp(instr->operands[0].getTemp()); ctx.info[instr->definitions[1].tempId()].set_extract(instr.get()); + } } break; }