From 01d20680e2e6ef9a9de8a1b4ec66cd569f3b7535 Mon Sep 17 00:00:00 2001 From: Georg Lehmann Date: Sun, 22 Jun 2025 15:07:20 +0200 Subject: [PATCH] aco/optimizer: generalize p_create_vector of split vector opt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Foz-DB Navi48: Totals from 116 (0.14% of 80251) affected shaders: MaxWaves: 2965 -> 2972 (+0.24%) Instrs: 145933 -> 144632 (-0.89%); split: -0.91%, +0.02% CodeSize: 815968 -> 806512 (-1.16%); split: -1.20%, +0.04% VGPRs: 7240 -> 7144 (-1.33%); split: -1.66%, +0.33% Latency: 3065858 -> 3063802 (-0.07%); split: -0.11%, +0.05% InvThroughput: 745395 -> 743506 (-0.25%); split: -0.26%, +0.01% VClause: 3702 -> 3694 (-0.22%); split: -0.65%, +0.43% SClause: 3187 -> 3191 (+0.13%) Copies: 12716 -> 11804 (-7.17%); split: -7.42%, +0.25% Branches: 3501 -> 3503 (+0.06%) PreVGPRs: 5400 -> 5327 (-1.35%); split: -1.41%, +0.06% VALU: 76455 -> 75492 (-1.26%); split: -1.30%, +0.04% SALU: 23594 -> 23595 (+0.00%); split: -0.00%, +0.01% VOPD: 1478 -> 1527 (+3.32%); split: +4.67%, -1.35% Mostly helps FSR4. Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_optimizer.cpp | 67 +++++++++++++++++++----------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index d392b57e527..0a8fa65efae 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -1609,37 +1609,56 @@ label_instruction(opt_ctx& ctx, aco_ptr& instr) } } + offset = 0; + for (unsigned i = 0; i < ops.size(); i++) { + if (ops[i].isTemp()) { + if (ctx.info[ops[i].tempId()].is_temp() && + ops[i].regClass() == ctx.info[ops[i].tempId()].temp.regClass()) { + ops[i].setTemp(ctx.info[ops[i].tempId()].temp); + } + + /* If this and the following operands make up all definitions of a `p_split_vector`, + * replace them with the operand of the `p_split_vector` instruction. + */ + Instruction* parent = ctx.info[ops[i].tempId()].parent_instr; + if (parent->opcode == aco_opcode::p_split_vector && + (offset % 4 == 0 || parent->operands[0].bytes() < 4) && + parent->definitions.size() <= ops.size() - i) { + copy_prop = true; + for (unsigned j = 0; copy_prop && j < parent->definitions.size(); j++) { + copy_prop &= ops[i + j].isTemp() && + ops[i + j].getTemp() == parent->definitions[j].getTemp(); + } + + if (copy_prop) { + ops.erase(ops.begin() + i + 1, ops.begin() + i + parent->definitions.size()); + ops[i] = parent->operands[0]; + } + } + } + + offset += ops[i].bytes(); + } + /* combine expanded operands to new vector */ - if (ops.size() != instr->operands.size()) { - assert(ops.size() > instr->operands.size()); + if (ops.size() <= instr->operands.size()) { + while (instr->operands.size() > ops.size()) + instr->operands.pop_back(); + + if (ops.size() == 1) { + instr->opcode = aco_opcode::p_parallelcopy; + if (ops[0].isTemp()) + ctx.info[instr->definitions[0].tempId()].set_temp(ops[0].getTemp()); + } + } else { Definition def = instr->definitions[0]; instr.reset( create_instruction(aco_opcode::p_create_vector, Format::PSEUDO, ops.size(), 1)); - for (unsigned i = 0; i < ops.size(); i++) { - if (ops[i].isTemp() && ctx.info[ops[i].tempId()].is_temp() && - ops[i].regClass() == ctx.info[ops[i].tempId()].temp.regClass()) - ops[i].setTemp(ctx.info[ops[i].tempId()].temp); - instr->operands[i] = ops[i]; - } instr->definitions[0] = def; - } else { - for (unsigned i = 0; i < ops.size(); i++) { - assert(instr->operands[i] == ops[i]); - } } - if (instr->operands.size() == 2 && instr->operands[1].isTemp()) { - /* check if this is created from split_vector */ - ssa_info& info = ctx.info[instr->operands[1].tempId()]; - if (info.parent_instr->opcode == aco_opcode::p_split_vector) { - Instruction* split = info.parent_instr; - if (instr->operands[0].isTemp() && - instr->operands[0].getTemp() == split->definitions[0].getTemp() && - instr->operands[1].getTemp() == split->definitions[1].getTemp() && - instr->definitions[0].regClass() == split->operands[0].regClass()) - ctx.info[instr->definitions[0].tempId()].set_temp(split->operands[0].getTemp()); - } - } + for (unsigned i = 0; i < ops.size(); i++) + instr->operands[i] = ops[i]; break; } case aco_opcode::p_split_vector: {