aco/isel: split vector into dwords/words first

Foz-DB Navi48:
Totals from 361 (0.44% of 82405) affected shaders:
MaxWaves: 5806 -> 5832 (+0.45%)
Instrs: 2343746 -> 2343762 (+0.00%); split: -0.04%, +0.04%
CodeSize: 13270504 -> 13267116 (-0.03%); split: -0.10%, +0.08%
VGPRs: 42008 -> 41708 (-0.71%)
SpillVGPRs: 308 -> 303 (-1.62%)
Scratch: 1574656 -> 1574400 (-0.02%)
Latency: 26571385 -> 22602486 (-14.94%); split: -14.95%, +0.01%
InvThroughput: 5474157 -> 4614777 (-15.70%); split: -15.70%, +0.00%
VClause: 57512 -> 57515 (+0.01%); split: -0.03%, +0.03%
SClause: 56313 -> 56319 (+0.01%)
Copies: 251626 -> 248707 (-1.16%); split: -1.24%, +0.08%
Branches: 89620 -> 89614 (-0.01%)
PreVGPRs: 37361 -> 36910 (-1.21%); split: -1.21%, +0.01%
VALU: 1111534 -> 1108507 (-0.27%); split: -0.29%, +0.02%
SALU: 443684 -> 443687 (+0.00%); split: -0.00%, +0.00%
VMEM: 85287 -> 85277 (-0.01%)
VOPD: 97987 -> 98091 (+0.11%); split: +0.30%, -0.20%

Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39532>
This commit is contained in:
Georg Lehmann 2025-11-26 16:57:56 +01:00 committed by Marge Bot
parent 1a3e627223
commit a0c663378c

View file

@@ -148,14 +148,24 @@ emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, RegClass dst_rc)
void
emit_split_vector(isel_context* ctx, Temp vec_src, unsigned num_components)
{
if (vec_src.size() == 1 && vec_src.type() == RegType::sgpr)
return;
unsigned comp_bytes = vec_src.bytes() / num_components;
assert(vec_src.bytes() % num_components == 0 && util_is_power_of_two_nonzero(comp_bytes));
if (num_components == 1)
return;
if (ctx->allocated_vec.find(vec_src.id()) != ctx->allocated_vec.end())
return;
if (num_components > vec_src.size() && vec_src.type() == RegType::sgpr) {
/* sub-dword split: should still help get_alu_src() */
emit_split_vector(ctx, vec_src, vec_src.size());
return;
if (comp_bytes < 4 && num_components > 2) {
/* sub-dword split: split into dwords/words first */
unsigned split_size = vec_src.size() == 1 ? 2 : 4;
if (vec_src.bytes() % split_size == 0) {
emit_split_vector(ctx, vec_src, vec_src.bytes() / split_size);
auto it = ctx->allocated_vec.find(vec_src.id());
for (unsigned i = 0; i < vec_src.bytes() / split_size; i++)
emit_split_vector(ctx, it->second[i], split_size / comp_bytes);
return;
}
}
RegClass rc = RegClass::get(vec_src.type(), vec_src.bytes() / num_components);
aco_ptr<Instruction> split{