From 8e817cf52ba5169e3ca6692a9b4290e0445004b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?=
Date: Tue, 14 May 2024 16:20:36 +0200
Subject: [PATCH] aco/ra: refactor get_reg_simple() with increased stride.

This should avoid some redundant calls.

Totals from 153 (0.19% of 79395) affected shaders: (GFX11)
Instrs: 301717 -> 301687 (-0.01%); split: -0.06%, +0.05%
CodeSize: 1583080 -> 1582988 (-0.01%); split: -0.06%, +0.05%
VGPRs: 10068 -> 10348 (+2.78%)
Latency: 6685446 -> 6685475 (+0.00%); split: -0.11%, +0.11%
InvThroughput: 999241 -> 999316 (+0.01%); split: -0.01%, +0.02%
VClause: 3868 -> 3870 (+0.05%)
Copies: 23752 -> 23769 (+0.07%); split: -0.27%, +0.34%
Branches: 6479 -> 6480 (+0.02%)
VALU: 179290 -> 179307 (+0.01%); split: -0.04%, +0.04%

Part-of:
---
 src/amd/compiler/aco_register_allocation.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 4264c1e6281..9b74a3be724 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -900,15 +900,14 @@ get_reg_simple(ra_ctx& ctx, const RegisterFile& reg_file, DefInfo info)
    uint32_t stride = info.rc.is_subdword() ? DIV_ROUND_UP(info.stride, 4) : info.stride;
    RegClass rc = info.rc;
 
-   DefInfo new_info = info;
-   new_info.rc = RegClass(rc.type(), size);
-   for (unsigned new_stride = 16; new_stride > stride; new_stride /= 2) {
-      if (size % new_stride)
-         continue;
-      new_info.stride = new_stride;
-      std::optional res = get_reg_simple(ctx, reg_file, new_info);
-      if (res)
-         return res;
+   if (stride < size && !rc.is_subdword()) {
+      DefInfo new_info = info;
+      new_info.stride = stride * 2;
+      if (size % new_info.stride == 0) {
+         std::optional res = get_reg_simple(ctx, reg_file, new_info);
+         if (res)
+            return res;
+      }
    }
 
    auto is_free = [&](PhysReg reg_index)