From 623230a6efa25677173d59e648b84b7fc47bffe5 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Tue, 22 Apr 2025 12:40:08 +0100 Subject: [PATCH] aco/ra: change sorting in compact_relocate_vars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit D16 MIMG or pseudo-scalar transcendental instructions might give lower limits to the maximum register available for their definitions, so just try to place them earlier. This is also part of fixing compact_relocate_vars with aligned NPOT def/killed-op space (the second part is the later commit which changes get_stride()). Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 33 ++++++++++++++------ 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index bc9674a53f4..942730ad346 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -1525,17 +1525,32 @@ compact_relocate_vars(ra_ctx& ctx, const std::vector& vars, std::sort( sorted.begin(), sorted.end(), - [&ctx](const IDAndInfo& a, const IDAndInfo& b) + [=, &ctx](const IDAndInfo& a, const IDAndInfo& b) { - unsigned a_stride = a.info.stride * (a.info.rc.is_subdword() ? 1 : 4); - unsigned b_stride = b.info.stride * (b.info.rc.is_subdword() ? 1 : 4); - if (a_stride > b_stride) - return true; - if (a_stride < b_stride) - return false; + unsigned a_stride = MAX2(a.info.stride * (a.info.rc.is_subdword() ? 1 : 4), 4); + unsigned b_stride = MAX2(b.info.stride * (b.info.rc.is_subdword() ? 1 : 4), 4); + /* Since the SGPR bounds should always be a multiple of two, we can place + * variables in this order: + * - the usual 4 SGPR aligned variables + * - then the 0xffffffff variable + * - then the unaligned variables + * - and finally the 2 SGPR aligned variables + * This way, we should always be able to place variables if the 0xffffffff one + * had a NPOT size. + * + * This also lets us avoid placing the 0xffffffff variable in VCC if it's s1/s2 + * (required for pseudo-scalar transcendental) and places it first if it's a + * VGPR variable (required for ImageGather4D16Bug). + */ + assert(a.info.rc.type() != RegType::sgpr || get_reg_bounds(ctx, a.info.rc).size % 2 == 0); + assert(a_stride == 16 || a_stride == 8 || a_stride == 4); + assert(b_stride == 16 || b_stride == 8 || b_stride == 4); + if ((a_stride == 16) != (b_stride == 16)) + return a_stride > b_stride; if (a.id == 0xffffffff || b.id == 0xffffffff) - return a.id == - 0xffffffff; /* place 0xffffffff before others if possible, not for any reason */ + return a.id == 0xffffffff; + if (a_stride != b_stride) + return a_stride < b_stride; return ctx.assignments[a.id].reg < ctx.assignments[b.id].reg; });