From 96e7cd89eaccfb08a899a03e40781985ca5e2c3b Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 25 Sep 2024 12:12:56 +0100 Subject: [PATCH] aco: fix is_vector_intact for GFX11 BVH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fossil-db (navi31): Totals from 44 (0.06% of 79395) affected shaders: Instrs: 1539111 -> 1539109 (-0.00%); split: -0.00%, +0.00% CodeSize: 7880452 -> 7880380 (-0.00%); split: -0.00%, +0.00% Latency: 7578794 -> 7578844 (+0.00%); split: -0.00%, +0.00% InvThroughput: 1450872 -> 1450876 (+0.00%); split: -0.00%, +0.00% VClause: 40014 -> 40010 (-0.01%) Copies: 116005 -> 116001 (-0.00%); split: -0.01%, +0.01% VALU: 854630 -> 854626 (-0.00%); split: -0.00%, +0.00% Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index c956782ddb6..5ec96f3c407 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -1538,7 +1538,12 @@ compact_relocate_vars(ra_ctx& ctx, const std::vector& vars, bool is_vector_intact(ra_ctx& ctx, const RegisterFile& reg_file, const vector_info& vec_info) { + unsigned size = 0; + for (unsigned i = 0; i < vec_info.num_parts; i++) + size += vec_info.parts[i].bytes(); + PhysReg first{512}; + int offset = 0; for (unsigned i = 0; i < vec_info.num_parts; i++) { Operand op = vec_info.parts[i]; @@ -1547,21 +1552,23 @@ is_vector_intact(ra_ctx& ctx, const RegisterFile& reg_file, const vector_info& v if (first.reg() == 512) { PhysRegInterval bounds = get_reg_bounds(ctx, RegType::vgpr, false); - first = reg.advance(i * -4); - PhysRegInterval vec = PhysRegInterval{first, vec_info.num_parts}; + first = reg.advance(-offset); + PhysRegInterval vec = PhysRegInterval{first, DIV_ROUND_UP(size, 4)}; if (!bounds.contains(vec)) /* not enough space for other operands */ return false; } else { - if (reg != first.advance(i * 4)) /* not at the best position */ + if (reg != first.advance(offset)) /* not at the best position */ return false; } } else { /* If there's an unexpected temporary, this operand is unlikely to be * placed in the best position. */ - if (first.reg() != 512 && reg_file.test(first.advance(i * 4), 4)) + if (first.reg() != 512 && reg_file.test(first.advance(offset), op.bytes())) return false; } + + offset += op.bytes(); } return true;