From 2d5478fc3fe45791e005ca30f84208553490df5c Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 9 Apr 2026 14:00:15 +0100 Subject: [PATCH] aco/ra: fix v3b VALU at byte>0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can happen with v_cndmask_b32, if we were required to take the sub-dword path in get_reg_simple(). Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_register_allocation.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 438b432df9c..003c1e317cf 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -645,7 +645,9 @@ get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& return rc.bytes() % 2 == 0 ? 2 : 1; } - assert(rc.bytes() <= 2); + if (rc.bytes() > 2) + return 4; + if (instr->isVALU()) { if (can_use_SDWA(gfx_level, instr, false)) return rc.bytes(); @@ -766,8 +768,11 @@ DefInfo::get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& [...] if (instr->isVALU()) { - if (rc.bytes() == 3) + if (rc.bytes() == 3) { rc = v1; + stride = 4; + return; + } if (can_use_SDWA(gfx_level, instr, false)) return;