From 8a024c985f1c8ab487fc37c8892b455b832c05f0 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Tue, 15 Aug 2023 11:45:01 +0100
Subject: [PATCH] aco: fix p_bpermute_gfx6's exec save/restore with wave32
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rhys Perry
Reviewed-by: Samuel Pitoiset
Reviewed-by: Daniel Schürmann
Reviewed-by: Timur Kristóf
Part-of:
---
 src/amd/compiler/aco_lower_to_hw_instr.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 5334140b443..f34ca358b6d 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -1093,7 +1093,7 @@ emit_gfx6_bpermute(Program* program, aco_ptr<Instruction>& instr, Builder& bld)
    assert(input.physReg() != dst.physReg());
 
    /* Save original EXEC */
-   bld.sop1(aco_opcode::s_mov_b64, temp_exec, Operand(exec, s2));
+   bld.sop1(Builder::s_mov, temp_exec, Operand(exec, bld.lm));
 
    /* An "unrolled loop" that is executed per each lane.
     * This takes only a few instructions per lane, as opposed to a "real" loop
@@ -1108,7 +1108,7 @@ emit_gfx6_bpermute(Program* program, aco_ptr<Instruction>& instr, Builder& bld)
       /* On the active lane, move the data we read from lane N to the destination VGPR */
       bld.vop1(aco_opcode::v_mov_b32, dst, Operand(vcc, s1));
       /* Restore original EXEC */
-      bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(temp_exec.physReg(), s2));
+      bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(temp_exec.physReg(), bld.lm));
    }
 
    /* RA assumes that the result is always in the low part of the register, so we have to shift,