diff --git a/.pick_status.json b/.pick_status.json index 5208dd7b037..3285f76b9a5 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -4054,7 +4054,7 @@ "description": "aco: fix p_bpermute_gfx6 with input at non-zero byte", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index ea43c3f0cef..efa27872629 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1109,6 +1109,15 @@ emit_gfx6_bpermute(Program* program, aco_ptr& instr, Builder& bld) /* Restore original EXEC */ bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(temp_exec.physReg(), s2)); } + + /* RA assumes that the result is always in the low part of the register, so we have to shift, + * if it's not there already. + */ + if (input.physReg().byte()) { + unsigned right_shift = input.physReg().byte() * 8; + bld.vop2(aco_opcode::v_lshrrev_b32, dst, Operand::c32(right_shift), + Operand(dst.physReg(), v1)); + } } struct copy_operation {