diff --git a/.pick_status.json b/.pick_status.json index 33390442401..0aa89e35242 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1054,7 +1054,7 @@ "description": "aco: preserve bitsets after a lane mask is written", "nominated": false, "nomination_type": 3, - "resolution": 4, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 955d264603d..a6290a7f457 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -1481,8 +1481,9 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr& if (state.program->wave_size == 64 && instr->isSALU() && check_written_regs(instr, ctx.sgpr_read_by_valu_as_lanemask)) { - ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu = ctx.sgpr_read_by_valu_as_lanemask; - ctx.sgpr_read_by_valu_as_lanemask.reset(); + unsigned reg = instr->definitions[0].physReg().reg(); + for (unsigned i = 0; i < instr->definitions[0].size(); i++) + ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu[reg + i] = 1; } if (instr->isVALU()) { diff --git a/src/amd/compiler/tests/test_insert_nops.cpp b/src/amd/compiler/tests/test_insert_nops.cpp index b5cebde420c..275857dc4c6 100644 --- a/src/amd/compiler/tests/test_insert_nops.cpp +++ b/src/amd/compiler/tests/test_insert_nops.cpp @@ -1495,6 +1495,8 @@ BEGIN_TEST(insert_nops.setpc_gfx11) //! v1: %0:v[0] = v_cndmask_b32 0, 0, %0:vcc //! s1: %0:vcc_hi = s_mov_b32 0 //! s_waitcnt_depctr va_vdst(0) sa_sdst(0) + //! v1: %0:v[0] = v_xor3_b32 %0:v[0], %0:s[0], %0:s[0] + //! s_waitcnt_depctr va_vdst(0) //! s_setpc_b64 0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4)); bld.vop2(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(), @@ -1517,6 +1519,8 @@ BEGIN_TEST(insert_nops.setpc_gfx11) //! v1: %0:v[0] = v_cndmask_b32 0, 0, %0:vcc //! s2: %0:vcc = s_mov_b64 0 //! s_waitcnt_depctr va_vdst(0) sa_sdst(0) + //! v1: %0:v[0] = v_xor3_b32 %0:v[0], %0:s[0], %0:s[0] + //! s_waitcnt_depctr va_vdst(0) //! s_setpc_b64 0 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5)); bld.vop2(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),