diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 96d1ccaa0b0..e8e11abf77b 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -311,8 +311,9 @@ void transition_to_WQM(exec_ctx& ctx, Builder bld, unsigned idx)
    if (ctx.info[idx].exec.back().second & mask_type_wqm)
       return;
    if (ctx.info[idx].exec.back().second & mask_type_global) {
-      Temp exec_mask = ctx.info[idx].exec.back().first;
-      exec_mask = bld.sop1(Builder::s_wqm, bld.def(bld.lm, exec), bld.def(s1, scc), bld.exec(exec_mask));
+      Temp exec_mask = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm), Operand(exec, bld.lm));
+      ctx.info[idx].exec.back().first = exec_mask;
+      exec_mask = bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc), Operand(exec, bld.lm));
       ctx.info[idx].exec.emplace_back(exec_mask, mask_type_global | mask_type_wqm);
       return;
    }
@@ -320,7 +321,8 @@ void transition_to_WQM(exec_ctx& ctx, Builder bld, unsigned idx)
    ctx.info[idx].exec.pop_back();
    assert(ctx.info[idx].exec.back().second & mask_type_wqm);
    assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
-   ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
+   assert(ctx.info[idx].exec.back().first.id());
+   ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
                                                 ctx.info[idx].exec.back().first);
 }
 
@@ -336,17 +338,24 @@ void transition_to_Exact(exec_ctx& ctx, Builder bld, unsigned idx)
       ctx.info[idx].exec.pop_back();
       assert(ctx.info[idx].exec.back().second & mask_type_exact);
       assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
-      ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
+      assert(ctx.info[idx].exec.back().first.id());
+      ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
                                                    ctx.info[idx].exec.back().first);
       return;
    }
    /* otherwise, we create an exact mask and push to the stack */
-   Temp wqm = ctx.info[idx].exec.back().first;
-   Temp exact = bld.tmp(bld.lm);
-   wqm = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
-                  bld.exec(Definition(exact)), ctx.info[idx].exec[0].first, bld.exec(wqm));
+   Temp wqm = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
+                       Definition(exec, bld.lm), ctx.info[idx].exec[0].first, Operand(exec, bld.lm));
    ctx.info[idx].exec.back().first = wqm;
-   ctx.info[idx].exec.emplace_back(exact, mask_type_exact);
+   ctx.info[idx].exec.emplace_back(Temp(0, bld.lm), mask_type_exact);
+}
+
+Operand get_exec_op(Temp t)
+{
+   if (t == Temp())
+      return Operand(exec, t.regClass());
+   else
+      return Operand(t);
 }
 
 unsigned add_coupling_code(exec_ctx& ctx, Block* block,
@@ -360,29 +369,27 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
    if (idx == 0) {
       aco_ptr<Instruction>& startpgm = block->instructions[0];
       assert(startpgm->opcode == aco_opcode::p_startpgm);
-      Temp exec_mask = startpgm->definitions.back().getTemp();
       bld.insert(std::move(startpgm));
 
       /* exec seems to need to be manually initialized with combined shaders */
       if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
-         bld.copy(bld.exec(Definition(exec_mask)), Operand(UINT32_MAX, bld.lm == s2));
-         instructions[0]->definitions.pop_back();
+         bld.copy(Definition(exec, bld.lm), Operand(UINT32_MAX, bld.lm == s2));
       }
 
       if (ctx.handle_wqm) {
-         ctx.info[0].exec.emplace_back(exec_mask, mask_type_global | mask_type_exact | mask_type_initial);
+         ctx.info[0].exec.emplace_back(Temp(0, bld.lm), mask_type_global | mask_type_exact | mask_type_initial);
          /* if this block only needs WQM, initialize already */
         if (ctx.info[0].block_needs == WQM)
            transition_to_WQM(ctx, bld, 0);
      } else {
         uint8_t mask = mask_type_global;
         if (ctx.program->needs_wqm) {
-            exec_mask = bld.sop1(Builder::s_wqm, bld.def(bld.lm, exec), bld.def(s1, scc), bld.exec(exec_mask));
+            bld.sop1(Builder::s_wqm, Definition(exec, bld.lm), bld.def(s1, scc), Operand(exec, bld.lm));
            mask |= mask_type_wqm;
         } else {
            mask |= mask_type_exact;
         }
-         ctx.info[0].exec.emplace_back(exec_mask, mask);
+         ctx.info[0].exec.emplace_back(Temp(0, bld.lm), mask);
      }
 
      return 1;
@@ -402,7 +409,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
          for (int i = 0; i < info.num_exec_masks - 1; i++) {
             phi.reset(create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1));
             phi->definitions[0] = bld.def(bld.lm);
-            phi->operands[0] = Operand(ctx.info[preds[0]].exec[i].first);
+            phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec[i].first);
             ctx.info[idx].exec[i].first = bld.insert(std::move(phi));
          }
       }
@@ -412,7 +419,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
          /* this phi might be trivial but ensures a parallelcopy on the loop header */
          aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
          phi->definitions[0] = bld.def(bld.lm);
-         phi->operands[0] = Operand(ctx.info[preds[0]].exec[info.num_exec_masks - 1].first);
+         phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec[info.num_exec_masks - 1].first);
          ctx.info[idx].exec.back().first = bld.insert(std::move(phi));
       }
 
@@ -421,8 +428,8 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
       if (info.has_divergent_continue)
          phi->definitions[0] = bld.def(bld.lm);
       else
-         phi->definitions[0] = bld.def(bld.lm, exec);
-      phi->operands[0] = Operand(ctx.info[preds[0]].exec.back().first);
+         phi->definitions[0] = Definition(exec, bld.lm);
+      phi->operands[0] = get_exec_op(ctx.info[preds[0]].exec.back().first);
       Temp loop_active = bld.insert(std::move(phi));
 
       if (info.has_divergent_break) {
@@ -442,7 +449,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
          }
          uint8_t mask_type = ctx.info[idx].exec.back().second & (mask_type_wqm | mask_type_exact);
         assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
-         ctx.info[idx].exec.emplace_back(bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
+         ctx.info[idx].exec.emplace_back(bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
                                                     ctx.info[idx].exec.back().first), mask_type);
       }
 
@@ -465,7 +472,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
             aco_ptr<Instruction>& phi = header->instructions[instr_idx];
             assert(phi->opcode == aco_opcode::p_linear_phi);
             for (unsigned i = 1; i < phi->operands.size(); i++)
-               phi->operands[i] = Operand(ctx.info[header_preds[i]].exec[instr_idx].first);
+               phi->operands[i] = get_exec_op(ctx.info[header_preds[i]].exec[instr_idx].first);
             instr_idx++;
          }
       }
@@ -474,14 +481,14 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
          aco_ptr<Instruction>& phi = header->instructions[instr_idx++];
          assert(phi->opcode == aco_opcode::p_linear_phi);
         for (unsigned i = 1; i < phi->operands.size(); i++)
-            phi->operands[i] = Operand(ctx.info[header_preds[i]].exec[info.num_exec_masks - 1].first);
+            phi->operands[i] = get_exec_op(ctx.info[header_preds[i]].exec[info.num_exec_masks - 1].first);
       }
 
       if (info.has_divergent_break) {
          aco_ptr<Instruction>& phi = header->instructions[instr_idx];
          assert(phi->opcode == aco_opcode::p_linear_phi);
         for (unsigned i = 1; i < phi->operands.size(); i++)
-            phi->operands[i] = Operand(ctx.info[header_preds[i]].exec[info.num_exec_masks].first);
+            phi->operands[i] = get_exec_op(ctx.info[header_preds[i]].exec[info.num_exec_masks].first);
       }
 
       assert(!(block->kind & block_kind_top_level) || info.num_exec_masks <= 2);
@@ -520,11 +527,11 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
             aco_ptr<Pseudo_instruction> phi{create_instruction<Pseudo_instruction>(aco_opcode::p_linear_phi, Format::PSEUDO, preds.size(), 1)};
             phi->definitions[0] = bld.def(bld.lm);
             if (exec_idx == info.num_exec_masks - 1u) {
-               phi->definitions[0].setFixed(exec);
+               phi->definitions[0] = Definition(exec, bld.lm);
                need_parallelcopy = false;
             }
             for (unsigned i = 0; i < phi->operands.size(); i++)
-               phi->operands[i] = Operand(ctx.info[preds[i]].exec[exec_idx].first);
+               phi->operands[i] = get_exec_op(ctx.info[preds[i]].exec[exec_idx].first);
             ctx.info[idx].exec.emplace_back(bld.insert(std::move(phi)), type);
          }
       }
@@ -553,13 +560,13 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
       }
 
       assert(ctx.info[idx].exec.back().first.size() == bld.lm.size());
-      if (need_parallelcopy) {
+      if (need_parallelcopy && get_exec_op(ctx.info[idx].exec.back().first).isTemp()) {
         /* only create this parallelcopy is needed, since the operand isn't
          * fixed to exec which causes the spiller to miscalculate register demand */
        /* TODO: Fix register_demand calculation for spilling on loop exits.
         * The problem is only mitigated because the register demand could be
         * higher if the exec phi doesn't get assigned to exec. */
-         ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec),
+         ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
                                                       ctx.info[idx].exec.back().first);
       }
 
@@ -582,16 +589,17 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
 
       /* create phis for diverged exec masks */
       for (unsigned i = 0; i < num_exec_masks; i++) {
-         bool in_exec = i == num_exec_masks - 1 && !(block->kind & block_kind_merge);
-         if (!in_exec && ctx.info[preds[0]].exec[i].first == ctx.info[preds[1]].exec[i].first) {
+         /* skip trivial phis */
+         if (ctx.info[preds[0]].exec[i].first == ctx.info[preds[1]].exec[i].first) {
            assert(ctx.info[preds[0]].exec[i].second == ctx.info[preds[1]].exec[i].second);
            ctx.info[idx].exec.emplace_back(ctx.info[preds[0]].exec[i]);
            continue;
         }
 
-         Temp phi = bld.pseudo(aco_opcode::p_linear_phi, in_exec ? bld.def(bld.lm, exec) : bld.def(bld.lm),
-                               ctx.info[preds[0]].exec[i].first,
-                               ctx.info[preds[1]].exec[i].first);
+         bool in_exec = i == num_exec_masks - 1 && !(block->kind & block_kind_merge);
+         Temp phi = bld.pseudo(aco_opcode::p_linear_phi, in_exec ? Definition(exec, bld.lm) : bld.def(bld.lm),
+                               get_exec_op(ctx.info[preds[0]].exec[i].first),
+                               get_exec_op(ctx.info[preds[1]].exec[i].first));
         uint8_t mask_type = ctx.info[preds[0]].exec[i].second & ctx.info[preds[1]].exec[i].second;
         ctx.info[idx].exec.emplace_back(phi, mask_type);
       }
@@ -620,10 +628,10 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
          transition_to_Exact(ctx, bld, idx);
    }
 
-   if (block->kind & block_kind_merge) {
+   if (block->kind & block_kind_merge && ctx.info[idx].exec.back().first != Temp()) {
       Temp restore = ctx.info[idx].exec.back().first;
       assert(restore.size() == bld.lm.size());
-      ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec), restore);
+      ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), restore);
    }
 
    return i;
@@ -671,20 +679,22 @@ void process_instructions(exec_ctx& ctx, Block* block,
          }
         int num = ctx.info[block->index].exec.size();
         assert(num);
-         Operand cond = instr->operands[0];
-         for (int i = num - 1; i >= 0; i--) {
+
+         /* discard from current exec */
+         const Operand cond = instr->operands[0];
+         Temp exit_cond = bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc),
+                                   Operand(exec, bld.lm), cond).def(1).getTemp();
+
+         /* discard from inner to outer exec mask on stack */
+         for (int i = num - 2; i >= 0; i--) {
            Instruction *andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
                                          ctx.info[block->index].exec[i].first, cond);
-            if (i == num - 1) {
-               andn2->operands[0].setFixed(exec);
-               andn2->definitions[0].setFixed(exec);
-            }
-            if (i == 0) {
-               instr->opcode = aco_opcode::p_exit_early_if;
-               instr->operands[0] = bld.scc(andn2->definitions[1].getTemp());
-            }
            ctx.info[block->index].exec[i].first = andn2->definitions[0].getTemp();
+            exit_cond = andn2->definitions[1].getTemp();
         }
+
+         instr->opcode = aco_opcode::p_exit_early_if;
+         instr->operands[0] = bld.scc(exit_cond);
         assert(!ctx.handle_wqm || (ctx.info[block->index].exec[0].second & mask_type_wqm) == 0);
 
      } else if (needs == WQM && state != WQM) {
@@ -720,7 +730,7 @@ void process_instructions(exec_ctx& ctx, Block* block,
            assert(exact_mask.second & mask_type_exact);
 
            instr.reset(create_instruction<SOP2_instruction>(bld.w64or32(Builder::s_andn2), Format::SOP2, 2, 2));
-            instr->operands[0] = Operand(ctx.info[block->index].exec.back().first); /* current exec */
+            instr->operands[0] = Operand(exec, bld.lm); /* current exec */
            instr->operands[1] = Operand(exact_mask.first);
            instr->definitions[0] = dst;
            instr->definitions[1] = bld.def(s1, scc);
@@ -735,18 +745,14 @@ void process_instructions(exec_ctx& ctx, Block* block,
         if (instr->operands[0].isConstant()) {
            assert(instr->operands[0].constantValue() == -1u);
            /* transition to exact and set exec to zero */
-            Temp old_exec = ctx.info[block->index].exec.back().first;
-            Temp new_exec = bld.tmp(bld.lm);
            exit_cond = bld.tmp(s1);
            cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.scc(Definition(exit_cond)),
-                            bld.exec(Definition(new_exec)), Operand(0u), bld.exec(old_exec));
+                            Definition(exec, bld.lm), Operand(0u), Operand(exec, bld.lm));
 
            num = ctx.info[block->index].exec.size() - 2;
-            if (ctx.info[block->index].exec.back().second & mask_type_exact) {
-               ctx.info[block->index].exec.back().first = new_exec;
-            } else {
+            if (!(ctx.info[block->index].exec.back().second & mask_type_exact)) {
               ctx.info[block->index].exec.back().first = cond;
-               ctx.info[block->index].exec.emplace_back(new_exec, mask_type_exact);
+               ctx.info[block->index].exec.emplace_back(Temp(0, bld.lm), mask_type_exact);
            }
         } else {
            /* demote_if: transition to exact */
@@ -761,8 +767,8 @@ void process_instructions(exec_ctx& ctx, Block* block,
            Instruction *andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
                                          ctx.info[block->index].exec[i].first, cond);
            if (i == (int)ctx.info[block->index].exec.size() - 1) {
-               andn2->operands[0].setFixed(exec);
-               andn2->definitions[0].setFixed(exec);
+               andn2->operands[0] = Operand(exec, bld.lm);
+               andn2->definitions[0] = Definition(exec, bld.lm);
            }
 
            ctx.info[block->index].exec[i].first = andn2->definitions[0].getTemp();
@@ -873,7 +879,7 @@ void add_branch_code(exec_ctx& ctx, Block* block)
 
    /* For normal breaks, this is the exec mask. For discard+break, it's the
     * old exec mask before it was zero'd. */
-   Operand break_cond = bld.exec(ctx.info[idx].exec.back().first);
+   Operand break_cond = Operand(exec, bld.lm);
 
    if (block->kind & block_kind_discard) {
 
@@ -890,17 +896,14 @@ void add_branch_code(exec_ctx& ctx, Block* block)
         num = ctx.info[idx].exec.size() - 1;
      }
 
-      Temp old_exec = ctx.info[idx].exec.back().first;
-      Temp new_exec = bld.tmp(bld.lm);
      Temp cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
-                           bld.exec(Definition(new_exec)), Operand(0u), bld.exec(old_exec));
-      ctx.info[idx].exec.back().first = new_exec;
+                           Definition(exec, bld.lm), Operand(0u), Operand(exec, bld.lm));
 
      for (int i = num - 1; i >= 0; i--) {
         Instruction *andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
-                                       ctx.info[block->index].exec[i].first, cond);
+                                       get_exec_op(ctx.info[block->index].exec[i].first), cond);
         if (i == (int)ctx.info[idx].exec.size() - 1)
-            andn2->definitions[0].setFixed(exec);
+            andn2->definitions[0] = Definition(exec, bld.lm);
         if (i == 0)
            bld.pseudo(aco_opcode::p_exit_early_if, bld.scc(andn2->definitions[1].getTemp()));
         ctx.info[block->index].exec[i].first = andn2->definitions[0].getTemp();
@@ -925,8 +928,8 @@ void add_branch_code(exec_ctx& ctx, Block* block)
      }
 
      if (need_parallelcopy)
-         ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(bld.lm, exec), ctx.info[idx].exec.back().first);
-      bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.exec(ctx.info[idx].exec.back().first), block->linear_succs[1], block->linear_succs[0]);
+         ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), ctx.info[idx].exec.back().first);
+      bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]);
      return;
   }
 
@@ -961,19 +964,17 @@ void add_branch_code(exec_ctx& ctx, Block* block)
      if (ctx.info[idx].block_needs & Exact_Branch)
         transition_to_Exact(ctx, bld, idx);
 
-      Temp current_exec = ctx.info[idx].exec.back().first;
      uint8_t mask_type = ctx.info[idx].exec.back().second & (mask_type_wqm | mask_type_exact);
 
-      Temp then_mask = bld.tmp(bld.lm);
      Temp old_exec = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
-                               bld.exec(Definition(then_mask)), cond, bld.exec(current_exec));
+                               Definition(exec, bld.lm), cond, Operand(exec, bld.lm));
 
      ctx.info[idx].exec.back().first = old_exec;
 
      /* add next current exec to the stack */
-      ctx.info[idx].exec.emplace_back(then_mask, mask_type);
+      ctx.info[idx].exec.emplace_back(Temp(0, bld.lm), mask_type);
 
-      bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), bld.exec(then_mask), block->linear_succs[1], block->linear_succs[0]);
+      bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]);
      return;
   }
 
@@ -981,17 +982,11 @@ void add_branch_code(exec_ctx& ctx, Block* block)
      // exec = s_andn2_b64 (original_exec, exec)
      assert(block->instructions.back()->opcode == aco_opcode::p_cbranch_nz);
      block->instructions.pop_back();
-      Temp then_mask = ctx.info[idx].exec.back().first;
-      uint8_t mask_type = ctx.info[idx].exec.back().second;
-      ctx.info[idx].exec.pop_back();
-      Temp orig_exec = ctx.info[idx].exec.back().first;
-      Temp else_mask = bld.sop2(Builder::s_andn2, bld.def(bld.lm, exec),
-                                bld.def(s1, scc), orig_exec, bld.exec(then_mask));
+      assert(ctx.info[idx].exec.size() >= 2);
+      Temp orig_exec = ctx.info[idx].exec[ctx.info[idx].exec.size() - 2].first;
+      bld.sop2(Builder::s_andn2, Definition(exec, bld.lm), bld.def(s1, scc), orig_exec, Operand(exec, bld.lm));
 
-      /* add next current exec to the stack */
-      ctx.info[idx].exec.emplace_back(else_mask, mask_type);
-
-      bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), bld.exec(else_mask), block->linear_succs[1], block->linear_succs[0]);
+      bld.branch(aco_opcode::p_cbranch_z, bld.hint_vcc(bld.def(s2)), Operand(exec, bld.lm), block->linear_succs[1], block->linear_succs[0]);
      return;
   }
 
@@ -1016,7 +1011,7 @@ void add_branch_code(exec_ctx& ctx, Block* block)
      unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
      Block& succ = ctx.program->blocks[succ_idx];
      if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
-         ctx.info[idx].exec.back().first = bld.copy(bld.def(bld.lm, exec), Operand(0u, bld.lm == s2));
+         bld.copy(Definition(exec, bld.lm), Operand(0u, bld.lm == s2));
      }
 
      bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond), block->linear_succs[1], block->linear_succs[0]);
@@ -1027,7 +1022,6 @@ void add_branch_code(exec_ctx& ctx, Block* block)
      assert(block->instructions.back()->opcode == aco_opcode::p_branch);
      block->instructions.pop_back();
 
-      Temp current_exec = ctx.info[idx].exec.back().first;
      Temp cond = Temp();
      for (int exec_idx = ctx.info[idx].exec.size() - 2; exec_idx >= 0; exec_idx--) {
         if (ctx.info[idx].exec[exec_idx].second & mask_type_loop)
@@ -1035,7 +1029,7 @@ void add_branch_code(exec_ctx& ctx, Block* block)
         cond = bld.tmp(s1);
         Temp exec_mask = ctx.info[idx].exec[exec_idx].first;
         exec_mask = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.scc(Definition(cond)),
-                              exec_mask, bld.exec(current_exec));
+                              exec_mask, Operand(exec, bld.lm));
         ctx.info[idx].exec[exec_idx].first = exec_mask;
      }
      assert(cond != Temp());
@@ -1045,7 +1039,7 @@ void add_branch_code(exec_ctx& ctx, Block* block)
      unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
      Block& succ = ctx.program->blocks[succ_idx];
      if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
-         ctx.info[idx].exec.back().first = bld.copy(bld.def(bld.lm, exec), Operand(0u, bld.lm == s2));
+         bld.copy(Definition(exec, bld.lm), Operand(0u, bld.lm == s2));
      }
 
      bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond), block->linear_succs[1], block->linear_succs[0]);