diff --git a/src/intel/compiler/brw_fs_lower_simd_width.cpp b/src/intel/compiler/brw_fs_lower_simd_width.cpp
index 97b26dea55f..36b439e6611 100644
--- a/src/intel/compiler/brw_fs_lower_simd_width.cpp
+++ b/src/intel/compiler/brw_fs_lower_simd_width.cpp
@@ -461,16 +461,10 @@ emit_unzip(const fs_builder &lbld, fs_inst *inst, unsigned i)
    const fs_reg src = horiz_offset(inst->src[i], lbld.group() - inst->group);
 
    if (needs_src_copy(lbld, inst, i)) {
-      /* Builder of the right width to perform the copy avoiding uninitialized
-       * data if the lowered execution size is greater than the original
-       * execution size of the instruction.
-       */
-      const fs_builder cbld = lbld.group(MIN2(lbld.dispatch_width(),
-                                              inst->exec_size), 0);
       const fs_reg tmp = lbld.vgrf(inst->src[i].type, inst->components_read(i));
 
       for (unsigned k = 0; k < inst->components_read(i); ++k)
-         cbld.MOV(offset(tmp, lbld, k), offset(src, inst->exec_size, k));
+         lbld.MOV(offset(tmp, lbld, k), offset(src, inst->exec_size, k));
 
       return tmp;
 
@@ -507,13 +501,6 @@ needs_dst_copy(const fs_builder &lbld, const fs_inst *inst)
    if (inst->size_written > inst->dst.component_size(inst->exec_size))
       return true;
 
-   /* If the lowered execution size is larger than the original the result of
-    * the instruction won't fit in the original destination, so we'll have to
-    * allocate a temporary in any case.
-    */
-   if (lbld.dispatch_width() > inst->exec_size)
-      return true;
-
    for (unsigned i = 0; i < inst->sources; i++) {
       /* If we already made a copy of the source for other reasons there won't
        * be any overlap with the destination.
@@ -578,24 +565,15 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
        * destination into the temporary before emitting the lowered
        * instruction.
        */
-      const fs_builder gbld_before =
-         lbld_before.group(MIN2(lbld_before.dispatch_width(),
-                                inst->exec_size), 0);
       for (unsigned k = 0; k < dst_size; ++k) {
-         gbld_before.MOV(offset(tmp, lbld_before, k),
+         lbld_before.MOV(offset(tmp, lbld_before, k),
                          offset(dst, inst->exec_size, k));
       }
    }
 
-   const fs_builder gbld_after =
-      lbld_after.group(MIN2(lbld_after.dispatch_width(),
-                            inst->exec_size), 0);
    for (unsigned k = 0; k < dst_size; ++k) {
-      /* Use a builder of the right width to perform the copy avoiding
-       * uninitialized data if the lowered execution size is greater than the
-       * original execution size of the instruction.
-       */
-      gbld_after.MOV(offset(dst, inst->exec_size, k),
+      /* Copy the (split) temporary into the original (larger) destination. */
+      lbld_after.MOV(offset(dst, inst->exec_size, k),
                      offset(tmp, lbld_after, k));
    }
 
@@ -606,14 +584,14 @@ emit_zip(const fs_builder &lbld_before, const fs_builder &lbld_after,
        * have to build a single 32bit value for the SIMD32 message out of 2
        * SIMD16 16 bit values.
        */
-      const fs_builder rbld = gbld_after.exec_all().group(1, 0);
+      const fs_builder rbld = lbld_after.exec_all().group(1, 0);
       fs_reg local_res_reg = component(
          retype(offset(tmp, lbld_before, dst_size),
                 BRW_REGISTER_TYPE_UW), 0);
       fs_reg final_res_reg =
          retype(byte_offset(inst->dst,
                             inst->size_written - residency_size +
-                            gbld_after.group() / 8),
+                            lbld_after.group() / 8),
                 BRW_REGISTER_TYPE_UW);
       rbld.MOV(final_res_reg, local_res_reg);
    }
@@ -629,20 +607,19 @@ brw_fs_lower_simd_width(fs_visitor &s)
    foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
       const unsigned lower_width = brw_fs_get_lowered_simd_width(&s, inst);
 
-      if (lower_width != inst->exec_size) {
-         /* Builder matching the original instruction.  We may also need to
-          * emit an instruction of width larger than the original, set the
-          * execution size of the builder to the highest of both for now so
-          * we're sure that both cases can be handled.
-          */
-         const unsigned max_width = MAX2(inst->exec_size, lower_width);
+      /* No splitting required */
+      if (lower_width == inst->exec_size)
+         continue;
 
-         const fs_builder bld =
-            fs_builder(&s, MAX2(max_width, s.dispatch_width)).at_end();
-         const fs_builder ibld = bld.at(block, inst)
-                                    .exec_all(inst->force_writemask_all)
-                                    .group(max_width, inst->group / max_width);
+      assert(lower_width < inst->exec_size);
 
+      /* Builder matching the original instruction. */
+      const fs_builder bld = fs_builder(&s).at_end();
+      const fs_builder ibld =
+         bld.at(block, inst).exec_all(inst->force_writemask_all)
+            .group(inst->exec_size, inst->group / inst->exec_size);
+
+      {
          /* Split the copies in chunks of the execution width of either the
           * original or the lowered instruction, whichever is lower.
           */