diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 80f844fa9c3..a794ed452a2 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -3078,6 +3078,22 @@ fs_visitor::opt_algebraic()
    return progress;
 }
 
+static unsigned
+load_payload_sources_read_for_size(fs_inst *lp, unsigned size_read)
+{  /* Returns how many leading sources of lp are needed to supply size_read. */
+   assert(lp->opcode == SHADER_OPCODE_LOAD_PAYLOAD);
+   assert(size_read >= lp->header_size * REG_SIZE);  /* must at least span the header */
+
+   unsigned i;
+   unsigned size = lp->header_size * REG_SIZE;  /* each header source counts as REG_SIZE */
+   for (i = lp->header_size; size < size_read && i < lp->sources; i++)
+      size += lp->exec_size * type_sz(lp->src[i].type);  /* size added by non-header source i */
+
+   /* Size read must cover exactly a subset of sources, i.e. end on a source boundary. */
+   assert(size == size_read);
+   return i;
+}
+
 /**
  * Optimize sample messages that have constant zero values for the trailing
  * texture coordinates. We can just reduce the message length for these
@@ -3155,23 +3171,14 @@ fs_visitor::opt_split_sends()
 
    bool progress = false;
 
-   const fs_live_variables &live = live_analysis.require();
-
-   int next_ip = 0;
-
-   foreach_block_and_inst_safe(block, fs_inst, send, cfg) {
-      int ip = next_ip;
-      next_ip++;
-
+   foreach_block_and_inst(block, fs_inst, send, cfg) {
       if (send->opcode != SHADER_OPCODE_SEND ||
           send->mlen <= reg_unit(devinfo) || send->ex_mlen > 0)
          continue;
 
-      /* Don't split payloads which are also read later. */
       assert(send->src[2].file == VGRF);
-      if (live.vgrf_end[send->src[2].nr] > ip)
-         continue;
 
+      /* Currently don't split sends that reuse a previously used payload. */
       fs_inst *lp = (fs_inst *) send->prev;
 
       if (lp->is_head_sentinel() || lp->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
@@ -3183,37 +3190,46 @@ fs_visitor::opt_split_sends()
       /* Split either after the header (if present), or when consecutive
        * sources switch from one VGRF to a different one.
        */
-      unsigned i = lp->header_size;
-      if (lp->header_size == 0) {
-         for (i = 1; i < lp->sources; i++) {
-            if (lp->src[i].file == BAD_FILE)
+      unsigned mid = lp->header_size;
+      if (mid == 0) {
+         for (mid = 1; mid < lp->sources; mid++) {
+            if (lp->src[mid].file == BAD_FILE)
                continue;
 
-            if (lp->src[0].file != lp->src[i].file ||
-                lp->src[0].nr != lp->src[i].nr)
+            if (lp->src[0].file != lp->src[mid].file ||
+                lp->src[0].nr != lp->src[mid].nr)
                break;
          }
       }
 
-      if (i != lp->sources) {
-         const fs_builder ibld(this, block, lp);
-         fs_inst *lp2 =
-            ibld.LOAD_PAYLOAD(lp->dst, &lp->src[i], lp->sources - i, 0);
+      /* SEND mlen might be smaller than what LOAD_PAYLOAD provides, so
+       * find out how many of the payload's sources it really needs.
+       */
+      const unsigned end =
+         load_payload_sources_read_for_size(lp, send->mlen * REG_SIZE);
 
-         lp->resize_sources(i);
-         lp->size_written -= lp2->size_written;
+      /* Nothing to split. */
+      if (end <= mid)
+         continue;
 
-         lp->dst = fs_reg(VGRF, alloc.allocate(lp->size_written / REG_SIZE), lp->dst.type);
-         lp2->dst = fs_reg(VGRF, alloc.allocate(lp2->size_written / REG_SIZE), lp2->dst.type);
+      const fs_builder ibld(this, block, lp);
+      fs_inst *lp1 = ibld.LOAD_PAYLOAD(lp->dst, &lp->src[0], mid, lp->header_size);
+      fs_inst *lp2 = ibld.LOAD_PAYLOAD(lp->dst, &lp->src[mid], end - mid, 0);
 
-         send->resize_sources(4);
-         send->src[2] = lp->dst;
-         send->src[3] = lp2->dst;
-         send->ex_mlen = lp2->size_written / REG_SIZE;
-         send->mlen -= send->ex_mlen;
+      assert(lp1->size_written % REG_SIZE == 0);
+      assert(lp2->size_written % REG_SIZE == 0);
+      assert((lp1->size_written + lp2->size_written) / REG_SIZE == send->mlen);
 
-         progress = true;
-      }
+      lp1->dst = fs_reg(VGRF, alloc.allocate(lp1->size_written / REG_SIZE), lp1->dst.type);
+      lp2->dst = fs_reg(VGRF, alloc.allocate(lp2->size_written / REG_SIZE), lp2->dst.type);
+
+      send->resize_sources(4);
+      send->src[2] = lp1->dst;
+      send->src[3] = lp2->dst;
+      send->ex_mlen = lp2->size_written / REG_SIZE;
+      send->mlen -= send->ex_mlen;
+
+      progress = true;
    }
 
    if (progress)