diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 19885438115..beaeed46907 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2888,6 +2888,98 @@ fs_visitor::opt_zero_samples()
    return progress;
 }
 
+/**
+ * Opportunistically split SEND message payloads.
+ *
+ * Gfx9+ supports "split" SEND messages, which take two payloads that are
+ * implicitly concatenated.  If we find a SEND message with a single payload,
+ * we can split that payload in two.  This results in smaller contiguous
+ * register blocks for us to allocate.  But it can help beyond that, too.
+ *
+ * We try to split a LOAD_PAYLOAD between sources which change registers.
+ * For example, a sampler message often contains an x/y/z coordinate that may
+ * already be in a contiguous VGRF, combined with an LOD, shadow comparator,
+ * or array index, which comes from elsewhere.  In this case, the first few
+ * sources will be different offsets of the same VGRF, then a later source
+ * will be a different VGRF.  So we split there, possibly eliminating the
+ * payload concatenation altogether.  Returns true if any SEND was split.
+ */
+bool
+fs_visitor::opt_split_sends()
+{
+   if (devinfo->ver < 9)
+      return false;
+
+   bool progress = false;
+
+   const fs_live_variables &live = live_analysis.require();
+
+   int next_ip = 0; /* running instruction index, compared to vgrf_end[] */
+
+   foreach_block_and_inst_safe(block, fs_inst, send, cfg) {
+      int ip = next_ip;
+      next_ip++;
+
+      if (send->opcode != SHADER_OPCODE_SEND ||
+          send->mlen == 1 || send->ex_mlen > 0)
+         continue;
+
+      /* Don't split payloads that are still live (read) after this SEND. */
+      assert(send->src[2].file == VGRF);
+      if (live.vgrf_end[send->src[2].nr] > ip)
+         continue;
+
+      fs_inst *lp = (fs_inst *) send->prev; /* candidate LOAD_PAYLOAD */
+
+      if (lp->is_head_sentinel() || lp->opcode != SHADER_OPCODE_LOAD_PAYLOAD)
+         continue;
+
+      if (lp->dst.file != send->src[2].file || lp->dst.nr != send->src[2].nr)
+         continue;
+
+      /* Pick the split point: right after the header if there is one, else
+       * the first non-BAD_FILE source whose file or nr differs from src[0]'s.
+       */
+      unsigned i = lp->header_size;
+      if (lp->header_size == 0) {
+         for (i = 1; i < lp->sources; i++) {
+            if (lp->src[i].file == BAD_FILE)
+               continue;
+
+            if (lp->src[0].file != lp->src[i].file ||
+                lp->src[0].nr != lp->src[i].nr)
+               break;
+         }
+      }
+
+      if (i != lp->sources) { /* otherwise there is no second half */
+         const fs_builder ibld(this, block, lp);
+         fs_inst *lp2 =
+            ibld.LOAD_PAYLOAD(lp->dst, &lp->src[i], lp->sources - i, 0);
+
+         lp->resize_sources(i); /* lp keeps sources [0, i) */
+         lp->size_written -= lp2->size_written;
+
+         lp->dst = fs_reg(VGRF, alloc.allocate(lp->size_written / REG_SIZE), lp->dst.type);
+         lp2->dst = fs_reg(VGRF, alloc.allocate(lp2->size_written / REG_SIZE), lp2->dst.type);
+
+         send->resize_sources(4);
+         send->src[2] = lp->dst;
+         send->src[3] = lp2->dst;
+         send->ex_mlen = lp2->size_written / REG_SIZE;
+         send->mlen -= send->ex_mlen;
+
+         progress = true;
+      }
+   }
+
+   if (progress)
+      invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
+
+   return progress;
+}
+
+
 bool
 fs_visitor::opt_register_renaming()
 {
@@ -8583,6 +8675,8 @@ fs_visitor::optimize()
    OPT(lower_logical_sends);
 
    /* After logical SEND lowering. */
+   OPT(opt_copy_propagation);
+   OPT(opt_split_sends);
    OPT(fixup_nomask_control_flow);
 
    if (progress) {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 78b2a7b1922..dc8d00a0a00 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -172,6 +172,7 @@ public:
    bool opt_drop_redundant_mov_to_flags();
    bool opt_register_renaming();
    bool opt_bank_conflicts();
+   bool opt_split_sends();
    bool register_coalesce();
    bool compute_to_mrf();
    bool eliminate_find_live_channel();