diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp
index ec8e116cb38..59e047483c0 100644
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -548,6 +548,9 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
    int first_mrf_hack_node = node_count;
    if (devinfo->gen >= 7)
       node_count += BRW_MAX_GRF - GEN7_MRF_HACK_START;
+   int grf127_send_hack_node = node_count;
+   if (devinfo->gen >= 8 && dispatch_width == 8)
+      node_count ++;
    struct ra_graph *g =
       ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count);
 
@@ -653,6 +656,28 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
       }
    }
 
+   if (devinfo->gen >= 8 && dispatch_width == 8) {
+      /* From the Intel Broadwell PRM, vol 07, section "Instruction Set
+       * Reference", subsection "EUISA Instructions", Send Message (page 990):
+       *
+       * "r127 must not be used for return address when there is a src and
+       * dest overlap in send instruction."
+       *
+       * We avoid using grf127 as part of the destination of send messages
+       * by adding a node interference with grf127_send_hack_node. This node
+       * has a fixed assignment to grf127.
+       *
+       * We don't apply it to SIMD16 because previous code avoids any register
+       * overlap between sources and destination.
+       */
+      ra_set_node_reg(g, grf127_send_hack_node, 127);
+      foreach_block_and_inst(block, fs_inst, inst, cfg) {
+         if (inst->is_send_from_grf() && inst->dst.file == VGRF) {
+            ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node);
+         }
+      }
+   }
+
    /* Debug of register spilling: Go spill everything. */
    if (unlikely(spill_all)) {
       int reg = choose_spill_reg(g);