diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index f7fa6ad1e65..1c1ee70c6b1 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -383,10 +383,6 @@ index("bool", "explicit_coord")
 index("bool", "src_is_reg")
 index("bool", "dst_is_reg")
 
-# For an Intel render target store, whether this signals end-of-thread. Must be
-# the last instruction.
-index("bool", "eot")
-
 # The index of the format string used by a printf. (u_printf_info element of the shader)
 index("unsigned", "fmt_idx")
 # for NV coop matrix - num of matrix in load 1/2/4
@@ -2665,8 +2661,8 @@ intrinsic("load_reloc_const_intel", dest_comp=1, bit_sizes=[32],
           indices=[PARAM_IDX, BASE], flags=[CAN_ELIMINATE, CAN_REORDER])
 
 # Write a render target
-# src[] = { payload, 2x32 descriptor, predicate }
-intrinsic("store_render_target_intel", [-1, 2, 1], indices=[EOT], bit_sizes=[32])
+# src[] = { color, src0_alpha, omask, depth, stencil, predicate }; undef src[1..4] = not sent
+intrinsic("store_render_target_intel", [4, 1, 1, 1, 1, 1], indices=[TARGET], bit_sizes=[32, 32, 16, 32, 32, 1])
 
 # Shuffle with an offset in bytes instead of a lane index.
 # src[] = { payload, lane offset in bytes }
diff --git a/src/intel/compiler/jay/jay_from_nir.c b/src/intel/compiler/jay/jay_from_nir.c
index 8f1dabe5196..c88c4b6918b 100644
--- a/src/intel/compiler/jay/jay_from_nir.c
+++ b/src/intel/compiler/jay/jay_from_nir.c
@@ -750,34 +750,88 @@ jay_emit_derivative(jay_builder *b,
            jay_negate(jay_QUAD_SWIZZLE_u32(b, val, swz0)));
 }
 
+/* Translate a NIR source, yielding the null def when the source is undef. */
+static inline jay_def optional_src(nir_src nsrc)
+{
+   return !nir_src_is_undef(nsrc) ? nj_src(nsrc) : jay_null();
+}
+
+/* True if nir_scalar_equal() holds, or both are constants of equal value. */
+static bool
+scalars_equal(nir_scalar a, nir_scalar b)
+{
+   const bool both_const = nir_scalar_is_const(a) && nir_scalar_is_const(b);
+   return nir_scalar_equal(a, b) ||
+          (both_const && nir_scalar_as_uint(a) == nir_scalar_as_uint(b));
+}
+
 static void
 jay_emit_fb_write(jay_builder *b, nir_intrinsic_instr *intr)
 {
-   jay_def data = nj_src(intr->src[0]);
-   jay_def srcs[8];
+   /* Emit the render cache SEND for a framebuffer write. */
+   const struct intel_device_info *devinfo = b->shader->devinfo;
+   jay_def colour = nj_src(intr->src[0]);
+   jay_def src0_alpha = optional_src(intr->src[1]);
+   jay_def omask = optional_src(intr->src[2]);
+   jay_def depth = optional_src(intr->src[3]);
+   jay_def stencil = optional_src(intr->src[4]);
+   const bool null_rt = ((signed) nir_intrinsic_target(intr)) < 0;
+   const int target = MAX2(((signed) nir_intrinsic_target(intr)), 0);
+   const bool last = !nir_instr_next(&intr->instr); /* also marks EOT */
 
-   /* Optimize unconditional discards. Should probably do this in NIR. */
-   bool trivial =
-      nir_src_is_const(intr->src[2]) && nir_src_as_bool(intr->src[2]);
+   /* If our alpha happens to match src0_alpha, we can skip sending it,
+    * as the hardware will use our alpha in that case.
+    */
+   if (scalars_equal(nir_scalar_resolved(intr->src[1].ssa, 0),
+                     nir_scalar_resolved(intr->src[0].ssa, 3)))
+      src0_alpha = jay_null();
 
-   for (unsigned i = 0; i < nir_src_num_components(intr->src[0]); ++i) {
-      srcs[i] =
-         trivial ? jay_UNDEF_u32(b) : jay_as_gpr(b, jay_extract(data, i));
+   unsigned op = b->shader->dispatch_width == 32 ?
+                    XE2_DATAPORT_RENDER_TARGET_WRITE_SIMD32_SINGLE_SOURCE :
+                    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+   uint64_t desc =
+      brw_fb_write_desc(devinfo, target, op, last, false /* coarse write */);
+   /* ex_desc: RT index, null-RT flag and one "present" bit per optional src */
+   uint64_t ex_desc = (target << 21) |
+                      (null_rt ? (1 << 20) : 0) |
+                      (jay_is_null(src0_alpha) ? 0 : (1 << 15)) |
+                      (jay_is_null(stencil) ? 0 : (1 << 14)) |
+                      (jay_is_null(depth) ? 0 : (1 << 13)) |
+                      (jay_is_null(omask) ? 0 : (1 << 12));
+   assert((jay_is_null(src0_alpha) || jay_is_null(omask)) &&
+          "TODO: lower alpha test to discards when samplemask is written");
+
+   jay_def srcs[4 + 16 + 4 + 1 + 16];
+   unsigned len = 0;
+
+   /* Payload order: optional src0_alpha, colour.xyzw, optional depth. */
+   if (!jay_is_null(src0_alpha))
+      srcs[len++] = jay_as_gpr(b, src0_alpha);
+
+   assert(jay_is_null(omask) && "TODO: samplemask");
+
+   for (unsigned i = 0; i < 4; i++)
+      srcs[len++] = jay_as_gpr(b, jay_extract(colour, i));
+
+   if (!jay_is_null(depth))
+      srcs[len++] = jay_as_gpr(b, depth);
+
+   assert(jay_is_null(stencil) && "TODO: stencil");
+
+   /* Optimize out unconditional discards (probably should do this in NIR) */
+   if (nir_src_is_const(intr->src[5]) && nir_src_as_bool(intr->src[5])) {
+      for (unsigned i = 0; i < len; i++)
+         srcs[i] = jay_UNDEF_u32(b);
    }
 
    jay_inst *send =
       jay_SEND(b, .sfid = BRW_SFID_RENDER_CACHE, .check_tdr = true,
-               .msg_desc = nir_scalar_as_uint(nir_scalar_chase_movs(
-                              nir_get_scalar(intr->src[1].ssa, 0))) |
-                           (nir_scalar_as_uint(nir_scalar_chase_movs(
-                               nir_get_scalar(intr->src[1].ssa, 1)))
-                            << 32),
-               .srcs = srcs, .nr_srcs = nir_src_num_components(intr->src[0]),
-               .type = JAY_TYPE_U32, .eot = nir_intrinsic_eot(intr));
+               .msg_desc = desc | (ex_desc << 32), .srcs = srcs, .nr_srcs = len,
+               .type = JAY_TYPE_U32, .eot = last);
 
    /* Handle the disable predicate. It is logically inverted. */
-   if (!nir_src_is_const(intr->src[2]) || nir_src_as_bool(intr->src[2])) {
-      jay_add_predicate(b, send, jay_negate(nj_src(intr->src[2])));
+   if (!nir_src_is_const(intr->src[5]) || nir_src_as_bool(intr->src[5])) {
+      jay_add_predicate(b, send, jay_negate(nj_src(intr->src[5])));
    }
 }
 
diff --git a/src/intel/compiler/jay/jay_nir.c b/src/intel/compiler/jay/jay_nir.c
index 1241d4f9c5b..dfaffb04a25 100644
--- a/src/intel/compiler/jay/jay_nir.c
+++ b/src/intel/compiler/jay/jay_nir.c
@@ -166,43 +166,19 @@ collect_fragment_output(nir_builder *b, nir_intrinsic_instr *intr, void *ctx_)
    nir_instr_remove(&intr->instr);
    return true;
 }
-
-static void
-append_payload(nir_builder *b,
-               nir_def **payload,
-               unsigned *len,
-               unsigned max_len,
-               nir_def *value)
-{
-   if (value != NULL) {
-      for (unsigned i = 0; i < value->num_components; ++i) {
-         payload[*len] = nir_channel(b, value, i);
-         (*len)++;
-         assert((*len) <= max_len);
-      }
-   }
-}
-
 static void
 insert_rt_store(nir_builder *b,
-                const struct intel_device_info *devinfo,
                 signed target,
-                bool last,
                 nir_def *colour,
-                nir_def *src0_alpha,
+                nir_def *src0_colour,
                 nir_def *depth,
                 nir_def *stencil,
-                nir_def *sample_mask,
-                unsigned dispatch_width)
+                nir_def *sample_mask)
 {
    bool null_rt = target < 0;
-   target = MAX2(target, 0);
+   /* Keep target negative for a null RT: the backend decodes its sign. */
 
-   if (!colour) {
-      colour = nir_undef(b, 4, 32);
-   }
-
-   colour = nir_pad_vec4(b, colour);
+   colour = nir_pad_vec4(b, colour ?: nir_undef(b, 4, 32));
 
    if (null_rt) {
       /* Even if we don't write a RT, we still need to write alpha for
@@ -212,42 +188,14 @@ insert_rt_store(nir_builder *b,
                                      nir_channel(b, colour, 3), 3);
    }
 
-   /* TODO: Not sure I like this. We'll see what 2src looks like. */
-   unsigned op = dispatch_width == 32 ?
-                    XE2_DATAPORT_RENDER_TARGET_WRITE_SIMD32_SINGLE_SOURCE :
-                    BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
-   uint64_t desc =
-      brw_fb_write_desc(devinfo, target, op, last, false /* coarse write */);
-
-   uint64_t ex_desc = 0;
-   if (devinfo->ver >= 20) {
-      ex_desc = target << 21 |
-                null_rt << 20 |
-                (src0_alpha ? (1 << 15) : 0) |
-                (stencil ? (1 << 14) : 0) |
-                (depth ? (1 << 13) : 0) |
-                (sample_mask ? (1 << 12) : 0);
-   } else if (devinfo->ver >= 11) {
-      /* Set the "Render Target Index" and "Src0 Alpha Present" fields
-       * in the extended message descriptor, in lieu of using a header.
-       */
-      ex_desc = target << 12 | null_rt << 20 | (src0_alpha ? (1 << 15) : 0);
-   }
-
-   /* Build the payload */
-   nir_def *payload[8] = { NULL };
-   unsigned len = 0;
-   append_payload(b, payload, &len, ARRAY_SIZE(payload), colour);
-   append_payload(b, payload, &len, ARRAY_SIZE(payload), depth);
-   /* TODO */
+   nir_def *src0_alpha = nir_channel_or_undef(b, src0_colour ?: colour, 3);
 
    nir_def *disable = b->shader->info.fs.uses_discard ?
                          nir_is_helper_invocation(b, 1) :
                          nir_imm_false(b);
 
-   nir_store_render_target_intel(b, nir_vec(b, payload, len),
-                                 nir_imm_ivec2(b, desc, ex_desc), disable,
-                                 .eot = last);
+   nir_store_render_target_intel(b, colour, src0_alpha, sample_mask, depth,
+                                 stencil, disable, .target = target);
 }
 
 static void
@@ -271,16 +219,18 @@ lower_fragment_outputs(nir_function_impl *impl,
       }
    }
 
+   nir_def *undef = nir_undef(b, 1, 32);
    for (signed i = 0; i < last; ++i) {
       if (ctx.colour[i]) {
-         insert_rt_store(b, devinfo, i, false, ctx.colour[i], NULL, ctx.depth,
-                         ctx.stencil, ctx.sample_mask, dispatch_width);
+         insert_rt_store(b, i, ctx.colour[i], i > 0 ? ctx.colour[0] : NULL,
+                         ctx.depth ?: undef, ctx.stencil ?: undef,
+                         ctx.sample_mask ?: undef);
       }
    }
 
-   insert_rt_store(b, devinfo, last, true, last >= 0 ? ctx.colour[last] : NULL,
-                   NULL, ctx.depth, ctx.stencil, ctx.sample_mask,
-                   dispatch_width);
+   insert_rt_store(b, last, last >= 0 ? ctx.colour[last] : NULL,
+                   last > 0 ? ctx.colour[0] : NULL, ctx.depth ?: undef,
+                   ctx.stencil ?: undef, ctx.sample_mask ?: undef);
 }
 
 unsigned