diff --git a/src/intel/compiler/brw_compile_fs.cpp b/src/intel/compiler/brw_compile_fs.cpp
index cd28b88f16b..ce9128d114c 100644
--- a/src/intel/compiler/brw_compile_fs.cpp
+++ b/src/intel/compiler/brw_compile_fs.cpp
@@ -1518,6 +1518,9 @@ brw_compile_fs(const struct brw_compiler *compiler,
    brw_nir_lower_fs_inputs(nir, devinfo, key);
    brw_nir_lower_fs_outputs(nir);
 
+   if (!key->coherent_fb_fetch)
+      NIR_PASS(_, nir, brw_nir_lower_fs_load_output, key);
+
    /* From the SKL PRM, Volume 7, "Alpha Coverage":
     *  "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
     *   hardware, regardless of the state setting for this feature."
diff --git a/src/intel/compiler/brw_from_nir.cpp b/src/intel/compiler/brw_from_nir.cpp
index 9a4bfe87c2f..8a733e062e5 100644
--- a/src/intel/compiler/brw_from_nir.cpp
+++ b/src/intel/compiler/brw_from_nir.cpp
@@ -3647,80 +3647,6 @@ emit_mcs_fetch(nir_to_brw_state &ntb, const brw_reg &coordinate, unsigned compon
    return dest;
 }
 
-/**
- * Fake non-coherent framebuffer read implemented using TXF to fetch from the
- * framebuffer at the current fragment coordinates and sample index.
- */
-static brw_inst *
-emit_non_coherent_fb_read(nir_to_brw_state &ntb, const brw_builder &bld, const brw_reg &dst,
-                          unsigned target)
-{
-   brw_shader &s = ntb.s;
-   const struct intel_device_info *devinfo = s.devinfo;
-
-   assert(bld.shader->stage == MESA_SHADER_FRAGMENT);
-   const brw_wm_prog_key *wm_key =
-      reinterpret_cast<const brw_wm_prog_key *>(s.key);
-   assert(!wm_key->coherent_fb_fetch);
-
-   /* Calculate the fragment coordinates. */
-   const brw_reg coords = bld.vgrf(BRW_TYPE_UD, 3);
-   bld.MOV(offset(coords, bld, 0), s.pixel_x);
-   bld.MOV(offset(coords, bld, 1), s.pixel_y);
-   bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld));
-
-   /* Calculate the sample index and MCS payload when multisampling.  Luckily
-    * the MCS fetch message behaves deterministically for UMS surfaces, so it
-    * shouldn't be necessary to recompile based on whether the framebuffer is
-    * CMS or UMS.
-    */
-   assert(wm_key->multisample_fbo == INTEL_ALWAYS ||
-          wm_key->multisample_fbo == INTEL_NEVER);
-   if (wm_key->multisample_fbo &&
-       ntb.system_values[SYSTEM_VALUE_SAMPLE_ID].file == BAD_FILE)
-      ntb.system_values[SYSTEM_VALUE_SAMPLE_ID] = emit_sampleid_setup(ntb);
-
-   const brw_reg sample = ntb.system_values[SYSTEM_VALUE_SAMPLE_ID];
-   const brw_reg mcs = wm_key->multisample_fbo ?
-      emit_mcs_fetch(ntb, coords, 3, brw_imm_ud(target), brw_reg()) : brw_reg();
-
-   /* Use either a normal or a CMS texel fetch message depending on whether
-    * the framebuffer is single or multisample.  On SKL+ use the wide CMS
-    * message just in case the framebuffer uses 16x multisampling, it should
-    * be equivalent to the normal CMS fetch for lower multisampling modes.
-    */
-   opcode op;
-   if (wm_key->multisample_fbo) {
-      /* On SKL+ use the wide CMS message just in case the framebuffer uses 16x
-       * multisampling, it should be equivalent to the normal CMS fetch for
-       * lower multisampling modes.
-       *
-       * On Gfx12HP, there is only CMS_W variant available.
-       */
-      if (devinfo->verx10 >= 125)
-         op = SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL;
-      else
-         op = SHADER_OPCODE_TXF_CMS_W_LOGICAL;
-   } else {
-      op = SHADER_OPCODE_TXF_LOGICAL;
-   }
-
-   /* Emit the instruction. */
-   brw_reg srcs[TEX_LOGICAL_NUM_SRCS];
-   srcs[TEX_LOGICAL_SRC_COORDINATE]       = coords;
-   srcs[TEX_LOGICAL_SRC_LOD]              = brw_imm_ud(0);
-   srcs[TEX_LOGICAL_SRC_SAMPLE_INDEX]     = sample;
-   srcs[TEX_LOGICAL_SRC_MCS]              = mcs;
-   srcs[TEX_LOGICAL_SRC_SURFACE]          = brw_imm_ud(target);
-   srcs[TEX_LOGICAL_SRC_SAMPLER]          = brw_imm_ud(0);
-
-   brw_tex_inst *tex = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs))->as_tex();
-   tex->size_written = 4 * tex->dst.component_size(tex->exec_size);
-   tex->coord_components = 3;
-
-   return tex;
-}
-
 /**
  * Actual coherent framebuffer read implemented using the native render target
  * read message.  Requires SKL+.
@@ -4260,10 +4186,8 @@ brw_from_nir_emit_fs_intrinsic(nir_to_brw_state &ntb,
       const unsigned target = l - FRAG_RESULT_DATA0 + load_offset;
       const brw_reg tmp = bld.vgrf(dest.type, 4);
 
-      if (reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch)
-         emit_coherent_fb_read(bld, tmp, target);
-      else
-         emit_non_coherent_fb_read(ntb, bld, tmp, target);
+      assert(reinterpret_cast<const brw_wm_prog_key *>(s.key)->coherent_fb_fetch);
+      emit_coherent_fb_read(bld, tmp, target);
 
       brw_combine_with_vec(bld, dest,
                            offset(tmp, bld, nir_intrinsic_component(instr)),
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index aaff0f97256..fee89cca3fb 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -192,6 +192,8 @@ void brw_nir_lower_tcs_outputs(nir_shader *nir,
                                const struct intel_vue_map *vue,
                                enum tess_primitive_mode tes_primitive_mode);
 void brw_nir_lower_fs_outputs(nir_shader *nir);
+bool brw_nir_lower_fs_load_output(nir_shader *shader,
+                                  const struct brw_wm_prog_key *key);
 
 bool brw_nir_lower_cmat(nir_shader *nir, unsigned subgroup_size);
 
diff --git a/src/intel/compiler/brw_nir_lower_fs_load_output.c b/src/intel/compiler/brw_nir_lower_fs_load_output.c
new file mode 100644
index 00000000000..8c79f55fb91
--- /dev/null
+++ b/src/intel/compiler/brw_nir_lower_fs_load_output.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright © 2025 Intel Corporation
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "brw_nir.h"
+#include "compiler/nir/nir_builder.h"
+
+/**
+ * Lower fragment shader output reads into sampler operations.
+ *
+ * A load_output intrinsic is replaced by a texel fetch addressed with the
+ * fragment's integer x/y position and layer id: a txf on a 2D array surface
+ * when key->multisample_fbo is INTEL_NEVER, a txf_ms at the current sample id
+ * otherwise.
+ *
+ * \param b       builder used to emit the replacement instructions
+ * \param intrin  intrinsic being visited
+ * \param data    the const struct brw_wm_prog_key * handed to the pass
+ *
+ * \return true if \p intrin was a load_output and has been replaced,
+ *         false if it was left untouched.
+ */
+static bool
+brw_nir_lower_fs_load_output_instr(nir_builder *b,
+                                   nir_intrinsic_instr *intrin,
+                                   void *data)
+{
+   if (intrin->intrinsic != nir_intrinsic_load_output)
+      return false;
+
+   const struct brw_wm_prog_key *key = data;
+
+   /* Only used by Iris that never sets this to SOMETIMES */
+   assert(key->multisample_fbo != INTEL_SOMETIMES);
+
+   b->cursor = nir_before_instr(&intrin->instr);
+
+   /* Emit every builder call from its own statement.  Expressions inside an
+    * initializer list are indeterminately sequenced, so emitting from there
+    * would leave the order of the generated instructions to the compiler.
+    * This also loads the fragment coordinate once instead of twice.
+    */
+   nir_def *frag_coord = nir_load_frag_coord(b);
+   nir_def *pixel_x = nir_f2u32(b, nir_channel(b, frag_coord, 0));
+   nir_def *pixel_y = nir_f2u32(b, nir_channel(b, frag_coord, 1));
+   nir_def *layer = nir_load_layer_id(b);
+
+   nir_def *coords[3] = { pixel_x, pixel_y, layer };
+   nir_def *coord = nir_vec(b, coords, 3);
+
+   /* NOTE(review): the backend's load_output handling computes its render
+    * target index as `l - FRAG_RESULT_DATA0 + load_offset`; confirm that the
+    * raw intrinsic base is the equivalent index here.
+    */
+   const unsigned target = nir_intrinsic_base(intrin);
+
+   /* NOTE(review): the txf_ms fetch receives the same 3-component coordinate
+    * as the array txf but does not set .is_array; confirm that this is what
+    * nir_build_tex() expects for GLSL_SAMPLER_DIM_MS.
+    */
+   nir_def *tex =
+      key->multisample_fbo == INTEL_NEVER ?
+      nir_build_tex(b, nir_texop_txf, coord,
+                    .texture_index = target,
+                    .dim = GLSL_SAMPLER_DIM_2D,
+                    .is_array = true,
+                    .dest_type = nir_type_uint32) :
+      nir_build_tex(b, nir_texop_txf_ms, coord,
+                    .texture_index = target,
+                    .ms_index = nir_load_sample_id(b),
+                    .dim = GLSL_SAMPLER_DIM_MS,
+                    .dest_type = nir_type_uint32);
+
+   /* NOTE(review): the fetch result replaces the load as-is, whereas the
+    * backend code selects channels starting at nir_intrinsic_component();
+    * confirm the load always has the same components as the fetch.
+    */
+   nir_def_replace(&intrin->def, tex);
+
+   return true;
+}
+
+/**
+ * Apply brw_nir_lower_fs_load_output_instr() to \p shader through
+ * nir_shader_intrinsics_pass() and return that pass's result.
+ *
+ * \param shader  shader to lower
+ * \param key     program key forwarded to the per-instruction callback, which
+ *                reads its multisample_fbo field
+ */
+bool
+brw_nir_lower_fs_load_output(nir_shader *shader,
+                             const struct brw_wm_prog_key *key)
+{
+   /* The pass takes its user data as a plain void *, hence the cast dropping
+    * const; the callback only reads through the pointer.
+    */
+   return nir_shader_intrinsics_pass(shader,
+                                     brw_nir_lower_fs_load_output_instr,
+                                     nir_metadata_control_flow,
+                                     (void *) key);
+}
diff --git a/src/intel/compiler/meson.build b/src/intel/compiler/meson.build
index aae7dc30a42..2fff9740f6e 100644
--- a/src/intel/compiler/meson.build
+++ b/src/intel/compiler/meson.build
@@ -73,6 +73,7 @@ libintel_compiler_brw_files = files(
   'brw_nir_lower_cs_intrinsics.c',
   'brw_nir_lower_alpha_to_coverage.c',
   'brw_nir_lower_fs_barycentrics.c',
+  'brw_nir_lower_fs_load_output.c',
   'brw_nir_lower_immediate_offsets.c',
   'brw_nir_lower_intersection_shader.c',
   'brw_nir_lower_ray_queries.c',