diff --git a/src/freedreno/fdl/fd6_view.c b/src/freedreno/fdl/fd6_view.c
index 4de6c2b3358..e600296bd98 100644
--- a/src/freedreno/fdl/fd6_view.c
+++ b/src/freedreno/fdl/fd6_view.c
@@ -402,6 +402,8 @@ fdl6_view_init(struct fdl6_view *view, const struct fdl_layout **layouts,
       tile_mode == TILE6_LINEAR && args->base_miplevel != layout->mip_levels - 1;
 
    view->ubwc_enabled = ubwc_enabled;
+   view->is_mutable = layout->is_mutable;
+   view->color_swap = color_swap;
 
    view->RB_MRT_BUF_INFO =
       A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
diff --git a/src/freedreno/fdl/freedreno_layout.h b/src/freedreno/fdl/freedreno_layout.h
index c745ffa0180..44c17435216 100644
--- a/src/freedreno/fdl/freedreno_layout.h
+++ b/src/freedreno/fdl/freedreno_layout.h
@@ -297,6 +297,8 @@ struct fdl6_view {
    bool need_y2_align;
 
    bool ubwc_enabled;
+   bool is_mutable;
+   uint8_t color_swap;
 
    enum pipe_format format;
 
diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc
index 60f4c71334c..7016049ea34 100644
--- a/src/freedreno/vulkan/tu_clear_blit.cc
+++ b/src/freedreno/vulkan/tu_clear_blit.cc
@@ -1334,7 +1334,9 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
    uint32_t desc[A6XX_TEX_CONST_DWORDS];
    memcpy(desc, iview->view.descriptor, sizeof(desc));
 
-   enum a6xx_format fmt = blit_format_texture<CHIP>(format, TILE6_LINEAR, false, true).fmt;
+   enum a6xx_format fmt =
+      blit_format_texture<CHIP>(format, TILE6_2,
+                                iview->view.is_mutable, true).fmt;
    fixup_src_format(&format, dst_format, &fmt);
 
    /* patch the format so that depth/stencil get the right format and swizzle */
@@ -1348,7 +1350,9 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd,
                A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
 
    /* patched for gmem */
-   desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
+   desc[0] &= ~A6XX_TEX_CONST_0_TILE_MODE__MASK;
+   if (!iview->view.is_mutable)
+      desc[0] &= ~A6XX_TEX_CONST_0_SWAP__MASK;
    desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
    desc[2] =
       A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
@@ -4856,7 +4860,8 @@ template <chip CHIP>
 static void
 store_cp_blit(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
-              const struct tu_image_view *iview,
+              const struct tu_image_view *src_iview,
+              const struct tu_image_view *dst_iview,
               uint32_t samples,
               bool separate_stencil,
               enum pipe_format src_format,
@@ -4867,33 +4872,44 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
 {
    r2d_setup_common<CHIP>(cmd, cs, src_format, dst_format,
                           VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
-                          iview->view.ubwc_enabled, true);
+                          dst_iview->view.ubwc_enabled, true);
 
-   if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+   if (dst_iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
       if (!separate_stencil) {
-         r2d_dst_depth(cs, iview, layer);
+         r2d_dst_depth(cs, dst_iview, layer);
       } else {
-         r2d_dst_stencil(cs, iview, layer);
+         r2d_dst_stencil(cs, dst_iview, layer);
       }
    } else {
-      r2d_dst<CHIP>(cs, &iview->view, layer, src_format);
+      r2d_dst<CHIP>(cs, &dst_iview->view, layer, src_format);
    }
 
-   enum a6xx_format fmt = blit_format_texture<CHIP>(src_format, TILE6_2, false, true).fmt;
-   fixup_src_format(&src_format, dst_format, &fmt);
+   /* Note: we compute the swap here instead of using the color_swap as
+    * programmed when we setup the color attachment because the attachment in
+    * GMEM ignores the swap except when MUTABLEEN is enabled. If the
+    * color attachment is linear, we need to use the identity swap even if the
+    * original attachment has a non-identity swap.
+    */
+   struct tu_native_format fmt =
+      blit_format_texture<CHIP>(src_format, TILE6_2,
+                                src_iview->view.is_mutable, true);
+   enum a6xx_format format = fmt.fmt;
+   fixup_src_format(&src_format, dst_format, &format);
 
    tu_cs_emit_regs(cs,
                    SP_PS_2D_SRC_INFO(CHIP,
-                      .color_format = fmt,
+                      .color_format = format,
                       .tile_mode = TILE6_2,
-                      .color_swap = WZYX,
+                      .color_swap = fmt.swap,
                       .srgb = util_format_is_srgb(src_format),
                       .samples = tu_msaa_samples(samples),
                       .samples_average = !util_format_is_pure_integer(dst_format) &&
                                          !util_format_is_depth_or_stencil(dst_format),
                       .unk20 = 1,
                       .unk22 = 1),
-                   SP_PS_2D_SRC_SIZE(CHIP, .width = iview->vk.extent.width, .height = iview->vk.extent.height),
+                   SP_PS_2D_SRC_SIZE(CHIP,
+                      .width = dst_iview->vk.extent.width,
+                      .height = dst_iview->vk.extent.height),
                    SP_PS_2D_SRC(CHIP, .qword = cmd->device->physical_device->gmem_base + gmem_offset),
                    SP_PS_2D_SRC_PITCH(CHIP, .pitch = cmd->state.tiling->tile0.width * cpp));
 
@@ -4921,7 +4937,8 @@ template <chip CHIP>
 static void
 store_3d_blit(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
-              const struct tu_image_view *iview,
+              const struct tu_image_view *src_iview,
+              const struct tu_image_view *dst_iview,
               VkSampleCountFlagBits dst_samples,
               bool separate_stencil,
               enum pipe_format src_format,
@@ -4949,21 +4966,21 @@ store_3d_blit(struct tu_cmd_buffer *cmd,
    }
 
    r3d_setup<CHIP>(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT,
-                   0, false, iview->view.ubwc_enabled, dst_samples);
+                   0, false, dst_iview->view.ubwc_enabled, dst_samples);
 
    r3d_coords(cmd, cs, render_area->offset, render_area->offset, render_area->extent);
 
-   if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
+   if (dst_iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
       if (!separate_stencil) {
-         r3d_dst_depth<CHIP>(cs, iview, layer);
+         r3d_dst_depth<CHIP>(cs, dst_iview, layer);
       } else {
-         r3d_dst_stencil<CHIP>(cs, iview, layer);
+         r3d_dst_stencil<CHIP>(cs, dst_iview, layer);
       }
    } else {
-      r3d_dst<CHIP>(cs, &iview->view, layer, src_format);
+      r3d_dst<CHIP>(cs, &dst_iview->view, layer, src_format);
    }
 
-   r3d_src_gmem<CHIP>(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp);
+   r3d_src_gmem<CHIP>(cmd, cs, src_iview, src_format, dst_format, gmem_offset, cpp);
 
    /* sync GMEM writes with CACHE. */
    tu_emit_event_write<CHIP>(cmd, cs, FD_CACHE_INVALIDATE);
@@ -5033,6 +5050,29 @@ tu_attachment_store_unaligned(struct tu_cmd_buffer *cmd, uint32_t a)
            (y2 % phys_dev->info->gmem_align_h && need_y2_align));
 }
 
+/* Returns true when attachment `a` (sysmem destination) is resolved from a
+ * different, mutable GMEM attachment `gmem_a` whose color swap differs, so
+ * the fast path cannot be used. This can happen when a mutable color
+ * attachment is being resolved into a non-mutable resolve attachment. In such
+ * a case, if the format is a swapped format like BGRA8, the color attachment
+ * will be stored in GMEM swapped but the resolve attachment in sysmem will
+ * not be swapped and there's no way to express that in the hardware because
+ * it computes the GMEM swap from the sysmem swap.
+ */
+static bool
+tu_attachment_store_mismatched_swap(struct tu_cmd_buffer *cmd, uint32_t a,
+                                    uint32_t gmem_a)
+{
+   if (a == gmem_a)
+      return false;
+
+   const struct tu_image_view *dst_iview = cmd->state.attachments[a];
+   const struct tu_image_view *src_iview = cmd->state.attachments[gmem_a];
+
+   return src_iview->view.is_mutable &&
+      dst_iview->view.color_swap != src_iview->view.color_swap;
+}
+
 /* Choose the GMEM layout (use the CCU space or not) based on whether the
  * current attachments will need.  This has to happen at vkBeginRenderPass()
  * time because tu_attachment_store_unaligned() looks at the image views, which
@@ -5062,6 +5102,21 @@ tu_choose_gmem_layout(struct tu_cmd_buffer *cmd)
          cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU;
    }
 
+   for (unsigned i = 0; i < cmd->state.pass->subpass_count; i++) {
+      const struct tu_subpass *subpass = &cmd->state.pass->subpasses[i];
+      for (unsigned j = 0; j < subpass->resolve_count; j++) {
+         uint32_t a = subpass->resolve_attachments[j].attachment;
+         if (a == VK_ATTACHMENT_UNUSED)
+            continue;
+         uint32_t gmem_a =
+            j == subpass->color_count ?
+               subpass->depth_stencil_attachment.attachment :
+               subpass->color_attachments[j].attachment;
+         if (tu_attachment_store_mismatched_swap(cmd, a, gmem_a))
+            cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU;
+      }
+   }
+
    cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout];
 }
 
@@ -5117,8 +5172,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
 {
    const VkRect2D *render_area = &cmd->state.render_area;
    struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
-   const struct tu_image_view *iview = cmd->state.attachments[a];
+   const struct tu_image_view *dst_iview = cmd->state.attachments[a];
    struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
+   const struct tu_image_view *src_iview = cmd->state.attachments[gmem_a];
    const VkClearValue *clear_value = &cmd->state.clear_values[gmem_a];
    bool resolve = a != gmem_a;
    if (resolve)
@@ -5128,6 +5184,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
       return;
 
    bool unaligned = tu_attachment_store_unaligned(cmd, a);
+   bool mismatched_swap = tu_attachment_store_mismatched_swap(cmd, a, gmem_a);
 
    /* D32_SFLOAT_S8_UINT is quite special format: it has two planes,
     * one for depth and other for stencil. When resolving a MSAA
@@ -5147,7 +5204,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
    bool store_common = dst->store && !resolve_d32s8_s8;
    bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8;
 
-   bool use_fast_path = !unaligned && !resolve_d24s8_s8 &&
+   bool use_fast_path = !unaligned && !mismatched_swap && !resolve_d24s8_s8 &&
                         (a == gmem_a || blit_can_resolve(dst->format));
 
    trace_start_gmem_store(&cmd->trace, cs, dst->format, use_fast_path, unaligned);
@@ -5163,9 +5220,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
    /* use fast path when render area is aligned, except for unsupported resolve cases */
    if (use_fast_path) {
       if (store_common)
-         tu_emit_blit<CHIP>(cmd, cs, resolve_group, iview, src, clear_value, BLIT_EVENT_STORE, false);
+         tu_emit_blit<CHIP>(cmd, cs, resolve_group, dst_iview, src, clear_value, BLIT_EVENT_STORE, false);
       if (store_separate_stencil)
-         tu_emit_blit<CHIP>(cmd, cs, resolve_group, iview, src, clear_value, BLIT_EVENT_STORE, true);
+         tu_emit_blit<CHIP>(cmd, cs, resolve_group, dst_iview, src, clear_value, BLIT_EVENT_STORE, true);
 
       if (cond_exec) {
          tu_end_load_store_cond_exec(cmd, cs, false);
@@ -5198,11 +5255,11 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
 
       for_each_layer(i, layer_mask, layers) {
          if (store_common) {
-            store_3d_blit<CHIP>(cmd, cs, iview, dst->samples, false, src_format,
+            store_3d_blit<CHIP>(cmd, cs, src_iview, dst_iview, dst->samples, false, src_format,
                           dst_format, render_area, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);
          }
          if (store_separate_stencil) {
-            store_3d_blit<CHIP>(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
+            store_3d_blit<CHIP>(cmd, cs, src_iview, dst_iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
                           PIPE_FORMAT_S8_UINT, render_area, i,
                           tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples);
          }
@@ -5236,11 +5293,11 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                                          state);
          }
          if (store_common) {
-            store_cp_blit<CHIP>(cmd, cs, iview, src->samples, false, src_format,
+            store_cp_blit<CHIP>(cmd, cs, src_iview, dst_iview, src->samples, false, src_format,
                           dst_format, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp);
          }
          if (store_separate_stencil) {
-            store_cp_blit<CHIP>(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT,
+            store_cp_blit<CHIP>(cmd, cs, src_iview, dst_iview, src->samples, true, PIPE_FORMAT_S8_UINT,
                           PIPE_FORMAT_S8_UINT, i, tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples);
          }
       }