diff --git a/src/freedreno/fdl/fd6_view.c b/src/freedreno/fdl/fd6_view.c index 4de6c2b3358..e600296bd98 100644 --- a/src/freedreno/fdl/fd6_view.c +++ b/src/freedreno/fdl/fd6_view.c @@ -402,6 +402,8 @@ fdl6_view_init(struct fdl6_view *view, const struct fdl_layout **layouts, tile_mode == TILE6_LINEAR && args->base_miplevel != layout->mip_levels - 1; view->ubwc_enabled = ubwc_enabled; + view->is_mutable = layout->is_mutable; + view->color_swap = color_swap; view->RB_MRT_BUF_INFO = A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | diff --git a/src/freedreno/fdl/freedreno_layout.h b/src/freedreno/fdl/freedreno_layout.h index c745ffa0180..44c17435216 100644 --- a/src/freedreno/fdl/freedreno_layout.h +++ b/src/freedreno/fdl/freedreno_layout.h @@ -297,6 +297,8 @@ struct fdl6_view { bool need_y2_align; bool ubwc_enabled; + bool is_mutable; + uint8_t color_swap; enum pipe_format format; diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 60f4c71334c..7016049ea34 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -1334,7 +1334,9 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd, uint32_t desc[A6XX_TEX_CONST_DWORDS]; memcpy(desc, iview->view.descriptor, sizeof(desc)); - enum a6xx_format fmt = blit_format_texture(format, TILE6_LINEAR, false, true).fmt; + enum a6xx_format fmt = + blit_format_texture(format, TILE6_2, + iview->view.is_mutable, true).fmt; fixup_src_format(&format, dst_format, &fmt); /* patch the format so that depth/stencil get the right format and swizzle */ @@ -1348,7 +1350,9 @@ r3d_src_gmem(struct tu_cmd_buffer *cmd, A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W); /* patched for gmem */ - desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK); + desc[0] &= ~A6XX_TEX_CONST_0_TILE_MODE__MASK; + if (!iview->view.is_mutable) + desc[0] &= ~A6XX_TEX_CONST_0_SWAP__MASK; desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2); desc[2] = A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) | @@ -4856,7 +4860,8 @@ template static void store_cp_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - const struct tu_image_view *iview, + const struct tu_image_view *src_iview, + const struct tu_image_view *dst_iview, uint32_t samples, bool separate_stencil, enum pipe_format src_format, @@ -4867,33 +4872,44 @@ store_cp_blit(struct tu_cmd_buffer *cmd, { r2d_setup_common(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, - iview->view.ubwc_enabled, true); + dst_iview->view.ubwc_enabled, true); - if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + if (dst_iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (!separate_stencil) { - r2d_dst_depth(cs, iview, layer); + r2d_dst_depth(cs, dst_iview, layer); } else { - r2d_dst_stencil(cs, iview, layer); + r2d_dst_stencil(cs, dst_iview, layer); } } else { - r2d_dst(cs, &iview->view, layer, src_format); + r2d_dst(cs, &dst_iview->view, layer, src_format); } - enum a6xx_format fmt = blit_format_texture(src_format, TILE6_2, false, true).fmt; - fixup_src_format(&src_format, dst_format, &fmt); + /* Note: we compute the swap here instead of using the color_swap as + * programmed when we setup the color attachment because the attachment in + * GMEM ignores the swap except when MUTABLEEN is enabled. If the + * color attachment is linear, we need to use the identity swap even if the + * original attachment has a non-identity swap. + */ + struct tu_native_format fmt = + blit_format_texture(src_format, TILE6_2, + src_iview->view.is_mutable, true); + enum a6xx_format format = fmt.fmt; + fixup_src_format(&src_format, dst_format, &format); tu_cs_emit_regs(cs, SP_PS_2D_SRC_INFO(CHIP, - .color_format = fmt, + .color_format = format, .tile_mode = TILE6_2, - .color_swap = WZYX, + .color_swap = fmt.swap, .srgb = util_format_is_srgb(src_format), .samples = tu_msaa_samples(samples), .samples_average = !util_format_is_pure_integer(dst_format) && !util_format_is_depth_or_stencil(dst_format), .unk20 = 1, .unk22 = 1), - SP_PS_2D_SRC_SIZE(CHIP, .width = iview->vk.extent.width, .height = iview->vk.extent.height), + SP_PS_2D_SRC_SIZE(CHIP, + .width = dst_iview->vk.extent.width, + .height = dst_iview->vk.extent.height), SP_PS_2D_SRC(CHIP, .qword = cmd->device->physical_device->gmem_base + gmem_offset), SP_PS_2D_SRC_PITCH(CHIP, .pitch = cmd->state.tiling->tile0.width * cpp)); @@ -4921,7 +4937,8 @@ template static void store_3d_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs, - const struct tu_image_view *iview, + const struct tu_image_view *src_iview, + const struct tu_image_view *dst_iview, VkSampleCountFlagBits dst_samples, bool separate_stencil, enum pipe_format src_format, @@ -4949,21 +4966,21 @@ store_3d_blit(struct tu_cmd_buffer *cmd, } r3d_setup(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, - 0, false, iview->view.ubwc_enabled, dst_samples); + 0, false, dst_iview->view.ubwc_enabled, dst_samples); r3d_coords(cmd, cs, render_area->offset, render_area->offset, render_area->extent); - if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { + if (dst_iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { if (!separate_stencil) { - r3d_dst_depth(cs, iview, layer); + r3d_dst_depth(cs, dst_iview, layer); } else { - r3d_dst_stencil(cs, iview, layer); + r3d_dst_stencil(cs, dst_iview, layer); } } else { - r3d_dst(cs, &iview->view, layer, src_format); + r3d_dst(cs, &dst_iview->view, layer, src_format); } - r3d_src_gmem(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp); + r3d_src_gmem(cmd, cs, src_iview, src_format, dst_format, gmem_offset, cpp); /* sync GMEM writes with CACHE. */ tu_emit_event_write(cmd, cs, FD_CACHE_INVALIDATE); @@ -5033,6 +5050,29 @@ tu_attachment_store_unaligned(struct tu_cmd_buffer *cmd, uint32_t a) (y2 % phys_dev->info->gmem_align_h && need_y2_align)); } +/* The fast path cannot handle the corner case where GMEM and sysmem + * attachments have different swap if the GMEM attachment is mutable, which + * can happen when a mutable color attachment is being resolved into a + * non-mutable resolve attachment. In such a case, if the format is a swapped + * format like BGRA8, the color attachment will be stored in GMEM swapped but + * the resolve attachment in sysmem will not be swapped and there's no way to + * express that in the hardware because it computes the GMEM swap from the + * sysmem swap. + */ +static bool +tu_attachment_store_mismatched_swap(struct tu_cmd_buffer *cmd, uint32_t a, + uint32_t gmem_a) +{ + if (a == gmem_a) + return false; + + const struct tu_image_view *dst_iview = cmd->state.attachments[a]; + const struct tu_image_view *src_iview = cmd->state.attachments[gmem_a]; + + return src_iview->view.is_mutable && + dst_iview->view.color_swap != src_iview->view.color_swap; +} + /* Choose the GMEM layout (use the CCU space or not) based on whether the * current attachments will need. This has to happen at vkBeginRenderPass() * time because tu_attachment_store_unaligned() looks at the image views, which @@ -5062,6 +5102,21 @@ tu_choose_gmem_layout(struct tu_cmd_buffer *cmd) cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU; } + for (unsigned i = 0; i < cmd->state.pass->subpass_count; i++) { + const struct tu_subpass *subpass = &cmd->state.pass->subpasses[i]; + for (unsigned j = 0; j < subpass->resolve_count; j++) { + uint32_t a = subpass->resolve_attachments[j].attachment; + if (a == VK_ATTACHMENT_UNUSED) + continue; + uint32_t gmem_a = + j == subpass->color_count ? + subpass->depth_stencil_attachment.attachment : + subpass->color_attachments[j].attachment; + if (tu_attachment_store_mismatched_swap(cmd, a, gmem_a)) + cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU; + } + } + cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout]; } @@ -5117,8 +5172,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, { const VkRect2D *render_area = &cmd->state.render_area; struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a]; - const struct tu_image_view *iview = cmd->state.attachments[a]; + const struct tu_image_view *dst_iview = cmd->state.attachments[a]; struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a]; + const struct tu_image_view *src_iview = cmd->state.attachments[a]; const VkClearValue *clear_value = &cmd->state.clear_values[gmem_a]; bool resolve = a != gmem_a; if (resolve) @@ -5128,6 +5184,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, return; bool unaligned = tu_attachment_store_unaligned(cmd, a); + bool mismatched_swap = tu_attachment_store_mismatched_swap(cmd, a, gmem_a); /* D32_SFLOAT_S8_UINT is quite special format: it has two planes, * one for depth and other for stencil. When resolving a MSAA @@ -5147,7 +5204,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, bool store_common = dst->store && !resolve_d32s8_s8; bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8; - bool use_fast_path = !unaligned && !resolve_d24s8_s8 && + bool use_fast_path = !unaligned && !mismatched_swap && !resolve_d24s8_s8 && (a == gmem_a || blit_can_resolve(dst->format)); trace_start_gmem_store(&cmd->trace, cs, dst->format, use_fast_path, unaligned); @@ -5163,9 +5220,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, /* use fast path when render area is aligned, except for unsupported resolve cases */ if (use_fast_path) { if (store_common) - tu_emit_blit(cmd, cs, resolve_group, iview, src, clear_value, BLIT_EVENT_STORE, false); + tu_emit_blit(cmd, cs, resolve_group, dst_iview, src, clear_value, BLIT_EVENT_STORE, false); if (store_separate_stencil) - tu_emit_blit(cmd, cs, resolve_group, iview, src, clear_value, BLIT_EVENT_STORE, true); + tu_emit_blit(cmd, cs, resolve_group, dst_iview, src, clear_value, BLIT_EVENT_STORE, true); if (cond_exec) { tu_end_load_store_cond_exec(cmd, cs, false); @@ -5198,11 +5255,11 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, for_each_layer(i, layer_mask, layers) { if (store_common) { - store_3d_blit(cmd, cs, iview, dst->samples, false, src_format, + store_3d_blit(cmd, cs, src_iview, dst_iview, dst->samples, false, src_format, dst_format, render_area, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp); } if (store_separate_stencil) { - store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT, + store_3d_blit(cmd, cs, src_iview, dst_iview, dst->samples, true, PIPE_FORMAT_S8_UINT, PIPE_FORMAT_S8_UINT, render_area, i, tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples); } @@ -5236,11 +5293,11 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, state); } if (store_common) { - store_cp_blit(cmd, cs, iview, src->samples, false, src_format, + store_cp_blit(cmd, cs, src_iview, dst_iview, src->samples, false, src_format, dst_format, i, tu_attachment_gmem_offset(cmd, src, i), src->cpp); } if (store_separate_stencil) { - store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT, + store_cp_blit(cmd, cs, src_iview, dst_iview, src->samples, true, PIPE_FORMAT_S8_UINT, PIPE_FORMAT_S8_UINT, i, tu_attachment_gmem_offset_stencil(cmd, src, i), src->samples); } }