diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 36faf7c044a..ceb324304e3 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -1253,10 +1253,11 @@ r3d_src_stencil(struct tu_cmd_buffer *cmd, } static void -r3d_src_gmem_load(struct tu_cmd_buffer *cmd, - struct tu_cs *cs, - const struct tu_image_view *iview, - uint32_t layer) /* r3d_src_load generalizes the old r3d_src_gmem_load: the added override_swap flag gates whether the SWAP field of desc[0] is cleared together with the swizzle fields. */ +r3d_src_load(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_image_view *iview, + uint32_t layer, + bool override_swap) { uint32_t desc[A6XX_TEX_CONST_DWORDS]; @@ -1281,8 +1282,9 @@ r3d_src_gmem_load(struct tu_cmd_buffer *cmd, * GMEM, so we need to fixup the swizzle and swap. */ desc[0] &= ~(A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK | - A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK | - A6XX_TEX_CONST_0_SWAP__MASK); + A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK); + if (override_swap) + desc[0] &= ~A6XX_TEX_CONST_0_SWAP__MASK; desc[0] |= A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) | A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) | A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) | @@ -1294,6 +1296,24 @@ r3d_src_gmem_load(struct tu_cmd_buffer *cmd, VK_FILTER_NEAREST); } /* GMEM flavour: forwards to r3d_src_load with override_swap = true. */ +static void +r3d_src_gmem_load(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_image_view *iview, + uint32_t layer) +{ + r3d_src_load(cmd, cs, iview, layer, true); +} + /* Sysmem flavour: forwards to r3d_src_load with override_swap = false, so the SWAP mask of desc[0] is not cleared. */ +static void +r3d_src_sysmem_load(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_image_view *iview, + uint32_t layer) +{ + r3d_src_load(cmd, cs, iview, layer, false); +} + /* NOTE(review): the bare template keyword here and before several later definitions has no parameter list, although CHIP is referenced further down; this looks like an extraction artifact -- confirm against the original patch. */ template static void r3d_src_gmem(struct tu_cmd_buffer *cmd, @@ -3576,6 +3596,11 @@ resolve_sysmem(struct tu_cmd_buffer *cmd, { const struct blit_ops *ops = &r2d_ops; + /* A2D does not support "unresolve". 
*/ + if (dst->image->layout[0].nr_samples > 1) { + ops = &r3d_ops; + } + trace_start_sysmem_resolve(&cmd->rp_trace, cs, cmd, vk_dst_format); enum pipe_format src_format = vk_format_to_pipe_format(vk_src_format); @@ -3595,7 +3620,11 @@ resolve_sysmem(struct tu_cmd_buffer *cmd, ops->src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST); } } else { - ops->src(cmd, cs, &src->view, i, VK_FILTER_NEAREST, dst_format); /* When the 3D ops were selected above, the source is set up through r3d_src_sysmem_load instead of ops->src. */ + if (ops == &r3d_ops) { + r3d_src_sysmem_load(cmd, cs, src, i); + } else { + ops->src(cmd, cs, &src->view, i, VK_FILTER_NEAREST, dst_format); + } } if (dst_separate_ds) { @@ -5081,12 +5110,13 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, struct tu_resolve_group *resolve_group, uint32_t a, /* gmem_a indexes the render pass attachment description, while a keeps selecting the image view; ordinary callers pass the same value for both. */ + uint32_t gmem_a, bool cond_exec_allowed, bool force_load) { const struct tu_image_view *iview = cmd->state.attachments[a]; const struct tu_render_pass_attachment *attachment = - &cmd->state.pass->attachments[a]; + &cmd->state.pass->attachments[gmem_a]; bool load_common = attachment->load || force_load; bool load_stencil = @@ -5110,7 +5140,10 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, tu_begin_load_store_cond_exec(cmd, cs, true); if (TU_DEBUG(3D_LOAD) || - cmd->state.pass->has_fdm) { + cmd->state.pass->has_fdm || + /* Replicating unresolve seems to not work and the blob never uses it. 
*/ + (a != gmem_a)) { if (load_common || load_stencil) tu_disable_draw_states(cmd, cs); diff --git a/src/freedreno/vulkan/tu_clear_blit.h b/src/freedreno/vulkan/tu_clear_blit.h index c7245010061..7fdd37ef9cc 100644 --- a/src/freedreno/vulkan/tu_clear_blit.h +++ b/src/freedreno/vulkan/tu_clear_blit.h @@ -70,6 +70,7 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, struct tu_resolve_group *resolve_group, uint32_t a, + uint32_t gmem_a, bool cond_exec_allowed, bool force_load); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index c86a097293e..e0499eb645f 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -1521,6 +1521,62 @@ tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd, } } /* Unresolves a single attachment in sysmem: attachment a is the source view and gmem_a the destination view handed to tu_resolve_sysmem. */ +template +static void +tu6_emit_sysmem_unresolve(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + uint32_t layer_mask, + uint32_t a, + uint32_t gmem_a) +{ + const struct tu_framebuffer *fb = cmd->state.framebuffer; + const struct tu_image_view *src = cmd->state.attachments[a]; + const struct tu_image_view *dst = cmd->state.attachments[gmem_a]; + + tu_resolve_sysmem(cmd, cs, src, dst, layer_mask, fb->layers, &cmd->state.render_area); +} + /* Emits every sysmem unresolve of the subpass, bracketed by cache events and WFIs; does nothing when unresolve_count is zero. */ +template +static void +tu6_emit_sysmem_unresolves(struct tu_cmd_buffer *cmd, + struct tu_cs *cs, + const struct tu_subpass *subpass) +{ + if (subpass->unresolve_count) { + /* Similar to above, we need to explicitly flush afterwards to keep this + * in sync with draw commands. However we also don't currently insert + * dependencies when a resolve is followed by an unresolve so we also + * need to manually flush for that case. 
*/ + tu_emit_event_write(cmd, cs, FD_CCU_CLEAN_COLOR); + tu_emit_event_write(cmd, cs, FD_CACHE_INVALIDATE); + + /* Wait for the flushes to land before using the 2D engine */ /* NOTE(review): resolve_sysmem switches to r3d_ops for multisampled destinations, so the mention of the 2D engine may be stale here -- confirm. */ + tu_cs_emit_wfi(cs); + /* Tracks whether any unresolved attachment has a depth/stencil format, in which case the depth CCU is cleaned and invalidated as well afterwards. */ + bool unresolve_ds = false; + for (unsigned i = 0; i < subpass->unresolve_count; i++) { + uint32_t a = subpass->unresolve_attachments[i].attachment; + if (a == VK_ATTACHMENT_UNUSED) + continue; + + if (vk_format_is_depth_or_stencil(cmd->state.pass->attachments[a].format)) + unresolve_ds = true; + + uint32_t gmem_a = tu_subpass_get_attachment_to_unresolve(subpass, i); + + tu6_emit_sysmem_unresolve(cmd, cs, subpass->multiview_mask, a, gmem_a); + } + + tu_emit_event_write(cmd, cs, FD_CCU_CLEAN_COLOR); + tu_emit_event_write(cmd, cs, FD_CCU_INVALIDATE_COLOR); + if (unresolve_ds) { + tu_emit_event_write(cmd, cs, FD_CCU_CLEAN_DEPTH); + tu_emit_event_write(cmd, cs, FD_CCU_INVALIDATE_DEPTH); + } + tu_cs_emit_wfi(cs); + } +} template static void tu6_emit_gmem_resolves(struct tu_cmd_buffer *cmd, @@ -1552,7 +1608,7 @@ tu6_emit_gmem_resolves(struct tu_cmd_buffer *cmd, "TODO: missing GMEM->GMEM resolve path\n"); if (CHIP >= A7XX) tu_emit_event_write(cmd, cs, FD_CCU_CLEAN_BLIT_CACHE); - tu_load_gmem_attachment(cmd, cs, resolve_group, a, false, true); + tu_load_gmem_attachment(cmd, cs, resolve_group, a, a, false, true); } } } @@ -5607,11 +5663,27 @@ tu_emit_subpass_begin_gmem(struct tu_cmd_buffer *cmd, struct tu_resolve_group *r tu6_emit_blit_scissor(cmd, cs, true, false); emitted_scissor = true; } - tu_load_gmem_attachment(cmd, cs, resolve_group, i, + tu_load_gmem_attachment(cmd, cs, resolve_group, i, i, cond_load_allowed, false); } } + + /* Emit unresolves that replicate single-sampled attachments into + * multisampled GMEM attachments. 
*/ + for (uint32_t i = 0; i < cmd->state.subpass->unresolve_count; ++i) { + uint32_t a = cmd->state.subpass->unresolve_attachments[i].attachment; + if (a == VK_ATTACHMENT_UNUSED) + continue; + + uint32_t gmem_a = + tu_subpass_get_attachment_to_unresolve(cmd->state.subpass, i); + /* Unresolve loads pass force_load = true and use distinct indices for the image view (a) and the GMEM attachment (gmem_a). */ + tu_load_gmem_attachment(cmd, cs, resolve_group, a, gmem_a, + cond_load_allowed, true); + } + if (!cmd->device->physical_device->info->a7xx.has_generic_clear) { /* Emit gmem clears that are first used in this subpass. */ emitted_scissor = false; @@ -5640,18 +5712,23 @@ template static void tu_emit_subpass_begin_sysmem(struct tu_cmd_buffer *cmd) { - if (cmd->device->physical_device->info->a7xx.has_generic_clear) /* NOTE(review): with has_generic_clear set and a nonzero unresolve_count the early return is skipped, so the sysmem clear loop below now runs as well -- confirm this is intended. */ + if (cmd->device->physical_device->info->a7xx.has_generic_clear && + !cmd->state.subpass->unresolve_count) return; struct tu_cs *cs = &cmd->draw_cs; uint32_t subpass_idx = cmd->state.subpass - cmd->state.pass->subpasses; tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM); + + tu6_emit_sysmem_unresolves(cmd, cs, cmd->state.subpass); + for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i) { struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[i]; if (att->clear_mask && att->first_subpass_idx == subpass_idx) tu_clear_sysmem_attachment(cmd, cs, i); } + tu_cond_exec_end(cs); /* sysmem */ } diff --git a/src/freedreno/vulkan/tu_pass.cc b/src/freedreno/vulkan/tu_pass.cc index 1c77ea64e26..5a350bac73e 100644 --- a/src/freedreno/vulkan/tu_pass.cc +++ b/src/freedreno/vulkan/tu_pass.cc @@ -1491,3 +1491,13 @@ tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t return subpass->color_attachments[index].attachment; } + /* Maps unresolve slot index to an attachment: the depth/stencil attachment when index equals color_count and is the last unresolve slot, otherwise color_attachments[index]. */ +uint32_t +tu_subpass_get_attachment_to_unresolve(const struct tu_subpass *subpass, uint32_t index) +{ + if (index == subpass->color_count && + index == (subpass->unresolve_count - 1)) + return subpass->depth_stencil_attachment.attachment; + + return subpass->color_attachments[index].attachment; +} diff --git 
a/src/freedreno/vulkan/tu_pass.h b/src/freedreno/vulkan/tu_pass.h index 612c5e5402e..956c045add6 100644 --- a/src/freedreno/vulkan/tu_pass.h +++ b/src/freedreno/vulkan/tu_pass.h @@ -48,6 +48,7 @@ struct tu_subpass uint32_t input_count; uint32_t color_count; uint32_t resolve_count; /* Upper bound of the loops over unresolve_attachments. */ + uint32_t unresolve_count; bool resolve_depth_stencil; bool legacy_dithering_enabled; @@ -64,6 +65,7 @@ struct tu_subpass struct tu_subpass_attachment *input_attachments; struct tu_subpass_attachment *color_attachments; struct tu_subpass_attachment *resolve_attachments; + struct tu_subpass_attachment *unresolve_attachments; struct tu_subpass_attachment depth_stencil_attachment; uint32_t fsr_attachment; @@ -157,4 +159,7 @@ void tu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer, uint32_t tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index); +uint32_t +tu_subpass_get_attachment_to_unresolve(const struct tu_subpass *subpass, uint32_t index); + #endif /* TU_PASS_H */