From 4e5979b5a205a7d2a81131971db978832ca25f66 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 25 Jan 2024 14:35:52 -0800 Subject: [PATCH] anv/sparse: flush the tile cache when resolving sparse images Consider the following program: - Uses a multi-sampled image as the color attachment. - Draws simple geometry at the color attachment. - Uses the (non-multi-sampled) swapchain image as the resolve image. - Presents the result. If the color attachment image (the multi-sampled one) is a sparse image and it's fully bound, everything works and this patch is not required. If the image is partially bound (or just completely unbound), without this patch the unbound area of the image that ends up being displayed on the screen is not completely black, and it should be completely black due to the fact that we claim to support residencyNonResidentStrict (which is required by vkd3d for DX12). On DG2, what ends up being displayed in the swapchain image is actually the whole image as if it was completely bound. On TGL the unbound area partially displays the geometry that was supposed to be drawn, but the background is a different color: it's a weird corrupted image. On both platforms the unbound areas should all be fully black. This patch applies the proper flushing so that we get the results we should have. The bug fixed by this patch is not caught by dEQP or anything our CI runs (dEQP does have some checks for residencynonResidentStrict correctness, but none that catch this issue in particular). I was able to catch this with my own sample program. Using INTEL_DEBUG=stall also makes the problem go away. If we had a way to track which images are fully bound we would be able to avoid this flush. I had code for that in the earliest versions of sparse before xe.ko had support for gpuva, but it requires maintaining a bunch of lists, so I'm not sure that's actually worth it. Reviewed-by: Lionel Landwerlin Signed-off-by: Paulo Zanoni Part-of: --- src/intel/vulkan/genX_cmd_buffer.c | 37 +++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 390a8ac2bde..90817923b68 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -5353,14 +5353,18 @@ void genX(CmdEndRendering)( is_multiview ? util_last_bit(gfx->view_mask) : gfx->layer_count; bool has_color_resolve = false; + bool has_sparse_color_resolve = false; for (uint32_t i = 0; i < gfx->color_att_count; i++) { cmd_buffer_mark_attachment_written(cmd_buffer, &gfx->color_att[i], VK_IMAGE_ASPECT_COLOR_BIT); /* Stash this off for later */ if (gfx->color_att[i].resolve_mode != VK_RESOLVE_MODE_NONE && - !(gfx->rendering_flags & VK_RENDERING_SUSPENDING_BIT)) + !(gfx->rendering_flags & VK_RENDERING_SUSPENDING_BIT)) { has_color_resolve = true; + if (anv_image_is_sparse(gfx->color_att[i].iview->image)) + has_sparse_color_resolve = true; + } } cmd_buffer_mark_attachment_written(cmd_buffer, &gfx->depth_att, @@ -5380,9 +5384,19 @@ void genX(CmdEndRendering)( "MSAA resolve"); } - if (!(gfx->rendering_flags & VK_RENDERING_SUSPENDING_BIT) && - (gfx->depth_att.resolve_mode != VK_RESOLVE_MODE_NONE || - gfx->stencil_att.resolve_mode != VK_RESOLVE_MODE_NONE)) { + const bool has_depth_resolve = + gfx->depth_att.resolve_mode != VK_RESOLVE_MODE_NONE && + !(gfx->rendering_flags & VK_RENDERING_SUSPENDING_BIT); + const bool has_stencil_resolve = + gfx->stencil_att.resolve_mode != VK_RESOLVE_MODE_NONE && + !(gfx->rendering_flags & VK_RENDERING_SUSPENDING_BIT); + const bool has_sparse_depth_resolve = + has_depth_resolve && anv_image_is_sparse(gfx->depth_att.iview->image); + const bool has_sparse_stencil_resolve = + has_stencil_resolve && + anv_image_is_sparse(gfx->stencil_att.iview->image); + + if (has_depth_resolve || has_stencil_resolve) { /* We are about to do some MSAA resolves. We need to flush so that the * result of writes to the MSAA depth attachments show up in the sampler * when we blit to the single-sampled resolve target. @@ -5393,6 +5407,15 @@ void genX(CmdEndRendering)( "MSAA resolve"); } + if (has_sparse_color_resolve || has_sparse_depth_resolve || + has_sparse_stencil_resolve) { + /* If the resolve image is sparse we need some extra bits to make sure + * unbound regions read 0, as residencyNonResidentStrict mandates. + */ + anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_TILE_CACHE_FLUSH_BIT, + "sparse MSAA resolve"); + } + for (uint32_t i = 0; i < gfx->color_att_count; i++) { const struct anv_attachment *att = &gfx->color_att[i]; if (att->resolve_mode == VK_RESOLVE_MODE_NONE || @@ -5403,8 +5426,7 @@ void genX(CmdEndRendering)( VK_IMAGE_ASPECT_COLOR_BIT); } - if (gfx->depth_att.resolve_mode != VK_RESOLVE_MODE_NONE && - !(gfx->rendering_flags & VK_RENDERING_SUSPENDING_BIT)) { + if (has_depth_resolve) { const struct anv_image_view *src_iview = gfx->depth_att.iview; /* MSAA resolves sample from the source attachment. Transition the @@ -5434,8 +5456,7 @@ void genX(CmdEndRendering)( false /* will_full_fast_clear */); } - if (gfx->stencil_att.resolve_mode != VK_RESOLVE_MODE_NONE && - !(gfx->rendering_flags & VK_RENDERING_SUSPENDING_BIT)) { + if (has_stencil_resolve) { anv_attachment_msaa_resolve(cmd_buffer, &gfx->stencil_att, gfx->stencil_att.layout, VK_IMAGE_ASPECT_STENCIL_BIT);