From 3af0f0129cfcd3cf0d16542dc4155a24e8afcd87 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 11 Jul 2024 15:49:03 +0200 Subject: [PATCH] radv: fix DRLR with subpass input attachments and feedback loops Dynamic rendering local read allows the application to use subpass input attachments with feedback loops. But unlike legacy RPs, where it's possible to determine feedback loops at creation time, with dynamic rendering it's not possible. To fix that, the driver needs to determine at draw time if a feedback loop is present, and it needs to decompress DCC/HTILE if necessary. See https://gitlab.khronos.org/vulkan/vulkan/-/issues/3928 for more information. Note that VKCTS is still missing coverage but this has been reported. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11127 Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/vulkan/radv_cmd_buffer.c | 151 +++++++++++++++++- src/amd/vulkan/radv_cmd_buffer.h | 5 +- .../zink/ci/zink-radv-navi31-fails.txt | 3 - .../zink/ci/zink-radv-vangogh-fails.txt | 17 -- 4 files changed, 147 insertions(+), 29 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 087c6afbd9e..836542be75c 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -261,6 +261,10 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy if (pdev->info.rbplus_allowed && (dest_mask & RADV_DYNAMIC_COLOR_WRITE_MASK)) { cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS; } + + if (dest_mask & (RADV_DYNAMIC_COLOR_ATTACHMENT_MAP | RADV_DYNAMIC_INPUT_ATTACHMENT_MAP)) { + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT; + } } bool @@ -8097,6 +8101,11 @@ radv_bind_fragment_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ previous_ps->info.ps.pops_is_per_sample != ps->info.ps.pops_is_per_sample) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DB_SHADER_CONTROL; + if (!previous_ps || cmd_buffer->state.uses_fbfetch_output != 
ps->info.ps.uses_fbfetch_output) { + cmd_buffer->state.uses_fbfetch_output = ps->info.ps.uses_fbfetch_output; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT; + } + /* Re-emit the PS epilog when a new fragment shader is bound. */ if (ps->info.has_epilog) cmd_buffer->state.emitted_ps_epilog = NULL; @@ -9169,6 +9178,7 @@ radv_CmdSetRenderingAttachmentLocationsKHR(VkCommandBuffer commandBuffer, } state->dirty_dynamic |= RADV_DYNAMIC_COLOR_ATTACHMENT_MAP; + state->dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT; } VKAPI_ATTR void VKAPI_CALL @@ -9203,6 +9213,120 @@ radv_CmdSetRenderingInputAttachmentIndicesKHR(VkCommandBuffer commandBuffer, : *pLocationInfo->pStencilInputAttachmentIndex; state->dirty_dynamic |= RADV_DYNAMIC_INPUT_ATTACHMENT_MAP; + state->dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT; +} + +static void +radv_handle_color_fbfetch_output(struct radv_cmd_buffer *cmd_buffer, uint32_t index) +{ + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_rendering_state *render = &cmd_buffer->state.render; + const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct radv_attachment *att = &render->color_att[index]; + + if (!att->iview) + return; + + const struct radv_image *image = att->iview->image; + if (!(image->vk.usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + return; + + if (!radv_layout_dcc_compressed(device, image, att->iview->vk.base_mip_level, att->layout, + radv_image_queue_family_mask(att->iview->image, cmd_buffer->qf, cmd_buffer->qf))) + return; + + const uint32_t color_att_idx = d->vk.cal.color_map[index]; + if (color_att_idx == MESA_VK_ATTACHMENT_UNUSED) + return; + + if (d->vk.ial.color_map[color_att_idx] != color_att_idx) + return; + + const VkImageSubresourceRange range = { + .aspectMask = att->iview->vk.aspects, + .baseMipLevel = att->iview->vk.base_mip_level, + .levelCount = att->iview->vk.level_count, + .baseArrayLayer = att->iview->vk.base_array_layer, + .layerCount = att->iview->vk.layer_count, + }; + 
+ /* Consider previous rendering work for WAW hazards. */ + cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, att->iview->image); + + /* Force a transition to FEEDBACK_LOOP_OPTIMAL to decompress DCC. */ + radv_handle_image_transition(cmd_buffer, att->iview->image, att->layout, + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, RADV_QUEUE_GENERAL, + RADV_QUEUE_GENERAL, &range, NULL); + + att->layout = VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; +} + +static void +radv_handle_depth_fbfetch_output(struct radv_cmd_buffer *cmd_buffer) +{ + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_rendering_state *render = &cmd_buffer->state.render; + const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct radv_attachment *att = &render->ds_att; + + if (!att->iview) + return; + + const struct radv_image *image = att->iview->image; + if (!(image->vk.usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + return; + + if (!radv_layout_is_htile_compressed( + device, att->iview->image, att->layout, + radv_image_queue_family_mask(att->iview->image, cmd_buffer->qf, cmd_buffer->qf))) + return; + + if (d->vk.ial.depth_att == MESA_VK_ATTACHMENT_UNUSED && d->vk.ial.stencil_att == MESA_VK_ATTACHMENT_UNUSED) + return; + + const VkImageSubresourceRange range = { + .aspectMask = att->iview->vk.aspects, + .baseMipLevel = att->iview->vk.base_mip_level, + .levelCount = att->iview->vk.level_count, + .baseArrayLayer = att->iview->vk.base_array_layer, + .layerCount = att->iview->vk.layer_count, + }; + + /* Force a transition to FEEDBACK_LOOP_OPTIMAL to decompress HTILE. 
*/ + radv_handle_image_transition(cmd_buffer, att->iview->image, att->layout, + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, RADV_QUEUE_GENERAL, + RADV_QUEUE_GENERAL, &range, NULL); + + att->layout = VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; +} + +static void +radv_handle_fbfetch_output(struct radv_cmd_buffer *cmd_buffer) +{ + const struct radv_rendering_state *render = &cmd_buffer->state.render; + + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FBFETCH_OUTPUT; + + /* Nothing to do when dynamic rendering doesn't use concurrent input attachment writes. */ + if (render->has_input_attachment_no_concurrent_writes) + return; + + /* Nothing to do when the bound fragment shader doesn't use subpass input attachments. */ + if (!cmd_buffer->state.uses_fbfetch_output) + return; + + /* Check if any color attachments are compressed and also used as input attachments. */ + for (uint32_t i = 0; i < render->color_att_count; i++) { + radv_handle_color_fbfetch_output(cmd_buffer, i); + } + + /* Check if the depth/stencil attachment is compressed and also used as input attachment. */ + radv_handle_depth_fbfetch_output(cmd_buffer); } VKAPI_ATTR void VKAPI_CALL @@ -9257,13 +9381,19 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou primary->shader_upload_seq = MAX2(primary->shader_upload_seq, secondary->shader_upload_seq); - if (!secondary->state.render.has_image_views && primary->state.render.active && - (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) { - /* Emit the framebuffer state from primary if secondary - * has been recorded without a framebuffer, otherwise - * fast color/depth clears can't work. 
- */ - radv_emit_framebuffer_state(primary); + primary->state.uses_fbfetch_output |= secondary->state.uses_fbfetch_output; + + if (!secondary->state.render.has_image_views) { + if (primary->state.dirty & RADV_CMD_DIRTY_FBFETCH_OUTPUT) + radv_handle_fbfetch_output(primary); + + if (primary->state.render.active && (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) { + /* Emit the framebuffer state from primary if secondary + * has been recorded without a framebuffer, otherwise + * fast color/depth clears can't work. + */ + radv_emit_framebuffer_state(primary); + } } if (secondary->gang.cs) { @@ -9545,6 +9675,8 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe struct radv_rendering_state *render = &cmd_buffer->state.render; render->active = true; render->has_image_views = true; + render->has_input_attachment_no_concurrent_writes = + !!(pRenderingInfo->flags & VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA); render->area = pRenderingInfo->renderArea; render->view_mask = pRenderingInfo->viewMask; render->layer_count = pRenderingInfo->layerCount; @@ -9558,7 +9690,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe render->ds_att_aspects = ds_att_aspects; render->vrs_att = vrs_att; render->vrs_texel_size = vrs_texel_size; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER | RADV_CMD_DIRTY_FBFETCH_OUTPUT; if (pdev->info.rbplus_allowed) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS; @@ -11025,6 +11157,9 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info cmd_buffer->state.last_index_type = -1; } + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FBFETCH_OUTPUT) + radv_handle_fbfetch_output(cmd_buffer); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_GRAPHICS_SHADERS) { radv_bind_graphics_shaders(cmd_buffer); } diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index 
8b645c63799..b11f42eca85 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -93,7 +93,8 @@ enum radv_cmd_dirty_bits { RADV_CMD_DIRTY_STREAMOUT_ENABLE = 1ull << 10, RADV_CMD_DIRTY_GRAPHICS_SHADERS = 1ull << 11, RADV_CMD_DIRTY_COLOR_OUTPUT = 1ull << 12, - RADV_CMD_DIRTY_ALL = (1ull << 13) - 1, + RADV_CMD_DIRTY_FBFETCH_OUTPUT = 1ull << 13, + RADV_CMD_DIRTY_ALL = (1ull << 14) - 1, }; enum radv_cmd_flush_bits { @@ -184,6 +185,7 @@ struct radv_attachment { struct radv_rendering_state { bool active; bool has_image_views; + bool has_input_attachment_no_concurrent_writes; VkRect2D area; uint32_t layer_count; uint32_t view_mask; @@ -447,6 +449,7 @@ struct radv_cmd_state { bool uses_vrs_coarse_shading; bool uses_dynamic_patch_control_points; bool uses_dynamic_vertex_binding_stride; + bool uses_fbfetch_output; }; struct radv_enc_state { diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt index a7fc4dfd184..8245da675a1 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt @@ -207,6 +207,3 @@ spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail - -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/11127 -KHR-GL46.blend_equation_advanced.test_coherency.multiplySequence,Fail diff --git a/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt index c5e48f79dcb..df03d976a56 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt @@ -177,20 +177,3 @@ spec@ext_framebuffer_multisample@interpolation 8 
non-centroid-disabled,Fail # Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104 spec@arb_viewport_array@display-list,Fail - -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/11127 -dEQP-GLES31.functional.blend_equation_advanced.msaa.colorburn,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.colordodge,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.darken,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.difference,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.exclusion,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hardlight,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_color,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_hue,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_luminosity,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_saturation,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.lighten,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.multiply,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.overlay,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.screen,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.softlight,Fail