diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 087c6afbd9e..836542be75c 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -261,6 +261,10 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy if (pdev->info.rbplus_allowed && (dest_mask & RADV_DYNAMIC_COLOR_WRITE_MASK)) { cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS; } + /* Both attachment maps are read by the fbfetch output handling, so it has to run again when they are rebound. */ + if (dest_mask & (RADV_DYNAMIC_COLOR_ATTACHMENT_MAP | RADV_DYNAMIC_INPUT_ATTACHMENT_MAP)) { + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT; + } } bool @@ -8097,6 +8101,11 @@ radv_bind_fragment_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_ previous_ps->info.ps.pops_is_per_sample != ps->info.ps.pops_is_per_sample) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DB_SHADER_CONTROL; + if (!previous_ps || cmd_buffer->state.uses_fbfetch_output != ps->info.ps.uses_fbfetch_output) { /* no PS was bound before, or the cached flag differs from the new PS */ + cmd_buffer->state.uses_fbfetch_output = ps->info.ps.uses_fbfetch_output; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT; + } + /* Re-emit the PS epilog when a new fragment shader is bound. 
*/ if (ps->info.has_epilog) cmd_buffer->state.emitted_ps_epilog = NULL; @@ -9169,6 +9178,7 @@ radv_CmdSetRenderingAttachmentLocationsKHR(VkCommandBuffer commandBuffer, } state->dirty_dynamic |= RADV_DYNAMIC_COLOR_ATTACHMENT_MAP; + state->dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT; /* the color attachment map is consulted by the fbfetch output handling */ } VKAPI_ATTR void VKAPI_CALL @@ -9203,6 +9213,120 @@ radv_CmdSetRenderingInputAttachmentIndicesKHR(VkCommandBuffer commandBuffer, : *pLocationInfo->pStencilInputAttachmentIndex; state->dirty_dynamic |= RADV_DYNAMIC_INPUT_ATTACHMENT_MAP; + state->dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT; /* the input attachment map is consulted by the fbfetch output handling */ +} +/* Transition color attachment 'index' to FEEDBACK_LOOP_OPTIMAL (decompressing DCC) when it has an image view, its image has INPUT_ATTACHMENT usage, it is DCC compressed in its current layout and it passes the attachment map checks below. Also flags the framebuffer state dirty. */ +static void +radv_handle_color_fbfetch_output(struct radv_cmd_buffer *cmd_buffer, uint32_t index) +{ + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_rendering_state *render = &cmd_buffer->state.render; + const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct radv_attachment *att = &render->color_att[index]; + + if (!att->iview) + return; + + const struct radv_image *image = att->iview->image; + if (!(image->vk.usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + return; + /* Only a layout that keeps DCC compressed needs the forced transition. */ + if (!radv_layout_dcc_compressed(device, image, att->iview->vk.base_mip_level, att->layout, + radv_image_queue_family_mask(att->iview->image, cmd_buffer->qf, cmd_buffer->qf))) + return; + /* NOTE(review): the two map checks presumably skip attachments that are unmapped or not read back as input attachment; confirm the cal/ial map semantics. */ + const uint32_t color_att_idx = d->vk.cal.color_map[index]; + if (color_att_idx == MESA_VK_ATTACHMENT_UNUSED) + return; + + if (d->vk.ial.color_map[color_att_idx] != color_att_idx) + return; + + const VkImageSubresourceRange range = { + .aspectMask = att->iview->vk.aspects, + .baseMipLevel = att->iview->vk.base_mip_level, + .levelCount = att->iview->vk.level_count, + .baseArrayLayer = att->iview->vk.base_array_layer, + .layerCount = att->iview->vk.layer_count, + }; + + /* Consider previous rendering work for WAW hazards. 
*/ + cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, att->iview->image); + + /* Force a transition to FEEDBACK_LOOP_OPTIMAL to decompress DCC. */ + radv_handle_image_transition(cmd_buffer, att->iview->image, att->layout, + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, RADV_QUEUE_GENERAL, + RADV_QUEUE_GENERAL, &range, NULL); + + att->layout = VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + /* The attachment layout changed, so the framebuffer state is flagged dirty. */ + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; +} +/* Depth/stencil counterpart of the color helper: transition the depth/stencil attachment to FEEDBACK_LOOP_OPTIMAL (decompressing HTILE) when it has an image view, its image has INPUT_ATTACHMENT usage, it is HTILE compressed in its current layout and depth or stencil is mapped as input attachment. Also flags the framebuffer state dirty. */ +static void +radv_handle_depth_fbfetch_output(struct radv_cmd_buffer *cmd_buffer) +{ + const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer); + struct radv_rendering_state *render = &cmd_buffer->state.render; + const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; + struct radv_attachment *att = &render->ds_att; + + if (!att->iview) + return; + + const struct radv_image *image = att->iview->image; + if (!(image->vk.usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) + return; + + if (!radv_layout_is_htile_compressed( + device, att->iview->image, att->layout, + radv_image_queue_family_mask(att->iview->image, cmd_buffer->qf, cmd_buffer->qf))) + return; + /* Nothing to do when neither depth nor stencil has an input attachment mapping. */ + if (d->vk.ial.depth_att == MESA_VK_ATTACHMENT_UNUSED && d->vk.ial.stencil_att == MESA_VK_ATTACHMENT_UNUSED) + return; + + const VkImageSubresourceRange range = { + .aspectMask = att->iview->vk.aspects, + .baseMipLevel = att->iview->vk.base_mip_level, + .levelCount = att->iview->vk.level_count, + .baseArrayLayer = att->iview->vk.base_array_layer, + .layerCount = att->iview->vk.layer_count, + }; + /* NOTE(review): unlike the color path, no radv_src_access_flush() is issued before this transition; confirm that is intended. */ + /* Force a transition to FEEDBACK_LOOP_OPTIMAL to decompress HTILE. 
*/ + radv_handle_image_transition(cmd_buffer, att->iview->image, att->layout, + VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, RADV_QUEUE_GENERAL, + RADV_QUEUE_GENERAL, &range, NULL); + + att->layout = VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + /* The attachment layout changed, so the framebuffer state is flagged dirty. */ + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; +} +/* Consume RADV_CMD_DIRTY_FBFETCH_OUTPUT: unless the render pass or the bound fragment shader makes it unnecessary, run the color helper on every color attachment and the depth helper on the depth/stencil attachment. */ +static void +radv_handle_fbfetch_output(struct radv_cmd_buffer *cmd_buffer) +{ + const struct radv_rendering_state *render = &cmd_buffer->state.render; + /* The dirty bit is cleared up front, so it is also consumed when one of the early returns below is taken. */ + cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FBFETCH_OUTPUT; + + /* Nothing to do when dynamic rendering doesn't use concurrent input attachment writes. */ + if (render->has_input_attachment_no_concurrent_writes) + return; + + /* Nothing to do when the bound fragment shader doesn't use subpass input attachments. */ + if (!cmd_buffer->state.uses_fbfetch_output) + return; + + /* Check if any color attachments are compressed and also used as input attachments. */ + for (uint32_t i = 0; i < render->color_att_count; i++) { + radv_handle_color_fbfetch_output(cmd_buffer, i); + } + + /* Check if the depth/stencil attachment is compressed and also used as input attachment. */ + radv_handle_depth_fbfetch_output(cmd_buffer); } VKAPI_ATTR void VKAPI_CALL @@ -9257,13 +9381,19 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou primary->shader_upload_seq = MAX2(primary->shader_upload_seq, secondary->shader_upload_seq); - if (!secondary->state.render.has_image_views && primary->state.render.active && - (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) { - /* Emit the framebuffer state from primary if secondary - * has been recorded without a framebuffer, otherwise - * fast color/depth clears can't work. 
- */ - radv_emit_framebuffer_state(primary); + primary->state.uses_fbfetch_output |= secondary->state.uses_fbfetch_output; + /* NOTE(review): the flag can change here without RADV_CMD_DIRTY_FBFETCH_OUTPUT being set, unlike in radv_bind_fragment_shader(); confirm the bit is always still pending at this point. */ + if (!secondary->state.render.has_image_views) { + if (primary->state.dirty & RADV_CMD_DIRTY_FBFETCH_OUTPUT) + radv_handle_fbfetch_output(primary); + /* radv_handle_fbfetch_output() may have set RADV_CMD_DIRTY_FRAMEBUFFER, which is tested right below. */ + if (primary->state.render.active && (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) { + /* Emit the framebuffer state from primary if secondary + * has been recorded without a framebuffer, otherwise + * fast color/depth clears can't work. + */ + radv_emit_framebuffer_state(primary); + } } if (secondary->gang.cs) { @@ -9545,6 +9675,8 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe struct radv_rendering_state *render = &cmd_buffer->state.render; render->active = true; render->has_image_views = true; + render->has_input_attachment_no_concurrent_writes = + !!(pRenderingInfo->flags & VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA); /* when set, radv_handle_fbfetch_output() returns early */ render->area = pRenderingInfo->renderArea; render->view_mask = pRenderingInfo->viewMask; render->layer_count = pRenderingInfo->layerCount; @@ -9558,7 +9690,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe render->ds_att_aspects = ds_att_aspects; render->vrs_att = vrs_att; render->vrs_texel_size = vrs_texel_size; - cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; + cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER | RADV_CMD_DIRTY_FBFETCH_OUTPUT; /* new attachments: have them re-checked by the fbfetch output handling */ if (pdev->info.rbplus_allowed) cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS; @@ -11025,6 +11157,9 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info cmd_buffer->state.last_index_type = -1; } + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FBFETCH_OUTPUT) + radv_handle_fbfetch_output(cmd_buffer); /* NOTE(review): runs before radv_bind_graphics_shaders(); confirm uses_fbfetch_output is already current here */ + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_GRAPHICS_SHADERS) { radv_bind_graphics_shaders(cmd_buffer); } diff --git a/src/amd/vulkan/radv_cmd_buffer.h b/src/amd/vulkan/radv_cmd_buffer.h index 
8b645c63799..b11f42eca85 100644 --- a/src/amd/vulkan/radv_cmd_buffer.h +++ b/src/amd/vulkan/radv_cmd_buffer.h @@ -93,7 +93,8 @@ enum radv_cmd_dirty_bits { RADV_CMD_DIRTY_STREAMOUT_ENABLE = 1ull << 10, RADV_CMD_DIRTY_GRAPHICS_SHADERS = 1ull << 11, RADV_CMD_DIRTY_COLOR_OUTPUT = 1ull << 12, - RADV_CMD_DIRTY_ALL = (1ull << 13) - 1, + RADV_CMD_DIRTY_FBFETCH_OUTPUT = 1ull << 13, /* attachments must be re-checked by radv_handle_fbfetch_output() */ + RADV_CMD_DIRTY_ALL = (1ull << 14) - 1, /* mask widened to include bit 13 */ }; enum radv_cmd_flush_bits { @@ -184,6 +185,7 @@ struct radv_attachment { struct radv_rendering_state { bool active; bool has_image_views; + bool has_input_attachment_no_concurrent_writes; /* VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA was set in radv_CmdBeginRendering() */ VkRect2D area; uint32_t layer_count; uint32_t view_mask; @@ -447,6 +449,7 @@ struct radv_cmd_state { bool uses_vrs_coarse_shading; bool uses_dynamic_patch_control_points; bool uses_dynamic_vertex_binding_stride; + bool uses_fbfetch_output; /* copied from ps->info.ps.uses_fbfetch_output on PS bind; OR'ed with the secondary's value in radv_CmdExecuteCommands() */ }; struct radv_enc_state { diff --git a/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt index a7fc4dfd184..8245da675a1 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-navi31-fails.txt @@ -207,6 +207,3 @@ spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu,Fail spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail - -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/11127 -KHR-GL46.blend_equation_advanced.test_coherency.multiplySequence,Fail diff --git a/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt b/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt index c5e48f79dcb..df03d976a56 100644 --- a/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt +++ b/src/gallium/drivers/zink/ci/zink-radv-vangogh-fails.txt @@ -177,20 +177,3 @@ spec@ext_framebuffer_multisample@interpolation 8 
non-centroid-disabled,Fail # Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104 spec@arb_viewport_array@display-list,Fail - -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/11127 -dEQP-GLES31.functional.blend_equation_advanced.msaa.colorburn,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.colordodge,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.darken,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.difference,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.exclusion,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hardlight,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_color,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_hue,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_luminosity,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_saturation,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.lighten,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.multiply,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.overlay,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.screen,Fail -dEQP-GLES31.functional.blend_equation_advanced.msaa.softlight,Fail