radv: fix DRLR with subpass input attachments and feedback loops

Dynamic rendering local read allows the application to use subpass input
attachments with feedback loops. But unlike legacy render passes, where
it's possible to detect feedback loops at creation time, this is not
possible with dynamic rendering.

To fix that, the driver needs to determine at draw time if a feedback
loop is present, and it needs to decompress DCC/HTILE if necessary.

See https://gitlab.khronos.org/vulkan/vulkan/-/issues/3928 for more
information.

Note that VKCTS is still missing coverage but this has been reported.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/11127
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30124>
This commit is contained in:
Samuel Pitoiset 2024-07-11 15:49:03 +02:00 committed by Marge Bot
parent 4a191e34c9
commit 3af0f0129c
4 changed files with 147 additions and 29 deletions

View file

@ -261,6 +261,10 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dy
if (pdev->info.rbplus_allowed && (dest_mask & RADV_DYNAMIC_COLOR_WRITE_MASK)) {
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS;
}
if (dest_mask & (RADV_DYNAMIC_COLOR_ATTACHMENT_MAP | RADV_DYNAMIC_INPUT_ATTACHMENT_MAP)) {
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT;
}
}
bool
@ -8097,6 +8101,11 @@ radv_bind_fragment_shader(struct radv_cmd_buffer *cmd_buffer, const struct radv_
previous_ps->info.ps.pops_is_per_sample != ps->info.ps.pops_is_per_sample)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DB_SHADER_CONTROL;
if (!previous_ps || cmd_buffer->state.uses_fbfetch_output != ps->info.ps.uses_fbfetch_output) {
cmd_buffer->state.uses_fbfetch_output = ps->info.ps.uses_fbfetch_output;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT;
}
/* Re-emit the PS epilog when a new fragment shader is bound. */
if (ps->info.has_epilog)
cmd_buffer->state.emitted_ps_epilog = NULL;
@ -9169,6 +9178,7 @@ radv_CmdSetRenderingAttachmentLocationsKHR(VkCommandBuffer commandBuffer,
}
state->dirty_dynamic |= RADV_DYNAMIC_COLOR_ATTACHMENT_MAP;
state->dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT;
}
VKAPI_ATTR void VKAPI_CALL
@ -9203,6 +9213,120 @@ radv_CmdSetRenderingInputAttachmentIndicesKHR(VkCommandBuffer commandBuffer,
: *pLocationInfo->pStencilInputAttachmentIndex;
state->dirty_dynamic |= RADV_DYNAMIC_INPUT_ATTACHMENT_MAP;
state->dirty |= RADV_CMD_DIRTY_FBFETCH_OUTPUT;
}
/*
 * Handle a possible feedback loop on color attachment `index` of the current
 * rendering state.
 *
 * Nothing happens unless all of the following hold:
 *  - the attachment has an image view,
 *  - the image was created with VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
 *  - the image is DCC compressed in the attachment's current layout,
 *  - the dynamic color attachment map entry for `index` is not unused, and
 *    the dynamic input attachment map maps that entry to itself.
 *
 * When they do, the image is transitioned to
 * VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, the attachment layout
 * is updated accordingly and the framebuffer state is marked dirty.
 */
static void
radv_handle_color_fbfetch_output(struct radv_cmd_buffer *cmd_buffer, uint32_t index)
{
   const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_dynamic_state *dyn = &cmd_buffer->state.dynamic;
   struct radv_attachment *color = &cmd_buffer->state.render.color_att[index];

   /* Unbound attachment slot. */
   if (color->iview == NULL)
      return;

   const struct radv_image_view *view = color->iview;
   const struct radv_image *img = view->image;

   /* Only images usable as input attachments can be read back by the shader. */
   if ((img->vk.usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) == 0)
      return;

   /* No decompression is needed if DCC isn't compressed in this layout. */
   if (!radv_layout_dcc_compressed(device, img, view->vk.base_mip_level, color->layout,
                                   radv_image_queue_family_mask(view->image, cmd_buffer->qf, cmd_buffer->qf)))
      return;

   /* The attachment must be mapped, and the input attachment map must point
    * back at the same location, otherwise it is not treated as a feedback loop.
    */
   const uint32_t mapped_idx = dyn->vk.cal.color_map[index];
   if (mapped_idx == MESA_VK_ATTACHMENT_UNUSED || dyn->vk.ial.color_map[mapped_idx] != mapped_idx)
      return;

   /* Subresources covered by the attachment view. */
   const VkImageSubresourceRange view_range = {
      .aspectMask = view->vk.aspects,
      .baseMipLevel = view->vk.base_mip_level,
      .levelCount = view->vk.level_count,
      .baseArrayLayer = view->vk.base_array_layer,
      .layerCount = view->vk.layer_count,
   };

   /* Consider previous rendering work for WAW hazards. */
   cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                                                         VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, view->image);

   /* Force a transition to FEEDBACK_LOOP_OPTIMAL to decompress DCC. */
   radv_handle_image_transition(cmd_buffer, view->image, color->layout,
                                VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, RADV_QUEUE_GENERAL,
                                RADV_QUEUE_GENERAL, &view_range, NULL);

   /* Remember the new layout and make sure the framebuffer gets re-emitted. */
   color->layout = VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
}
static void
radv_handle_depth_fbfetch_output(struct radv_cmd_buffer *cmd_buffer)
{
const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
struct radv_rendering_state *render = &cmd_buffer->state.render;
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
struct radv_attachment *att = &render->ds_att;
if (!att->iview)
return;
const struct radv_image *image = att->iview->image;
if (!(image->vk.usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
return;
if (!radv_layout_is_htile_compressed(
device, att->iview->image, att->layout,
radv_image_queue_family_mask(att->iview->image, cmd_buffer->qf, cmd_buffer->qf)))
return;
if (d->vk.ial.depth_att == MESA_VK_ATTACHMENT_UNUSED && d->vk.ial.stencil_att == MESA_VK_ATTACHMENT_UNUSED)
return;
const VkImageSubresourceRange range = {
.aspectMask = att->iview->vk.aspects,
.baseMipLevel = att->iview->vk.base_mip_level,
.levelCount = att->iview->vk.level_count,
.baseArrayLayer = att->iview->vk.base_array_layer,
.layerCount = att->iview->vk.layer_count,
};
/* Force a transition to FEEDBACK_LOOP_OPTIMAL to decompress HTILE. */
radv_handle_image_transition(cmd_buffer, att->iview->image, att->layout,
VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT, RADV_QUEUE_GENERAL,
RADV_QUEUE_GENERAL, &range, NULL);
att->layout = VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
}
/*
 * Process the RADV_CMD_DIRTY_FBFETCH_OUTPUT dirty bit.
 *
 * The dirty bit is always cleared. The color attachments and the
 * depth/stencil attachment are then checked for feedback loops, unless the
 * rendering state has has_input_attachment_no_concurrent_writes set or the
 * command buffer state says fbfetch output isn't used.
 */
static void
radv_handle_fbfetch_output(struct radv_cmd_buffer *cmd_buffer)
{
   const struct radv_rendering_state *rendering = &cmd_buffer->state.render;

   cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FBFETCH_OUTPUT;

   /* Nothing to do when dynamic rendering doesn't use concurrent input attachment
    * writes, or when the bound fragment shader doesn't use subpass input attachments.
    */
   if (rendering->has_input_attachment_no_concurrent_writes || !cmd_buffer->state.uses_fbfetch_output)
      return;

   /* Check if any color attachments are compressed and also used as input attachments. */
   uint32_t att_idx = 0;
   while (att_idx < rendering->color_att_count) {
      radv_handle_color_fbfetch_output(cmd_buffer, att_idx);
      att_idx++;
   }

   /* Check if the depth/stencil attachment is compressed and also used as input attachment. */
   radv_handle_depth_fbfetch_output(cmd_buffer);
}
VKAPI_ATTR void VKAPI_CALL
@ -9257,13 +9381,19 @@ radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCou
primary->shader_upload_seq = MAX2(primary->shader_upload_seq, secondary->shader_upload_seq);
if (!secondary->state.render.has_image_views && primary->state.render.active &&
(primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) {
/* Emit the framebuffer state from primary if secondary
* has been recorded without a framebuffer, otherwise
* fast color/depth clears can't work.
*/
radv_emit_framebuffer_state(primary);
primary->state.uses_fbfetch_output |= secondary->state.uses_fbfetch_output;
if (!secondary->state.render.has_image_views) {
if (primary->state.dirty & RADV_CMD_DIRTY_FBFETCH_OUTPUT)
radv_handle_fbfetch_output(primary);
if (primary->state.render.active && (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) {
/* Emit the framebuffer state from primary if secondary
* has been recorded without a framebuffer, otherwise
* fast color/depth clears can't work.
*/
radv_emit_framebuffer_state(primary);
}
}
if (secondary->gang.cs) {
@ -9545,6 +9675,8 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe
struct radv_rendering_state *render = &cmd_buffer->state.render;
render->active = true;
render->has_image_views = true;
render->has_input_attachment_no_concurrent_writes =
!!(pRenderingInfo->flags & VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA);
render->area = pRenderingInfo->renderArea;
render->view_mask = pRenderingInfo->viewMask;
render->layer_count = pRenderingInfo->layerCount;
@ -9558,7 +9690,7 @@ radv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRe
render->ds_att_aspects = ds_att_aspects;
render->vrs_att = vrs_att;
render->vrs_texel_size = vrs_texel_size;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER | RADV_CMD_DIRTY_FBFETCH_OUTPUT;
if (pdev->info.rbplus_allowed)
cmd_buffer->state.dirty |= RADV_CMD_DIRTY_RBPLUS;
@ -11025,6 +11157,9 @@ radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info
cmd_buffer->state.last_index_type = -1;
}
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FBFETCH_OUTPUT)
radv_handle_fbfetch_output(cmd_buffer);
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_GRAPHICS_SHADERS) {
radv_bind_graphics_shaders(cmd_buffer);
}

View file

@ -93,7 +93,8 @@ enum radv_cmd_dirty_bits {
RADV_CMD_DIRTY_STREAMOUT_ENABLE = 1ull << 10,
RADV_CMD_DIRTY_GRAPHICS_SHADERS = 1ull << 11,
RADV_CMD_DIRTY_COLOR_OUTPUT = 1ull << 12,
RADV_CMD_DIRTY_ALL = (1ull << 13) - 1,
RADV_CMD_DIRTY_FBFETCH_OUTPUT = 1ull << 13,
RADV_CMD_DIRTY_ALL = (1ull << 14) - 1,
};
enum radv_cmd_flush_bits {
@ -184,6 +185,7 @@ struct radv_attachment {
struct radv_rendering_state {
bool active;
bool has_image_views;
bool has_input_attachment_no_concurrent_writes;
VkRect2D area;
uint32_t layer_count;
uint32_t view_mask;
@ -447,6 +449,7 @@ struct radv_cmd_state {
bool uses_vrs_coarse_shading;
bool uses_dynamic_patch_control_points;
bool uses_dynamic_vertex_binding_stride;
bool uses_fbfetch_output;
};
struct radv_enc_state {

View file

@ -207,6 +207,3 @@ spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yuv420,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvu420,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-sample_yvyu,Fail
spec@ext_image_dma_buf_import@ext_image_dma_buf_import-transcode-nv12-as-r8-gr88,Fail
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/11127
KHR-GL46.blend_equation_advanced.test_coherency.multiplySequence,Fail

View file

@ -177,20 +177,3 @@ spec@ext_framebuffer_multisample@interpolation 8 non-centroid-disabled,Fail
# Regression noticed in https://gitlab.freedesktop.org/mesa/mesa/-/pipelines/891104
spec@arb_viewport_array@display-list,Fail
# https://gitlab.freedesktop.org/mesa/mesa/-/issues/11127
dEQP-GLES31.functional.blend_equation_advanced.msaa.colorburn,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.colordodge,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.darken,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.difference,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.exclusion,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.hardlight,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_color,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_hue,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_luminosity,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.hsl_saturation,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.lighten,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.multiply,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.overlay,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.screen,Fail
dEQP-GLES31.functional.blend_equation_advanced.msaa.softlight,Fail