From a15466187c0d171fe633e1fbeb34c404554b875e Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Tue, 13 Aug 2024 15:59:30 +0200 Subject: [PATCH] tu/a7xx: Use BLIT_EVENT_STORE_AND_CLEAR when appropriate BLIT_EVENT_STORE_AND_CLEAR presumably swallows the BLIT_EVENT_CLEAR at the start of the next bin. Should be faster than separate events. Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/vulkan/tu_clear_blit.cc | 49 ++++++++++++++++++++++++--- src/freedreno/vulkan/tu_cmd_buffer.cc | 25 ++++++++------ src/freedreno/vulkan/tu_cmd_buffer.h | 1 + 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/src/freedreno/vulkan/tu_clear_blit.cc b/src/freedreno/vulkan/tu_clear_blit.cc index 7003d461b5d..36682245e22 100644 --- a/src/freedreno/vulkan/tu_clear_blit.cc +++ b/src/freedreno/vulkan/tu_clear_blit.cc @@ -3957,11 +3957,46 @@ tu_emit_blit(struct tu_cmd_buffer *cmd, struct tu_cs *cs, const struct tu_image_view *iview, const struct tu_render_pass_attachment *attachment, + const VkClearValue *clear_value, enum a6xx_blit_event_type blit_event_type, bool separate_stencil) { assert(blit_event_type != BLIT_EVENT_CLEAR); - event_blit_setup(cs, attachment, blit_event_type, 0x0); + uint32_t clear_mask = 0; + + /* BLIT_EVENT_STORE_AND_CLEAR would presumably swallow the + * BLIT_EVENT_CLEAR at the start of a renderpass, and be more efficient. + */ + if (blit_event_type == BLIT_EVENT_STORE && clear_value && + attachment->clear_mask && + use_generic_clear_for_image_clear(cmd, iview->image)) { + blit_event_type = BLIT_EVENT_STORE_AND_CLEAR; + + enum pipe_format format = vk_format_to_pipe_format(attachment->format); + VkImageAspectFlags aspect_mask = attachment->clear_mask; + if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { + if (separate_stencil) + aspect_mask = VK_IMAGE_ASPECT_STENCIL_BIT; + else + aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT; + } + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) { + if (separate_stencil) + format = PIPE_FORMAT_S8_UINT; + else + format = PIPE_FORMAT_Z32_FLOAT; + } + + clear_mask = aspect_write_mask_generic_clear(format, aspect_mask); + + uint32_t clear_vals[4] = {}; + pack_blit_event_clear_value(clear_value, format, clear_vals); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); + tu_cs_emit_array(cs, clear_vals, 4); + } + + event_blit_setup(cs, attachment, blit_event_type, clear_mask); for_each_layer(i, attachment->clear_views, cmd->state.framebuffer->layers) { event_blit_dst_view blt_view = blt_view_from_tu_view(iview, i); @@ -4194,10 +4229,10 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd, load_3d_blit(cmd, cs, iview, attachment, true); } else { if (load_common) - tu_emit_blit(cmd, cs, iview, attachment, BLIT_EVENT_LOAD, false); + tu_emit_blit(cmd, cs, iview, attachment, NULL, BLIT_EVENT_LOAD, false); if (load_stencil) - tu_emit_blit(cmd, cs, iview, attachment, BLIT_EVENT_LOAD, true); + tu_emit_blit(cmd, cs, iview, attachment, NULL, BLIT_EVENT_LOAD, true); } if (cond_exec) @@ -4473,6 +4508,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a]; const struct tu_image_view *iview = cmd->state.attachments[a]; struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a]; + const VkClearValue *clear_value = &cmd->state.clear_values[gmem_a]; + bool resolve = a != gmem_a; + if (resolve) + clear_value = NULL; if (!dst->store && !dst->store_stencil) return; @@ -4513,9 +4552,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd, /* use fast path when render area is aligned, except for unsupported resolve cases */ if (use_fast_path) { if (store_common) - tu_emit_blit(cmd, cs, iview, src, BLIT_EVENT_STORE, false); + tu_emit_blit(cmd, cs, iview, src, clear_value, BLIT_EVENT_STORE, false); if (store_separate_stencil) - tu_emit_blit(cmd, cs, iview, src, BLIT_EVENT_STORE, true); + tu_emit_blit(cmd, cs, iview, src, clear_value, BLIT_EVENT_STORE, true); if (cond_exec) { tu_end_load_store_cond_exec(cmd, cs, false); diff --git a/src/freedreno/vulkan/tu_cmd_buffer.cc b/src/freedreno/vulkan/tu_cmd_buffer.cc index 98c01eb43c5..d0653f72032 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.cc +++ b/src/freedreno/vulkan/tu_cmd_buffer.cc @@ -1184,16 +1184,9 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) tu6_emit_blit_scissor(cmd, cs, true); - for (uint32_t a = 0; a < pass->attachment_count; ++a) { - if (pass->attachments[a].gmem) { - const bool cond_exec_allowed = cmd->state.tiling->binning_possible && - cmd->state.pass->has_cond_load_store; - tu_store_gmem_attachment(cmd, cs, a, a, - fb->layers, subpass->multiview_mask, - cond_exec_allowed); - } - } - + /* Resolve should happen before store in case BLIT_EVENT_STORE_AND_CLEAR is + * used for a store. + */ if (subpass->resolve_attachments) { for (unsigned i = 0; i < subpass->resolve_count; i++) { uint32_t a = subpass->resolve_attachments[i].attachment; @@ -1205,6 +1198,16 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs) } } + for (uint32_t a = 0; a < pass->attachment_count; ++a) { + if (pass->attachments[a].gmem) { + const bool cond_exec_allowed = cmd->state.tiling->binning_possible && + cmd->state.pass->has_cond_load_store; + tu_store_gmem_attachment(cmd, cs, a, a, + fb->layers, subpass->multiview_mask, + cond_exec_allowed); + } + } + if (pass->has_fdm) tu_cs_set_writeable(cs, false); } @@ -4012,6 +4015,7 @@ tu_restore_suspended_pass(struct tu_cmd_buffer *cmd, cmd->state.subpass = suspended->state.suspended_pass.subpass; cmd->state.framebuffer = suspended->state.suspended_pass.framebuffer; cmd->state.attachments = suspended->state.suspended_pass.attachments; + cmd->state.clear_values = suspended->state.suspended_pass.clear_values; cmd->state.render_area = suspended->state.suspended_pass.render_area; cmd->state.gmem_layout = suspended->state.suspended_pass.gmem_layout; cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout]; @@ -4636,6 +4640,7 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer, cmd->state.suspended_pass.framebuffer = cmd->state.framebuffer; cmd->state.suspended_pass.render_area = cmd->state.render_area; cmd->state.suspended_pass.attachments = cmd->state.attachments; + cmd->state.suspended_pass.clear_values = cmd->state.clear_values; cmd->state.suspended_pass.gmem_layout = cmd->state.gmem_layout; } diff --git a/src/freedreno/vulkan/tu_cmd_buffer.h b/src/freedreno/vulkan/tu_cmd_buffer.h index 338351b4bd0..1c35d077ba7 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.h +++ b/src/freedreno/vulkan/tu_cmd_buffer.h @@ -495,6 +495,7 @@ struct tu_cmd_state enum tu_gmem_layout gmem_layout; const struct tu_image_view **attachments; + VkClearValue *clear_values; struct tu_lrz_state lrz; } suspended_pass;