tu/a7xx: Use BLIT_EVENT_STORE_AND_CLEAR when appropriate

BLIT_EVENT_STORE_AND_CLEAR presumably swallows the BLIT_EVENT_CLEAR
at the start of the next bin. This should be faster than emitting
separate store and clear events.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/30270>
This commit is contained in:
Danylo Piliaiev 2024-08-13 15:59:30 +02:00 committed by Marge Bot
parent 80a50269a2
commit a15466187c
3 changed files with 60 additions and 15 deletions

View file

@@ -3957,11 +3957,46 @@ tu_emit_blit(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
const struct tu_image_view *iview,
const struct tu_render_pass_attachment *attachment,
const VkClearValue *clear_value,
enum a6xx_blit_event_type blit_event_type,
bool separate_stencil)
{
assert(blit_event_type != BLIT_EVENT_CLEAR);
event_blit_setup(cs, attachment, blit_event_type, 0x0);
uint32_t clear_mask = 0;
/* BLIT_EVENT_STORE_AND_CLEAR would presumably swallow the
* BLIT_EVENT_CLEAR at the start of a renderpass, and be more efficient.
*/
if (blit_event_type == BLIT_EVENT_STORE && clear_value &&
attachment->clear_mask &&
use_generic_clear_for_image_clear(cmd, iview->image)) {
blit_event_type = BLIT_EVENT_STORE_AND_CLEAR;
enum pipe_format format = vk_format_to_pipe_format(attachment->format);
VkImageAspectFlags aspect_mask = attachment->clear_mask;
if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
if (separate_stencil)
aspect_mask = VK_IMAGE_ASPECT_STENCIL_BIT;
else
aspect_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
}
if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
if (separate_stencil)
format = PIPE_FORMAT_S8_UINT;
else
format = PIPE_FORMAT_Z32_FLOAT;
}
clear_mask = aspect_write_mask_generic_clear(format, aspect_mask);
uint32_t clear_vals[4] = {};
pack_blit_event_clear_value(clear_value, format, clear_vals);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
tu_cs_emit_array(cs, clear_vals, 4);
}
event_blit_setup(cs, attachment, blit_event_type, clear_mask);
for_each_layer(i, attachment->clear_views, cmd->state.framebuffer->layers) {
event_blit_dst_view blt_view = blt_view_from_tu_view(iview, i);
@@ -4194,10 +4229,10 @@ tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
load_3d_blit<CHIP>(cmd, cs, iview, attachment, true);
} else {
if (load_common)
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, BLIT_EVENT_LOAD, false);
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, NULL, BLIT_EVENT_LOAD, false);
if (load_stencil)
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, BLIT_EVENT_LOAD, true);
tu_emit_blit<CHIP>(cmd, cs, iview, attachment, NULL, BLIT_EVENT_LOAD, true);
}
if (cond_exec)
@@ -4473,6 +4508,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
const struct tu_image_view *iview = cmd->state.attachments[a];
struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
const VkClearValue *clear_value = &cmd->state.clear_values[gmem_a];
bool resolve = a != gmem_a;
if (resolve)
clear_value = NULL;
if (!dst->store && !dst->store_stencil)
return;
@@ -4513,9 +4552,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
/* use fast path when render area is aligned, except for unsupported resolve cases */
if (use_fast_path) {
if (store_common)
tu_emit_blit<CHIP>(cmd, cs, iview, src, BLIT_EVENT_STORE, false);
tu_emit_blit<CHIP>(cmd, cs, iview, src, clear_value, BLIT_EVENT_STORE, false);
if (store_separate_stencil)
tu_emit_blit<CHIP>(cmd, cs, iview, src, BLIT_EVENT_STORE, true);
tu_emit_blit<CHIP>(cmd, cs, iview, src, clear_value, BLIT_EVENT_STORE, true);
if (cond_exec) {
tu_end_load_store_cond_exec(cmd, cs, false);

View file

@@ -1184,16 +1184,9 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
tu6_emit_blit_scissor(cmd, cs, true);
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
if (pass->attachments[a].gmem) {
const bool cond_exec_allowed = cmd->state.tiling->binning_possible &&
cmd->state.pass->has_cond_load_store;
tu_store_gmem_attachment<CHIP>(cmd, cs, a, a,
fb->layers, subpass->multiview_mask,
cond_exec_allowed);
}
}
/* Resolve should happen before store in case BLIT_EVENT_STORE_AND_CLEAR is
* used for a store.
*/
if (subpass->resolve_attachments) {
for (unsigned i = 0; i < subpass->resolve_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
@@ -1205,6 +1198,16 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
}
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
if (pass->attachments[a].gmem) {
const bool cond_exec_allowed = cmd->state.tiling->binning_possible &&
cmd->state.pass->has_cond_load_store;
tu_store_gmem_attachment<CHIP>(cmd, cs, a, a,
fb->layers, subpass->multiview_mask,
cond_exec_allowed);
}
}
if (pass->has_fdm)
tu_cs_set_writeable(cs, false);
}
@@ -4012,6 +4015,7 @@ tu_restore_suspended_pass(struct tu_cmd_buffer *cmd,
cmd->state.subpass = suspended->state.suspended_pass.subpass;
cmd->state.framebuffer = suspended->state.suspended_pass.framebuffer;
cmd->state.attachments = suspended->state.suspended_pass.attachments;
cmd->state.clear_values = suspended->state.suspended_pass.clear_values;
cmd->state.render_area = suspended->state.suspended_pass.render_area;
cmd->state.gmem_layout = suspended->state.suspended_pass.gmem_layout;
cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout];
@@ -4636,6 +4640,7 @@ tu_CmdBeginRendering(VkCommandBuffer commandBuffer,
cmd->state.suspended_pass.framebuffer = cmd->state.framebuffer;
cmd->state.suspended_pass.render_area = cmd->state.render_area;
cmd->state.suspended_pass.attachments = cmd->state.attachments;
cmd->state.suspended_pass.clear_values = cmd->state.clear_values;
cmd->state.suspended_pass.gmem_layout = cmd->state.gmem_layout;
}

View file

@@ -495,6 +495,7 @@ struct tu_cmd_state
enum tu_gmem_layout gmem_layout;
const struct tu_image_view **attachments;
VkClearValue *clear_values;
struct tu_lrz_state lrz;
} suspended_pass;