anv: try to keep the pipeline in GPGPU mode when doing buffer transfer ops

To avoid ping-ponging between 3D & GPGPU in the following sequence:

  vkCmdDispatch(...)
  vkCmdCopyBuffer(...)
  vkCmdDispatch(...)

We can try to keep the pipeline in GPGPU mode when doing blorp buffer
operations (we have blorp support for the CCS and can use the same
shaders on RCS).

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Rohan Garg <rohan.garg@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27956>
This commit is contained in:
Lionel Landwerlin 2024-03-01 12:39:03 +02:00 committed by Marge Bot
parent 194afe8416
commit 6823ffe70e
4 changed files with 27 additions and 3 deletions

View file

@@ -130,6 +130,10 @@ anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
unreachable("unknown queue family");
}
/* Can't have both flags at the same time. */
assert((flags & BLORP_BATCH_USE_BLITTER) == 0 ||
(flags & BLORP_BATCH_USE_COMPUTE) == 0);
blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
}
@@ -1030,7 +1034,10 @@ void anv_CmdCopyBuffer2(
ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0);
anv_blorp_batch_init(cmd_buffer, &batch,
cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0);
for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
@@ -1054,7 +1061,10 @@ void anv_CmdUpdateBuffer(
ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0);
anv_blorp_batch_init(cmd_buffer, &batch,
cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0);
/* We can't quite grab a full block because the state stream needs a
* little data at the top to build its linked list.
@@ -1118,7 +1128,10 @@ anv_cmd_buffer_fill_area(struct anv_cmd_buffer *cmd_buffer,
struct isl_surf isl_surf;
struct blorp_batch batch;
anv_blorp_batch_init(cmd_buffer, &batch, 0);
anv_blorp_batch_init(cmd_buffer, &batch,
cmd_buffer->state.current_pipeline ==
cmd_buffer->device->physical->gpgpu_pipeline_value ?
BLORP_BATCH_USE_COMPUTE : 0);
/* First, we compute the biggest format that can be used with the
* given offsets and size.

View file

@@ -1201,6 +1201,9 @@ struct anv_physical_device {
void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_device *, struct anv_address,
enum anv_timestamp_capture_type, void *);
struct intel_measure_device measure_device;
/* Value of PIPELINE_SELECT::PipelineSelection == GPGPU */
uint32_t gpgpu_pipeline_value;
};
static inline uint32_t

View file

@@ -3390,6 +3390,12 @@ anv_pipe_flush_bits_for_access_flags(struct anv_cmd_buffer *cmd_buffer,
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
} else {
/* We can use the data port when trying to stay in compute mode on
* the RCS.
*/
pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_UNTYPED_DATAPORT_CACHE_FLUSH_BIT;
/* Most operations are done through RT/depth writes */
pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
}

View file

@@ -770,6 +770,8 @@ genX(init_physical_device_state)(ASSERTED struct anv_physical_device *pdevice)
#endif
pdevice->cmd_emit_timestamp = genX(cmd_emit_timestamp);
pdevice->gpgpu_pipeline_value = GPGPU;
}
VkResult