radv: Enable compute dispatch tunneling

Compute tunneling can considerably lower the latency of high-priority compute work. Enabling it is beneficial in cases where high-priority work is dispatched while the GPU is already busy with other work (e.g. rendering on GFX). This is the case in VR compositors that dispatch latency-sensitive compositing work to ACE while GFX is busy rendering the next frame. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462>
2026-05-07 07:08:04 +02:00 · 2023-12-01 15:18:44 +01:00 · 2023-12-01 15:18:44 +01:00 · d6d68ceda1
commit d6d68ceda1
parent b3ab233ff7
2 changed files with 9 additions and 0 deletions
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -988,6 +988,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
       */
      device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
   }
+   if (device->physical_device->rad_info.gfx_level >= GFX10) {
+      /* Enable asynchronous compute tunneling. The KMD restricts this feature
+       * to high-priority compute queues, so setting the bit on any other queue
+       * is a no-op. PAL always sets this bit as well.
+       */
+      device->dispatch_initiator |= S_00B800_TUNNEL_ENABLE(1);
+   }

   /* Disable partial preemption for task shaders.
    * The kernel may not support preemption, but PAL always sets this bit,
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -113,6 +113,8 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
      radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */
      radeon_emit(cs, 0); /* R_00B898_COMPUTE_USER_ACCUM_2 */
      radeon_emit(cs, 0); /* R_00B89C_COMPUTE_USER_ACCUM_3 */
+
+      radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
   }

   /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID