radv: Enable compute dispatch tunneling

Compute tunneling can considerably lower the latency of high-priority
compute work. Enabling it is beneficial in cases where high-priority
work is dispatched while the GPU is already busy with other work (e.g.
rendering on GFX). This is the case in VR compositors that dispatch
latency-sensitive compositing work to ACE while GFX is busy rendering
the next frame.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26462>
This commit is contained in:
Friedrich Vock 2023-12-01 15:18:44 +01:00 committed by Marge Bot
parent b3ab233ff7
commit d6d68ceda1
2 changed files with 9 additions and 0 deletions

View file

@ -988,6 +988,13 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr
*/
device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
}
if (device->physical_device->rad_info.gfx_level >= GFX10) {
/* Enable asynchronous compute tunneling. The KMD restricts this feature
* to high-priority compute queues, so setting the bit on any other queue
* is a no-op. PAL always sets this bit as well.
*/
device->dispatch_initiator |= S_00B800_TUNNEL_ENABLE(1);
}
/* Disable partial preemption for task shaders.
* The kernel may not support preemption, but PAL always sets this bit,

View file

@ -113,6 +113,8 @@ si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
radeon_emit(cs, 0); /* R_00B894_COMPUTE_USER_ACCUM_1 */
radeon_emit(cs, 0); /* R_00B898_COMPUTE_USER_ACCUM_2 */
radeon_emit(cs, 0); /* R_00B89C_COMPUTE_USER_ACCUM_3 */
radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
}
/* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID