anv: Implement Xe version of anv_queue_exec_locked() and queue_exec_trace()

Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22171>
commit fdea48df5e (parent 91ee00a741)
Author:    José Roberto de Souza
Date:      2023-02-10 08:00:16 -08:00
Committer: Marge Bot

3 changed files with 225 additions and 0 deletions


@@ -70,3 +70,208 @@ exec_error:
   return result;
}
#define TYPE_SIGNAL true
#define TYPE_WAIT false
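
/* Translate a vk_sync into a drm_xe_sync entry. Only DRM-syncobj-backed
 * vk_sync types are supported: a non-zero value selects a timeline syncobj,
 * a zero value a binary one.
 */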
static void
xe_exec_fill_sync(struct drm_xe_sync *xe_sync, struct vk_sync *vk_sync,
                  uint64_t value, bool signal)
{
   if (unlikely(!vk_sync_type_is_drm_syncobj(vk_sync->type))) {
      unreachable("Unsupported sync type");
      return;
   }

   const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(vk_sync);
   xe_sync->handle = syncobj->syncobj;

   if (value) {
      xe_sync->flags |= DRM_XE_SYNC_TIMELINE_SYNCOBJ;
      xe_sync->timeline_value = value;
   } else {
      xe_sync->flags |= DRM_XE_SYNC_SYNCOBJ;
   }

   if (signal)
      xe_sync->flags |= DRM_XE_SYNC_SIGNAL;
}
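
/* Build the drm_xe_sync array for a submission: the optional utrace signal
 * first, then all waits, then all signals and finally the queue's own sync
 * (when present), which is signaled on completion.
 */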
static VkResult
xe_exec_process_syncs(struct anv_queue *queue,
                      uint32_t wait_count, const struct vk_sync_wait *waits,
                      uint32_t signal_count, const struct vk_sync_signal *signals,
                      struct anv_utrace_submit *utrace_submit,
                      struct drm_xe_sync **ret, uint32_t *ret_count)
{
   struct anv_device *device = queue->device;
   uint32_t num_syncs = wait_count + signal_count + (utrace_submit ? 1 : 0) +
                        (queue->sync ? 1 : 0);
   if (!num_syncs)
      return VK_SUCCESS;

   struct drm_xe_sync *xe_syncs = vk_zalloc(&device->vk.alloc,
                                            sizeof(*xe_syncs) * num_syncs, 8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!xe_syncs)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   uint32_t count = 0;

   if (utrace_submit) {
      struct drm_xe_sync *xe_sync = &xe_syncs[count++];

      xe_exec_fill_sync(xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
   }

   for (uint32_t i = 0; i < wait_count; i++) {
      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
      const struct vk_sync_wait *vk_wait = &waits[i];

      xe_exec_fill_sync(xe_sync, vk_wait->sync, vk_wait->wait_value,
                        TYPE_WAIT);
   }

   for (uint32_t i = 0; i < signal_count; i++) {
      struct drm_xe_sync *xe_sync = &xe_syncs[count++];
      const struct vk_sync_signal *vk_signal = &signals[i];

      xe_exec_fill_sync(xe_sync, vk_signal->sync, vk_signal->signal_value,
                        TYPE_SIGNAL);
   }

   if (queue->sync) {
      struct drm_xe_sync *xe_sync = &xe_syncs[count++];

      xe_exec_fill_sync(xe_sync, queue->sync, 0, TYPE_SIGNAL);
   }

   assert(count == num_syncs);

   *ret = xe_syncs;
   *ret_count = num_syncs;
   return VK_SUCCESS;
}
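
/* Print batch and submission debug information when the matching INTEL_DEBUG
 * flags are enabled.
 */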
static void
xe_exec_print_debug(struct anv_queue *queue, uint32_t cmd_buffer_count,
                    struct anv_cmd_buffer **cmd_buffers,
                    struct anv_query_pool *perf_query_pool,
                    uint32_t perf_query_pass, struct drm_xe_exec *exec)
{
   if (INTEL_DEBUG(DEBUG_SUBMIT))
      fprintf(stderr, "Batch offset=0x%016"PRIx64" on queue %u\n",
              (uint64_t)exec->address, queue->vk.index_in_family);

   anv_cmd_buffer_exec_batch_debug(queue, cmd_buffer_count, cmd_buffers,
                                   perf_query_pool, perf_query_pass);
}
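
/* Submit the utrace batch on its own, signaling the submit's sync once the
 * GPU is done so the trace data can be collected.
 */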
VkResult
xe_queue_exec_utrace_locked(struct anv_queue *queue,
                            struct anv_utrace_submit *utrace_submit)
{
   struct anv_device *device = queue->device;
   struct drm_xe_sync xe_sync = {};

   xe_exec_fill_sync(&xe_sync, utrace_submit->sync, 0, TYPE_SIGNAL);
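
   /* On integrated GPUs without coherent CPU/GPU caches, the batch has to be
    * flushed out of the CPU caches before the GPU reads it.
    */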
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
   if (device->physical->memory.need_clflush)
      intel_flush_range(utrace_submit->batch_bo->map,
                        utrace_submit->batch_bo->size);
#endif

   struct drm_xe_exec exec = {
      .engine_id = queue->engine_id,
      .num_batch_buffer = 1,
      .syncs = (uintptr_t)&xe_sync,
      .num_syncs = 1,
      .address = utrace_submit->batch_bo->offset,
   };

   if (likely(!device->info->no_hw)) {
      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
         return vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
   }

   return VK_SUCCESS;
}
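
/* Xe implementation of anv_queue_exec_locked(): translates the waits and
 * signals into drm_xe_sync entries, points the exec at the first batch BO
 * (or the trivial batch when there are no command buffers) and submits it
 * with DRM_IOCTL_XE_EXEC.
 */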
VkResult
xe_queue_exec_locked(struct anv_queue *queue,
                     uint32_t wait_count,
                     const struct vk_sync_wait *waits,
                     uint32_t cmd_buffer_count,
                     struct anv_cmd_buffer **cmd_buffers,
                     uint32_t signal_count,
                     const struct vk_sync_signal *signals,
                     struct anv_query_pool *perf_query_pool,
                     uint32_t perf_query_pass)
{
   struct anv_device *device = queue->device;
   struct anv_utrace_submit *utrace_submit = NULL;
   VkResult result;

   result = anv_device_utrace_flush_cmd_buffers(queue, cmd_buffer_count,
                                                cmd_buffers, &utrace_submit);
   if (result != VK_SUCCESS)
      return result;
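
   /* A follow-up utrace submission is only needed when it has its own batch
    * to execute.
    */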
   if (utrace_submit && !utrace_submit->batch_bo)
      utrace_submit = NULL;

   struct drm_xe_sync *xe_syncs = NULL;
   uint32_t xe_syncs_count = 0;
   result = xe_exec_process_syncs(queue, wait_count, waits,
                                  signal_count, signals,
                                  utrace_submit,
                                  &xe_syncs, &xe_syncs_count);
   if (result != VK_SUCCESS)
      return result;

   struct drm_xe_exec exec = {
      .engine_id = queue->engine_id,
      .num_batch_buffer = 1,
      .syncs = (uintptr_t)xe_syncs,
      .num_syncs = xe_syncs_count,
   };
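
   /* With command buffers, chain them into a single batch and execute from
    * the first batch BO; without any, execute the device's trivial batch so
    * the requested syncs are still processed.
    */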
   if (cmd_buffer_count) {
      anv_cmd_buffer_chain_command_buffers(cmd_buffers, cmd_buffer_count);

#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
      if (device->physical->memory.need_clflush)
         anv_cmd_buffer_clflush(cmd_buffers, cmd_buffer_count);
#endif

      struct anv_cmd_buffer *first_cmd_buffer = cmd_buffers[0];
      struct anv_batch_bo *first_batch_bo =
         list_first_entry(&first_cmd_buffer->batch_bos,
                          struct anv_batch_bo, link);
      exec.address = first_batch_bo->bo->offset;
   } else {
      exec.address = device->trivial_batch_bo->offset;
   }

   xe_exec_print_debug(queue, cmd_buffer_count, cmd_buffers, perf_query_pool,
                       perf_query_pass, &exec);

   /* TODO: add perfetto stuff when Xe supports it */

   if (!device->info->no_hw) {
      if (intel_ioctl(device->fd, DRM_IOCTL_XE_EXEC, &exec))
         result = vk_device_set_lost(&device->vk, "anv_xe_queue_exec_locked failed: %m");
   }

   vk_free(&device->vk.alloc, xe_syncs);
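
   /* queue->sync is only set when submissions are forced to be synchronous
    * (e.g. INTEL_DEBUG=sync); in that case, wait for this exec to complete
    * before returning.
    */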
   if (result == VK_SUCCESS && queue->sync) {
      result = vk_sync_wait(&device->vk, queue->sync, 0,
                            VK_SYNC_WAIT_COMPLETE, UINT64_MAX);
      if (result != VK_SUCCESS)
         result = vk_queue_set_lost(&queue->vk, "sync wait failed");
   }

   if (result == VK_SUCCESS && utrace_submit)
      result = xe_queue_exec_utrace_locked(queue, utrace_submit);

   return result;
}


@@ -26,10 +26,28 @@
#include <stdint.h>

#include "vulkan/vulkan_core.h"
#include "vk_sync.h"

struct anv_queue;
struct anv_bo;
struct anv_cmd_buffer;
struct anv_query_pool;
struct anv_utrace_submit;

VkResult
xe_execute_simple_batch(struct anv_queue *queue, struct anv_bo *batch_bo,
                        uint32_t batch_bo_size);

VkResult
xe_queue_exec_locked(struct anv_queue *queue,
                     uint32_t wait_count,
                     const struct vk_sync_wait *waits,
                     uint32_t cmd_buffer_count,
                     struct anv_cmd_buffer **cmd_buffers,
                     uint32_t signal_count,
                     const struct vk_sync_signal *signals,
                     struct anv_query_pool *perf_query_pool,
                     uint32_t perf_query_pass);

VkResult
xe_queue_exec_utrace_locked(struct anv_queue *queue,
                            struct anv_utrace_submit *utrace_submit);


@@ -140,6 +140,8 @@ anv_xe_kmd_backend_get(void)
      .gem_vm_bind = xe_gem_vm_bind,
      .gem_vm_unbind = xe_gem_vm_unbind,
      .execute_simple_batch = xe_execute_simple_batch,
      .queue_exec_locked = xe_queue_exec_locked,
      .queue_exec_trace = xe_queue_exec_utrace_locked,
   };

   return &xe_backend;
}
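
Note: these entry points are not meant to be called directly; submissions are
expected to reach them through the KMD backend vtable. A minimal sketch of
that dispatch, assuming the kmd_backend pointer anv keeps on the device (not
part of this commit):

   /* Illustrative only: dispatching through the vtable keeps the same code
    * path working for both the i915 and Xe kernel drivers.
    */
   result = device->kmd_backend->queue_exec_locked(queue,
                                                   wait_count, waits,
                                                   cmd_buffer_count, cmd_buffers,
                                                   signal_count, signals,
                                                   perf_query_pool, perf_query_pass);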