mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-03-11 20:00:38 +01:00
Merge branch 'work/tu_kgsl_timeline_sync' into 'main'
tu/kgsl: timeline-based queue and sync primitives See merge request mesa/mesa!39751
This commit is contained in:
commit
e0d31a135c
4 changed files with 680 additions and 100 deletions
|
|
@ -75,6 +75,7 @@
|
|||
#include "vk_object.h"
|
||||
#include "vk_sync.h"
|
||||
#include "vk_drm_syncobj.h"
|
||||
#include "vk_sync_binary.h"
|
||||
#include "vk_sync_timeline.h"
|
||||
|
||||
#define MAX_VBS 32
|
||||
|
|
|
|||
|
|
@ -80,6 +80,11 @@ struct tu_queue_family {
|
|||
const VkQueueFamilyProperties *properties;
|
||||
};
|
||||
|
||||
/* Selects which synchronization implementation the KGSL backend uses.
 * The value is stored in tu_physical_device::kgsl_sync_impl_type and is
 * consulted when creating queues and when dispatching queue submissions.
 */
enum tu_kgsl_sync_impl_type {
|
||||
/* Submissions go through the syncobj-based submit path. */
TU_KGSL_SYNC_IMPL_TYPE_SYNCOBJ,
|
||||
/* Submissions go through the KGSL-timeline-based submit path. */
TU_KGSL_SYNC_IMPL_TYPE_TIMELINE,
|
||||
};
|
||||
|
||||
extern uint64_t os_page_size;
|
||||
|
||||
struct tu_physical_device
|
||||
|
|
@ -162,7 +167,9 @@ struct tu_physical_device
|
|||
|
||||
struct tu_memory_heap heap;
|
||||
|
||||
enum tu_kgsl_sync_impl_type kgsl_sync_impl_type;
|
||||
struct vk_sync_type syncobj_type;
|
||||
struct vk_sync_binary_type binary_type;
|
||||
struct vk_sync_timeline_type timeline_type;
|
||||
const struct vk_sync_type *sync_types[3];
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,92 @@ safe_ioctl(int fd, unsigned long request, void *arg)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
kgsl_timeline_create_ioctl(int fd,
|
||||
uint64_t initial_value,
|
||||
uint32_t *id)
|
||||
{
|
||||
struct kgsl_timeline_create req = { .seqno = initial_value };
|
||||
int ret = safe_ioctl(fd, IOCTL_KGSL_TIMELINE_CREATE, &req);
|
||||
|
||||
if (!ret) {
|
||||
/* Sentinel value, returned valid ID should be non-zero. */
|
||||
assert(req.id != 0);
|
||||
*id = req.id;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
kgsl_timeline_destroy_ioctl(int fd,
|
||||
uint32_t id)
|
||||
{
|
||||
/* Sentinel value, valid ID should be non-zero. */
|
||||
assert(id != 0);
|
||||
return safe_ioctl(fd, IOCTL_KGSL_TIMELINE_DESTROY, &id);
|
||||
}
|
||||
|
||||
static int
|
||||
kgsl_timeline_signal_ioctl(int fd,
|
||||
struct kgsl_timeline_val *timeline_vals,
|
||||
uint32_t count)
|
||||
{
|
||||
struct kgsl_timeline_signal req = {
|
||||
.timelines = (uint64_t)(uintptr_t) timeline_vals,
|
||||
.count = count,
|
||||
.timelines_size = sizeof(struct kgsl_timeline_val)
|
||||
};
|
||||
|
||||
return safe_ioctl(fd, IOCTL_KGSL_TIMELINE_SIGNAL, &req);
|
||||
}
|
||||
|
||||
static int
|
||||
kgsl_timeline_wait_ioctl(int fd,
|
||||
int64_t timeout,
|
||||
struct kgsl_timeline_val *timeline_vals,
|
||||
uint32_t count,
|
||||
uint32_t flags)
|
||||
{
|
||||
struct kgsl_timeline_wait req = {
|
||||
.tv_sec = timeout / NSEC_PER_SEC,
|
||||
.tv_nsec = timeout % NSEC_PER_SEC,
|
||||
.timelines = (uint64_t)(uintptr_t) timeline_vals,
|
||||
.count = count,
|
||||
.timelines_size = sizeof(struct kgsl_timeline_val),
|
||||
.flags = flags
|
||||
};
|
||||
|
||||
return safe_ioctl(fd, IOCTL_KGSL_TIMELINE_WAIT, &req);
|
||||
}
|
||||
|
||||
static int
|
||||
kgsl_timeline_query_ioctl(int fd,
|
||||
uint32_t id,
|
||||
uint64_t *value)
|
||||
{
|
||||
struct kgsl_timeline_val req = { .timeline = id };
|
||||
int ret = safe_ioctl(fd, IOCTL_KGSL_TIMELINE_QUERY, &req);
|
||||
|
||||
if (!ret)
|
||||
*value = req.seqno;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
kgsl_submitqueue_destroy(struct tu_device *dev, struct tu_queue *queue)
|
||||
{
|
||||
if (queue->msm_queue_id) {
|
||||
struct kgsl_drawctxt_destroy req = {
|
||||
.drawctxt_id = queue->msm_queue_id
|
||||
};
|
||||
|
||||
safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
|
||||
}
|
||||
|
||||
if (queue->kgsl_queue_timeline_id)
|
||||
kgsl_timeline_destroy_ioctl(dev->physical_device->local_fd, queue->kgsl_queue_timeline_id);
|
||||
}
|
||||
|
||||
static int
|
||||
kgsl_submitqueue_new(struct tu_device *dev, struct tu_queue *queue)
|
||||
{
|
||||
|
|
@ -60,21 +146,27 @@ kgsl_submitqueue_new(struct tu_device *dev, struct tu_queue *queue)
|
|||
|
||||
int ret = safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
goto fail;
|
||||
queue->msm_queue_id = req.drawctxt_id;
|
||||
|
||||
if (dev->physical_device->kgsl_sync_impl_type == TU_KGSL_SYNC_IMPL_TYPE_TIMELINE) {
|
||||
ret = kgsl_timeline_create_ioctl(dev->physical_device->local_fd,
|
||||
0, &queue->kgsl_queue_timeline_id);
|
||||
if (ret)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
kgsl_submitqueue_destroy(dev, queue);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
kgsl_submitqueue_close(struct tu_device *dev, struct tu_queue *queue)
|
||||
{
|
||||
struct kgsl_drawctxt_destroy req = {
|
||||
.drawctxt_id = queue->msm_queue_id,
|
||||
};
|
||||
|
||||
safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
|
||||
kgsl_submitqueue_destroy(dev, queue);
|
||||
}
|
||||
|
||||
static void kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo);
|
||||
|
|
@ -712,6 +804,19 @@ get_relative_ms(uint64_t abs_timeout_ns)
|
|||
return abs_timeout_ms - cur_time_ms;
|
||||
}
|
||||
|
||||
static int64_t
|
||||
get_relative_ns(uint64_t abs_timeout_ns)
|
||||
{
|
||||
if (abs_timeout_ns >= INT64_MAX)
|
||||
return INT64_MAX;
|
||||
|
||||
int64_t cur_time_ns = os_time_get_nano();
|
||||
if (abs_timeout_ns <= cur_time_ns)
|
||||
return 0;
|
||||
|
||||
return abs_timeout_ns - cur_time_ns;
|
||||
}
|
||||
|
||||
/* safe_ioctl is not enough as restarted waits would not adjust the timeout
|
||||
* which could lead to waiting substantially longer than requested
|
||||
*/
|
||||
|
|
@ -1203,6 +1308,151 @@ const struct vk_sync_type vk_kgsl_sync_type = {
|
|||
.export_sync_file = vk_kgsl_sync_export_sync_file,
|
||||
};
|
||||
|
||||
/* vk_sync object backed by a KGSL timeline. */
struct vk_kgsl_timeline
|
||||
{
|
||||
/* Embedded base object; container_of() recovers the wrapper from it. */
struct vk_sync vk;
|
||||
/* Timeline ID returned by kgsl_timeline_create_ioctl(). */
uint32_t id;
|
||||
};
|
||||
|
||||
/* vk_sync_type::init callback: creates the KGSL timeline backing sync,
 * starting at initial_value.
 *
 * Any creation failure is reported as VK_ERROR_OUT_OF_HOST_MEMORY.
 */
static VkResult
vk_kgsl_timeline_init(struct vk_device *_device,
                      struct vk_sync *sync,
                      uint64_t initial_value)
{
   struct tu_device *device = container_of(_device, struct tu_device, vk);
   struct vk_kgsl_timeline *timeline = container_of(sync, struct vk_kgsl_timeline, vk);
   const int fd = device->physical_device->local_fd;

   if (kgsl_timeline_create_ioctl(fd, initial_value, &timeline->id) == 0)
      return VK_SUCCESS;

   return vk_errorf(_device, VK_ERROR_OUT_OF_HOST_MEMORY,
                    "kgsl_timeline_create failed: %m");
}
|
||||
|
||||
static void
|
||||
vk_kgsl_timeline_finish(struct vk_device *_device,
|
||||
struct vk_sync *sync)
|
||||
{
|
||||
struct tu_device *device = container_of(_device, struct tu_device, vk);
|
||||
struct vk_kgsl_timeline *timeline = container_of(sync, struct vk_kgsl_timeline, vk);
|
||||
|
||||
kgsl_timeline_destroy_ioctl(device->physical_device->local_fd, timeline->id);
|
||||
}
|
||||
|
||||
/* vk_sync_type::signal_many callback: signals every timeline in signals to
 * its requested value with a single ioctl.
 *
 * Returns VK_SUCCESS, or VK_ERROR_UNKNOWN if the ioctl fails.
 */
static VkResult
vk_kgsl_timeline_signal_many(struct vk_device *_device,
                             uint32_t signal_count,
                             const struct vk_sync_signal *signals)
{
   struct tu_device *device = container_of(_device, struct tu_device, vk);

   STACK_ARRAY(struct kgsl_timeline_val, timeline_vals, signal_count);
   for (uint32_t i = 0; i < signal_count; ++i) {
      struct vk_kgsl_timeline *timeline = container_of(signals[i].sync, struct vk_kgsl_timeline, vk);
      timeline_vals[i] = (struct kgsl_timeline_val) {
         .seqno = signals[i].signal_value,
         .timeline = timeline->id
      };
   }

   int ret = kgsl_timeline_signal_ioctl(device->physical_device->local_fd,
                                        timeline_vals, signal_count);
   /* Capture errno before releasing the array: the release may free heap
    * memory, which is not guaranteed to preserve errno on every libc.
    */
   const int signal_errno = errno;
   STACK_ARRAY_FINISH(timeline_vals);

   if (ret) {
      /* "%m" formats the current errno, so restore the ioctl's value. */
      errno = signal_errno;
      return vk_errorf(_device, VK_ERROR_UNKNOWN,
                       "kgsl_timeline_signal failed: %m");
   }
   return VK_SUCCESS;
}
|
||||
|
||||
/* vk_sync_type::signal callback: signals one timeline to value by forwarding
 * a one-element request to vk_kgsl_timeline_signal_many().
 */
static VkResult
vk_kgsl_timeline_signal(struct vk_device *device,
                        struct vk_sync *sync,
                        uint64_t value)
{
   /* The stage mask has every bit set. */
   const VkPipelineStageFlags2 every_stage = ~(VkPipelineStageFlags2)0;

   struct vk_sync_signal single = {
      .sync = sync,
      .stage_mask = every_stage,
      .signal_value = value
   };

   return vk_kgsl_timeline_signal_many(device, 1, &single);
}
|
||||
|
||||
/* vk_sync_type::wait_many callback: waits on a set of KGSL timelines until
 * abs_timeout_ns.
 *
 * Waits for all timelines unless VK_SYNC_WAIT_ANY is set in wait_flags.
 * Returns VK_SUCCESS once the wait is satisfied, VK_TIMEOUT if the ioctl
 * fails with EBUSY or ETIMEDOUT, and VK_ERROR_UNKNOWN for any other failure.
 */
static VkResult
vk_kgsl_timeline_wait_many(struct vk_device *_device,
                           uint32_t wait_count,
                           const struct vk_sync_wait *waits,
                           enum vk_sync_wait_flags wait_flags,
                           uint64_t abs_timeout_ns)
{
   struct tu_device *device = container_of(_device, struct tu_device, vk);

   STACK_ARRAY(struct kgsl_timeline_val, timeline_vals, wait_count);
   for (uint32_t i = 0; i < wait_count; ++i) {
      struct vk_kgsl_timeline *timeline = container_of(waits[i].sync, struct vk_kgsl_timeline, vk);
      timeline_vals[i] = (struct kgsl_timeline_val) {
         .seqno = waits[i].wait_value,
         .timeline = timeline->id
      };
   }

   uint32_t flag = KGSL_TIMELINE_WAIT_ALL;
   if (wait_flags & VK_SYNC_WAIT_ANY)
      flag = KGSL_TIMELINE_WAIT_ANY;

   int ret = kgsl_timeline_wait_ioctl(device->physical_device->local_fd,
                                      get_relative_ns(abs_timeout_ns),
                                      timeline_vals, wait_count, flag);
   /* Capture errno before releasing the array: the release may free heap
    * memory, which is not guaranteed to preserve errno on every libc, and
    * the timeout classification below depends on the ioctl's value.
    */
   const int wait_errno = errno;
   STACK_ARRAY_FINISH(timeline_vals);

   if (ret) {
      if (wait_errno == EBUSY || wait_errno == ETIMEDOUT)
         return VK_TIMEOUT;

      /* "%m" formats the current errno, so restore the ioctl's value. */
      errno = wait_errno;
      return vk_errorf(_device, VK_ERROR_UNKNOWN,
                       "kgsl_timeline_wait failed: %m");
   }
   return VK_SUCCESS;
}
|
||||
|
||||
/* vk_sync_type::get_value callback: reads the current sequence number of the
 * KGSL timeline backing sync into *value.
 *
 * Returns VK_SUCCESS, or VK_ERROR_UNKNOWN if the query ioctl fails.
 */
static VkResult
vk_kgsl_timeline_get_value(struct vk_device *_device,
                           struct vk_sync *sync,
                           uint64_t *value)
{
   struct tu_device *device = container_of(_device, struct tu_device, vk);
   struct vk_kgsl_timeline *timeline = container_of(sync, struct vk_kgsl_timeline, vk);
   const int fd = device->physical_device->local_fd;

   if (kgsl_timeline_query_ioctl(fd, timeline->id, value) == 0)
      return VK_SUCCESS;

   return vk_errorf(_device, VK_ERROR_UNKNOWN,
                    "kgsl_timeline_query failed: %m");
}
|
||||
|
||||
/* vk_sync_type description for syncs backed by KGSL timelines. It declares
 * the advertised feature set and wires the vk_kgsl_timeline_* callbacks
 * defined above into the common sync framework.
 */
const struct vk_sync_type vk_kgsl_timeline_type = {
|
||||
/* Each sync object of this type is a struct vk_kgsl_timeline. */
.size = sizeof(struct vk_kgsl_timeline),
|
||||
.features = (enum vk_sync_features)
|
||||
(VK_SYNC_FEATURE_TIMELINE |
|
||||
VK_SYNC_FEATURE_GPU_WAIT |
|
||||
VK_SYNC_FEATURE_CPU_WAIT |
|
||||
VK_SYNC_FEATURE_CPU_SIGNAL |
|
||||
VK_SYNC_FEATURE_WAIT_ANY |
|
||||
VK_SYNC_FEATURE_WAIT_BEFORE_SIGNAL),
|
||||
.init = vk_kgsl_timeline_init,
|
||||
.finish = vk_kgsl_timeline_finish,
|
||||
.signal = vk_kgsl_timeline_signal,
|
||||
.signal_many = vk_kgsl_timeline_signal_many,
|
||||
.get_value = vk_kgsl_timeline_get_value,
|
||||
.wait_many = vk_kgsl_timeline_wait_many,
|
||||
};
|
||||
|
||||
struct tu_kgsl_queue_submit {
|
||||
struct util_dynarray commands;
|
||||
struct util_dynarray ranges;
|
||||
|
|
@ -1320,19 +1570,110 @@ kgsl_bind_finalize(struct tu_kgsl_queue_submit *submit)
|
|||
}
|
||||
}
|
||||
|
||||
/* Per-submission profiling state shared by the kgsl_profiling_* helpers. */
struct kgsl_profiling {
|
||||
/* Command object describing the profiling buffer; allocated with vk_alloc()
 * in kgsl_profiling_alloc() and passed as the submission's object list.
 */
struct kgsl_command_object *cmd_obj;
|
||||
/* CPU mapping of the profiling buffer, zeroed by kgsl_profiling_alloc(). */
struct kgsl_cmdbatch_profiling_buffer *buffer;
|
||||
|
||||
/* GPU timestamp offset computed by kgsl_profiling_end_perfetto_submit(). */
uint64_t gpu_offset;
|
||||
#if HAVE_PERFETTO
|
||||
/* Value returned by tu_perfetto_begin_submit() for this submission. */
uint64_t start_ts;
|
||||
#endif
|
||||
};
|
||||
|
||||
static void
|
||||
kgsl_profiling_alloc(struct kgsl_profiling *profiling,
|
||||
struct tu_queue *queue,
|
||||
struct tu_u_trace_submission_data *u_trace_submission_data)
|
||||
{
|
||||
mtx_lock(&queue->device->kgsl_profiling_mutex);
|
||||
tu_suballoc_bo_alloc(&u_trace_submission_data->kgsl_timestamp_bo,
|
||||
&queue->device->kgsl_profiling_suballoc,
|
||||
sizeof(struct kgsl_cmdbatch_profiling_buffer), 4);
|
||||
mtx_unlock(&queue->device->kgsl_profiling_mutex);
|
||||
|
||||
profiling->cmd_obj = (struct kgsl_command_object *)
|
||||
vk_alloc(&queue->device->vk.alloc, sizeof(*profiling->cmd_obj),
|
||||
alignof(*profiling->cmd_obj), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
|
||||
struct tu_suballoc_bo *bo = &u_trace_submission_data->kgsl_timestamp_bo;
|
||||
|
||||
*profiling->cmd_obj = (struct kgsl_command_object) {
|
||||
.offset = bo->iova - bo->bo->iova,
|
||||
.gpuaddr = bo->bo->iova,
|
||||
.size = sizeof(struct kgsl_cmdbatch_profiling_buffer),
|
||||
.flags = KGSL_OBJLIST_MEMOBJ | KGSL_OBJLIST_PROFILE,
|
||||
.id = bo->bo->gem_handle,
|
||||
};
|
||||
|
||||
profiling->buffer =
|
||||
(struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
|
||||
memset(profiling->buffer, 0, sizeof(*profiling->buffer));
|
||||
}
|
||||
|
||||
static void
|
||||
kgsl_profiling_free(struct kgsl_profiling *profiling,
|
||||
struct tu_queue *queue,
|
||||
struct tu_u_trace_submission_data *u_trace_submission_data)
|
||||
{
|
||||
mtx_lock(&queue->device->kgsl_profiling_mutex);
|
||||
tu_suballoc_bo_free(&queue->device->kgsl_profiling_suballoc,
|
||||
&u_trace_submission_data->kgsl_timestamp_bo);
|
||||
mtx_unlock(&queue->device->kgsl_profiling_mutex);
|
||||
}
|
||||
|
||||
#if HAVE_PERFETTO
|
||||
static int
|
||||
kgsl_profiling_end_perfetto_submit(struct kgsl_profiling *profiling,
|
||||
struct tu_queue *queue)
|
||||
{
|
||||
/* We need to wait for KGSL to queue the GPU command before we can read
|
||||
* the timestamp. Since this is just for profiling and doesn't take too
|
||||
* long, we can just busy-wait for it.
|
||||
*/
|
||||
while (p_atomic_read(&profiling->buffer->gpu_ticks_queued) == 0);
|
||||
|
||||
struct kgsl_perfcounter_read_group perf = {
|
||||
.groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
|
||||
.countable = 0,
|
||||
.value = 0
|
||||
};
|
||||
|
||||
struct kgsl_perfcounter_read req = {
|
||||
.reads = &perf,
|
||||
.count = 1,
|
||||
};
|
||||
|
||||
int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_PERFCOUNTER_READ, &req);
|
||||
/* Older KGSL has some kind of garbage in upper 32 bits */
|
||||
uint64_t offseted_gpu_ts = perf.value & 0xffffffff;
|
||||
|
||||
profiling->gpu_offset = tu_device_ticks_to_ns(
|
||||
queue->device, offseted_gpu_ts - profiling->buffer->gpu_ticks_queued);
|
||||
|
||||
struct tu_perfetto_clocks clocks = {
|
||||
.cpu = profiling->buffer->wall_clock_ns,
|
||||
.gpu_ts = tu_device_ticks_to_ns(queue->device,
|
||||
profiling->buffer->gpu_ticks_queued),
|
||||
.gpu_ts_offset = profiling->gpu_offset,
|
||||
};
|
||||
|
||||
clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
|
||||
profiling->start_ts, &clocks);
|
||||
profiling->gpu_offset = clocks.gpu_ts_offset;
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
static VkResult
|
||||
kgsl_queue_submit(struct tu_queue *queue, void *_submit,
|
||||
struct vk_sync_wait *waits, uint32_t wait_count,
|
||||
struct vk_sync_signal *signals, uint32_t signal_count,
|
||||
struct tu_u_trace_submission_data *u_trace_submission_data)
|
||||
kgsl_queue_submit_syncobj(struct tu_queue *queue, void *_submit,
|
||||
struct vk_sync_wait *waits, uint32_t wait_count,
|
||||
struct vk_sync_signal *signals, uint32_t signal_count,
|
||||
struct tu_u_trace_submission_data *u_trace_submission_data)
|
||||
{
|
||||
struct tu_kgsl_queue_submit *submit =
|
||||
(struct tu_kgsl_queue_submit *)_submit;
|
||||
|
||||
#if HAVE_PERFETTO
|
||||
uint64_t start_ts = tu_perfetto_begin_submit();
|
||||
#endif
|
||||
|
||||
if (submit->commands.size == 0 && submit->bind_cmds.size == 0) {
|
||||
/* This handles the case where we have a wait and no commands to submit.
|
||||
* It is necessary to handle this case separately as the kernel will not
|
||||
|
|
@ -1399,37 +1740,13 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
|
|||
if (submit->bind_cmds.size != 0)
|
||||
kgsl_bind_finalize(submit);
|
||||
|
||||
|
||||
struct kgsl_profiling profiling = { 0 };
|
||||
if (u_trace_submission_data) {
|
||||
mtx_lock(&queue->device->kgsl_profiling_mutex);
|
||||
tu_suballoc_bo_alloc(&u_trace_submission_data->kgsl_timestamp_bo,
|
||||
&queue->device->kgsl_profiling_suballoc,
|
||||
sizeof(struct kgsl_cmdbatch_profiling_buffer), 4);
|
||||
mtx_unlock(&queue->device->kgsl_profiling_mutex);
|
||||
}
|
||||
|
||||
uint32_t obj_count = 0;
|
||||
if (u_trace_submission_data)
|
||||
obj_count++;
|
||||
|
||||
struct kgsl_command_object *objs = (struct kgsl_command_object *)
|
||||
vk_alloc(&queue->device->vk.alloc, sizeof(*objs) * obj_count,
|
||||
alignof(*objs), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
|
||||
struct kgsl_cmdbatch_profiling_buffer *profiling_buffer = NULL;
|
||||
uint32_t obj_idx = 0;
|
||||
if (u_trace_submission_data) {
|
||||
struct tu_suballoc_bo *bo = &u_trace_submission_data->kgsl_timestamp_bo;
|
||||
|
||||
objs[obj_idx++] = (struct kgsl_command_object) {
|
||||
.offset = bo->iova - bo->bo->iova,
|
||||
.gpuaddr = bo->bo->iova,
|
||||
.size = sizeof(struct kgsl_cmdbatch_profiling_buffer),
|
||||
.flags = KGSL_OBJLIST_MEMOBJ | KGSL_OBJLIST_PROFILE,
|
||||
.id = bo->bo->gem_handle,
|
||||
};
|
||||
profiling_buffer =
|
||||
(struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
|
||||
memset(profiling_buffer, 0, sizeof(*profiling_buffer));
|
||||
#if HAVE_PERFETTO
|
||||
profiling.start_ts = tu_perfetto_begin_submit();
|
||||
#endif
|
||||
kgsl_profiling_alloc(&profiling, queue, u_trace_submission_data);
|
||||
}
|
||||
|
||||
const struct kgsl_syncobj *wait_semaphores[wait_count];
|
||||
|
|
@ -1477,7 +1794,6 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
|
|||
|
||||
int ret;
|
||||
uint32_t timestamp = 0;
|
||||
uint64_t gpu_offset = 0;
|
||||
|
||||
if (submit->bind_cmds.size == 0) {
|
||||
struct kgsl_gpu_command req = {
|
||||
|
|
@ -1492,11 +1808,11 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
|
|||
.context_id = queue->msm_queue_id,
|
||||
};
|
||||
|
||||
if (obj_idx) {
|
||||
if (profiling.cmd_obj) {
|
||||
req.flags |= KGSL_CMDBATCH_PROFILING;
|
||||
req.objlist = (uintptr_t) objs;
|
||||
req.objlist = (uintptr_t) profiling.cmd_obj;
|
||||
req.objsize = sizeof(struct kgsl_command_object);
|
||||
req.numobjs = obj_idx;
|
||||
req.numobjs = 1;
|
||||
}
|
||||
|
||||
ret = safe_ioctl(queue->device->physical_device->local_fd,
|
||||
|
|
@ -1546,42 +1862,8 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
|
|||
}
|
||||
|
||||
#if HAVE_PERFETTO
|
||||
if (profiling_buffer) {
|
||||
/* We need to wait for KGSL to queue the GPU command before we can read
|
||||
* the timestamp. Since this is just for profiling and doesn't take too
|
||||
* long, we can just busy-wait for it.
|
||||
*/
|
||||
while (p_atomic_read(&profiling_buffer->gpu_ticks_queued) == 0);
|
||||
|
||||
struct kgsl_perfcounter_read_group perf = {
|
||||
.groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
|
||||
.countable = 0,
|
||||
.value = 0
|
||||
};
|
||||
|
||||
struct kgsl_perfcounter_read req = {
|
||||
.reads = &perf,
|
||||
.count = 1,
|
||||
};
|
||||
|
||||
ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_PERFCOUNTER_READ, &req);
|
||||
/* Older KGSL has some kind of garbage in upper 32 bits */
|
||||
uint64_t offseted_gpu_ts = perf.value & 0xffffffff;
|
||||
|
||||
gpu_offset = tu_device_ticks_to_ns(
|
||||
queue->device, offseted_gpu_ts - profiling_buffer->gpu_ticks_queued);
|
||||
|
||||
struct tu_perfetto_clocks clocks = {
|
||||
.cpu = profiling_buffer->wall_clock_ns,
|
||||
.gpu_ts = tu_device_ticks_to_ns(queue->device,
|
||||
profiling_buffer->gpu_ticks_queued),
|
||||
.gpu_ts_offset = gpu_offset,
|
||||
};
|
||||
|
||||
clocks = tu_perfetto_end_submit(queue, queue->device->submit_count,
|
||||
start_ts, &clocks);
|
||||
gpu_offset = clocks.gpu_ts_offset;
|
||||
}
|
||||
if (profiling.buffer)
|
||||
ret = kgsl_profiling_end_perfetto_submit(&profiling, queue);
|
||||
#endif
|
||||
|
||||
kgsl_syncobj_destroy(&wait_sync);
|
||||
|
|
@ -1605,23 +1887,293 @@ kgsl_queue_submit(struct tu_queue *queue, void *_submit,
|
|||
signal_sync->timestamp = timestamp;
|
||||
}
|
||||
|
||||
if (u_trace_submission_data) {
|
||||
struct tu_u_trace_submission_data *submission_data =
|
||||
u_trace_submission_data;
|
||||
submission_data->gpu_ts_offset = gpu_offset;
|
||||
}
|
||||
if (u_trace_submission_data)
|
||||
u_trace_submission_data->gpu_ts_offset = profiling.gpu_offset;
|
||||
|
||||
fail_submit:
|
||||
if (result != VK_SUCCESS && u_trace_submission_data) {
|
||||
mtx_lock(&queue->device->kgsl_profiling_mutex);
|
||||
tu_suballoc_bo_free(&queue->device->kgsl_profiling_suballoc,
|
||||
&u_trace_submission_data->kgsl_timestamp_bo);
|
||||
mtx_unlock(&queue->device->kgsl_profiling_mutex);
|
||||
}
|
||||
if (result != VK_SUCCESS && u_trace_submission_data)
|
||||
kgsl_profiling_free(&profiling, queue, u_trace_submission_data);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Timeline-based implementation of queue submission.
 *
 * Every wait and signal is expressed as a (timeline, seqno) pair. The queue's
 * own timeline is appended to both lists so that consecutive submissions on
 * the queue are ordered.
 *
 * Three paths exist:
 *  - no commands and no binds, all waits already satisfied: the signals are
 *    fired directly from the CPU;
 *  - no commands and no binds, waits still pending: a single aux command
 *    waits on the timelines and then signals;
 *  - commands or binds: the first dispatched command waits on the timelines
 *    and a trailing aux command performs the signals.
 *
 * Returns VK_SUCCESS, or the result of vk_device_set_lost() if any kernel
 * submission fails. queue->kgsl_queue_timeline_seqno only advances on
 * success.
 */
static VkResult
kgsl_queue_submit_timeline(struct tu_queue *queue, void *_submit,
                           struct vk_sync_wait *waits, uint32_t wait_count,
                           struct vk_sync_signal *signals, uint32_t signal_count,
                           struct tu_u_trace_submission_data *u_trace_submission_data)
{
   struct tu_kgsl_queue_submit *submit =
      (struct tu_kgsl_queue_submit *)_submit;

   /* The queue timeline is included in the waits and signals, as it is
    * necessary for the fast path below to function properly. Wait on the
    * current queue timeline seqno will ensure that work of previous submit
    * was completed. Signal of an increased queue timeline seqno will indicate
    * the work of the current submit was completed.
    */
   const uint64_t previous_submit_timeline_seqno = queue->kgsl_queue_timeline_seqno;
   const uint64_t current_submit_timeline_seqno = previous_submit_timeline_seqno + 1;

   STACK_ARRAY(struct kgsl_timeline_val, wait_timeline_vals, wait_count + 1);
   for (uint32_t i = 0; i < wait_count; ++i) {
      struct vk_kgsl_timeline *timeline = container_of(waits[i].sync, struct vk_kgsl_timeline, vk);
      wait_timeline_vals[i] = (struct kgsl_timeline_val) {
         .seqno = waits[i].wait_value,
         .timeline = timeline->id
      };
   }
   wait_timeline_vals[wait_count] = (struct kgsl_timeline_val) {
      .seqno = previous_submit_timeline_seqno,
      .timeline = queue->kgsl_queue_timeline_id
   };

   STACK_ARRAY(struct kgsl_timeline_val, signal_timeline_vals, signal_count + 1);
   for (uint32_t i = 0; i < signal_count; ++i) {
      struct vk_kgsl_timeline *timeline = container_of(signals[i].sync, struct vk_kgsl_timeline, vk);
      signal_timeline_vals[i] = (struct kgsl_timeline_val) {
         .seqno = signals[i].signal_value,
         .timeline = timeline->id
      };
   }
   signal_timeline_vals[signal_count] = (struct kgsl_timeline_val) {
      .seqno = current_submit_timeline_seqno,
      .timeline = queue->kgsl_queue_timeline_id
   };

   if (submit->commands.size == 0 && submit->bind_cmds.size == 0) {
      /* First part of the zero-command, zero-bind fast path. If all wait
       * timelines are already signaled, fire off the signals and finish.
       */

      int ret = kgsl_timeline_wait_ioctl(queue->device->physical_device->local_fd, 0,
                                         wait_timeline_vals, wait_count + 1,
                                         KGSL_TIMELINE_WAIT_ALL);
      if (ret == 0) {
         VkResult result = VK_SUCCESS;
         ret = kgsl_timeline_signal_ioctl(queue->device->physical_device->local_fd,
                                          signal_timeline_vals, signal_count + 1);
         if (ret) {
            result = vk_device_set_lost(&queue->device->vk,
                                        "signal submit failed\n");
         } else {
            queue->kgsl_queue_timeline_seqno = current_submit_timeline_seqno;
         }

         STACK_ARRAY_FINISH(signal_timeline_vals);
         STACK_ARRAY_FINISH(wait_timeline_vals);
         return result;
      }
   }

   /* Syncpoint that makes a dispatched command wait on all wait timelines. */
   struct kgsl_cmd_syncpoint_timeline cmd_syncpoint_timeline = {
      .timelines = (uint64_t)(uintptr_t) wait_timeline_vals,
      .count = wait_count + 1,
      .timelines_size = sizeof(struct kgsl_timeline_val)
   };

   struct kgsl_command_syncpoint cmd_syncpoint = {
      .priv = (uint64_t)(uintptr_t) &cmd_syncpoint_timeline,
      .size = sizeof(struct kgsl_cmd_syncpoint_timeline),
      .type = KGSL_CMD_SYNCPOINT_TYPE_TIMELINE
   };

   /* Aux command payload that fires all signal timelines. */
   struct kgsl_gpu_aux_command_timeline aux_cmd_timeline = {
      .timelines = (uint64_t)(uintptr_t) signal_timeline_vals,
      .count = signal_count + 1,
      .timelines_size = sizeof(struct kgsl_timeline_val)
   };

   struct kgsl_gpu_aux_command_generic aux_cmd_generic_timeline = {
      .priv = (uint64_t)(uintptr_t) &aux_cmd_timeline,
      .size = sizeof(struct kgsl_gpu_aux_command_timeline),
      .type = KGSL_GPU_AUX_COMMAND_TIMELINE,
   };

   struct kgsl_gpu_aux_command aux_cmd;

   if (submit->commands.size == 0 && submit->bind_cmds.size == 0) {
      /* Second part of the zero-command, zero-bind fast path. All wait
       * timelines haven't yet been signaled, but we can dispatch an aux
       * command that will wait on those timelines and then fire off the
       * necessary timeline signals.
       */
      aux_cmd = {
         .flags = KGSL_GPU_AUX_COMMAND_TIMELINE | KGSL_GPU_AUX_COMMAND_SYNC,
         .cmdlist = (uint64_t)(uintptr_t) &aux_cmd_generic_timeline,
         .cmdsize = sizeof(struct kgsl_gpu_aux_command_generic),
         .numcmds = 1,
         .synclist = (uint64_t)(uintptr_t) &cmd_syncpoint,
         .syncsize = sizeof(struct kgsl_command_syncpoint),
         .numsyncs = 1,
         .context_id = queue->msm_queue_id
      };

      VkResult result = VK_SUCCESS;
      int ret = safe_ioctl(queue->device->physical_device->local_fd,
                           IOCTL_KGSL_GPU_AUX_COMMAND, &aux_cmd);
      if (ret) {
         result = vk_device_set_lost(&queue->device->vk,
                                     "timeline signal aux command submit failed\n");
      } else {
         queue->kgsl_queue_timeline_seqno = current_submit_timeline_seqno;
      }

      STACK_ARRAY_FINISH(signal_timeline_vals);
      STACK_ARRAY_FINISH(wait_timeline_vals);
      return result;
   }

   /* Initialized so neither is ever read indeterminate, even in builds where
    * the assert below is compiled out.
    */
   int ret = 0;
   uint32_t timestamp = 0;

   /* Set once a command carrying the profiling object has been handed to the
    * kernel; only then will the profiling buffer ever be written.
    */
   bool profiling_submitted = false;

   /* For sanity: we're either dealing with commands or binds. In either case,
    * the first dispatched command should wait on the specified timelines.
    * An additional aux command will take care of timeline signals.
    */
   assert((submit->commands.size == 0) ^ (submit->bind_cmds.size == 0));

   struct kgsl_profiling profiling = { 0 };
   if (u_trace_submission_data) {
#if HAVE_PERFETTO
      profiling.start_ts = tu_perfetto_begin_submit();
#endif
      kgsl_profiling_alloc(&profiling, queue, u_trace_submission_data);
   }

   if (submit->commands.size != 0) {
      struct kgsl_gpu_command req = {
         .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
         .cmdlist = (uint64_t)(uintptr_t) submit->commands.data,
         .cmdsize = sizeof(struct kgsl_command_object),
         .numcmds = util_dynarray_num_elements(&submit->commands,
                                               struct kgsl_command_object),
         .synclist = (uint64_t)(uintptr_t) &cmd_syncpoint,
         .syncsize = sizeof(struct kgsl_command_syncpoint),
         .numsyncs = 1,
         .context_id = queue->msm_queue_id,
      };

      if (profiling.cmd_obj) {
         req.flags |= KGSL_CMDBATCH_PROFILING;
         req.objlist = (uintptr_t) profiling.cmd_obj;
         req.objsize = sizeof(struct kgsl_command_object);
         req.numobjs = 1;
         profiling_submitted = true;
      }

      ret = safe_ioctl(queue->device->physical_device->local_fd,
                       IOCTL_KGSL_GPU_COMMAND, &req);

      timestamp = req.timestamp;
   }

   if (submit->bind_cmds.size != 0) {
      kgsl_bind_finalize(submit);

      /* kgsl doesn't support multiple bind commands at once */
      uint32_t i = 0;
      util_dynarray_foreach(&submit->bind_cmds,
                            struct kgsl_gpu_aux_command_bind, aux_cmd_bind) {
         struct kgsl_gpu_aux_command_generic aux_cmd_generic_bind = {
            .priv = (uint64_t)(uintptr_t) aux_cmd_bind,
            .size = sizeof(struct kgsl_gpu_aux_command_bind),
            .type = KGSL_GPU_AUX_COMMAND_BIND,
         };

         aux_cmd = {
            .flags = KGSL_GPU_AUX_COMMAND_BIND,
            .cmdlist = (uint64_t)(uintptr_t) &aux_cmd_generic_bind,
            .cmdsize = sizeof(struct kgsl_gpu_aux_command_generic),
            .numcmds = 1,
            .context_id = queue->msm_queue_id,
         };

         /* Only the first bind command carries the timeline waits. */
         if (i == 0) {
            aux_cmd.flags |= KGSL_GPU_AUX_COMMAND_SYNC;
            aux_cmd.synclist = (uint64_t)(uintptr_t) &cmd_syncpoint;
            aux_cmd.syncsize = sizeof(struct kgsl_command_syncpoint);
            aux_cmd.numsyncs = 1;
         }

         ret = safe_ioctl(queue->device->physical_device->local_fd,
                          IOCTL_KGSL_GPU_AUX_COMMAND, &aux_cmd);

         timestamp = aux_cmd.timestamp;
         i++;

         if (ret)
            break;
      }
   }

#if HAVE_PERFETTO
   /* The perfetto helper busy-waits for the kernel to fill in the profiling
    * buffer. That only happens when a command carrying the profiling object
    * was actually accepted, so skip it after a failed submit or when no such
    * command was dispatched; otherwise the wait would never finish.
    */
   if (!ret && profiling_submitted && profiling.buffer)
      ret = kgsl_profiling_end_perfetto_submit(&profiling, queue);
#endif

   VkResult result = VK_SUCCESS;
   if (ret) {
      result = vk_device_set_lost(&queue->device->vk,
                                  "submit failed: %s\n", strerror(errno));
      goto fail_submit;
   }

   aux_cmd = {
      .flags = KGSL_GPU_AUX_COMMAND_TIMELINE,
      .cmdlist = (uint64_t)(uintptr_t) &aux_cmd_generic_timeline,
      .cmdsize = sizeof(struct kgsl_gpu_aux_command_generic),
      .numcmds = 1,
      .context_id = queue->msm_queue_id
   };

   ret = safe_ioctl(queue->device->physical_device->local_fd,
                    IOCTL_KGSL_GPU_AUX_COMMAND, &aux_cmd);
   if (ret) {
      result = vk_device_set_lost(&queue->device->vk,
                                  "timeline submit failed: %s\n",
                                  strerror(errno));
      goto fail_submit;
   }

   timestamp = aux_cmd.timestamp;
   queue->kgsl_queue_timeline_seqno = current_submit_timeline_seqno;

   p_atomic_set(&queue->fence, timestamp);

   if (u_trace_submission_data)
      u_trace_submission_data->gpu_ts_offset = profiling.gpu_offset;

fail_submit:
   if (result != VK_SUCCESS && u_trace_submission_data)
      kgsl_profiling_free(&profiling, queue, u_trace_submission_data);

   /* The command object is only referenced by this function's submit request
    * and nothing else releases it, so free it here on every exit path.
    * NOTE(review): assumes the kernel does not keep the user pointer after
    * the submit ioctl returns - confirm.
    */
   if (profiling.cmd_obj)
      vk_free(&queue->device->vk.alloc, profiling.cmd_obj);

   STACK_ARRAY_FINISH(signal_timeline_vals);
   STACK_ARRAY_FINISH(wait_timeline_vals);
   return result;
}
|
||||
|
||||
/* Queue submission entry point: forwards to the syncobj or timeline
 * implementation according to the physical device's kgsl_sync_impl_type.
 *
 * Returns the chosen implementation's result, or VK_ERROR_UNKNOWN if the
 * implementation type matches neither known value.
 */
static VkResult
kgsl_queue_submit(struct tu_queue *queue, void *submit,
                  struct vk_sync_wait *waits, uint32_t wait_count,
                  struct vk_sync_signal *signals, uint32_t signal_count,
                  struct tu_u_trace_submission_data *u_trace_submission_data)
{
   const enum tu_kgsl_sync_impl_type impl_type =
      queue->device->physical_device->kgsl_sync_impl_type;

   if (impl_type == TU_KGSL_SYNC_IMPL_TYPE_SYNCOBJ) {
      return kgsl_queue_submit_syncobj(queue, submit,
                                       waits, wait_count,
                                       signals, signal_count,
                                       u_trace_submission_data);
   }

   if (impl_type == TU_KGSL_SYNC_IMPL_TYPE_TIMELINE) {
      return kgsl_queue_submit_timeline(queue, submit,
                                        waits, wait_count,
                                        signals, signal_count,
                                        u_trace_submission_data);
   }

   return VK_ERROR_UNKNOWN;
}
|
||||
|
||||
static VkResult
|
||||
kgsl_device_init(struct tu_device *dev)
|
||||
{
|
||||
|
|
@ -1731,6 +2283,7 @@ tu_knl_kgsl_load(struct tu_instance *instance, int fd)
|
|||
static const char dma_heap_path[] = "/dev/dma_heap/system";
|
||||
static const char ion_path[] = "/dev/ion";
|
||||
int dma_fd;
|
||||
uint32_t dummy_timeline_id = 0;
|
||||
|
||||
dma_fd = open(dma_heap_path, O_RDONLY);
|
||||
if (dma_fd >= 0) {
|
||||
|
|
@ -1796,11 +2349,27 @@ tu_knl_kgsl_load(struct tu_instance *instance, int fd)
|
|||
device->has_raytracing = tu_kgsl_get_raytracing(fd);
|
||||
|
||||
device->submitqueue_priority_count = 1;
|
||||
|
||||
device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);
|
||||
|
||||
device->sync_types[0] = &vk_kgsl_sync_type;
|
||||
device->sync_types[1] = &device->timeline_type.sync;
|
||||
/* Prefer timeline-based sync implementation if supported by the kernel. */
|
||||
if (kgsl_timeline_create_ioctl(fd, 0, &dummy_timeline_id) == 0) {
|
||||
kgsl_timeline_destroy_ioctl(fd, dummy_timeline_id);
|
||||
device->kgsl_sync_impl_type = TU_KGSL_SYNC_IMPL_TYPE_TIMELINE;
|
||||
} else {
|
||||
device->kgsl_sync_impl_type = TU_KGSL_SYNC_IMPL_TYPE_SYNCOBJ;
|
||||
}
|
||||
|
||||
switch (device->kgsl_sync_impl_type) {
|
||||
case TU_KGSL_SYNC_IMPL_TYPE_SYNCOBJ:
|
||||
device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);
|
||||
device->sync_types[0] = &vk_kgsl_sync_type;
|
||||
device->sync_types[1] = &device->timeline_type.sync;
|
||||
break;
|
||||
case TU_KGSL_SYNC_IMPL_TYPE_TIMELINE:
|
||||
device->binary_type = vk_sync_binary_get_type(&vk_kgsl_timeline_type);
|
||||
device->sync_types[0] = &vk_kgsl_timeline_type;
|
||||
device->sync_types[1] = &device->binary_type.sync;
|
||||
break;
|
||||
}
|
||||
device->sync_types[2] = NULL;
|
||||
|
||||
device->heap.size = tu_get_system_heap_size(device);
|
||||
|
|
|
|||
|
|
@ -36,6 +36,9 @@ struct tu_queue
|
|||
unsigned render_pass_idx;
|
||||
|
||||
int fence; /* timestamp/fence of the last queue submission */
|
||||
|
||||
uint32_t kgsl_queue_timeline_id;
|
||||
uint64_t kgsl_queue_timeline_seqno;
|
||||
};
|
||||
VK_DEFINE_HANDLE_CASTS(tu_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue