freedreno/replay: Add limited support for KGSL

KGSL doesn't support IOVAs being set from userspace, so as a workaround
we pre-allocate a single buffer and hope that it always starts from the
same address.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25005>
This commit is contained in:
Danylo Piliaiev 2023-05-16 13:31:55 +02:00 committed by Marge Bot
parent e2648d4255
commit e3bbd1688b
2 changed files with 281 additions and 56 deletions

View file

@ -70,6 +70,14 @@ if dep_libarchive.found()
build_by_default: false,
)
replay_flags = []
replay_deps = []
if freedreno_kmds.contains('kgsl')
replay_flags += '-DFD_REPLAY_KGSL'
else
replay_deps += dep_libdrm
endif
replay = executable(
'replay',
[
@ -80,11 +88,9 @@ if dep_libarchive.found()
inc_include,
inc_src,
],
c_args : [no_override_init_args],
c_args : [no_override_init_args, replay_flags],
gnu_symbol_visibility: 'hidden',
dependencies: [
dep_libdrm,
],
dependencies: replay_deps,
link_with: [
libfreedreno_cffdec,
libfreedreno_io,

View file

@ -18,8 +18,13 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#if !FD_REPLAY_KGSL
#include <xf86drm.h>
#include "drm-uapi/msm_drm.h"
#else
#include "../vulkan/msm_kgsl.h"
#endif
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
@ -43,6 +48,9 @@
* - echo 1 > /sys/module/msm/parameters/rd_full
*
* Requires kernel with MSM_INFO_SET_IOVA support.
* In case userspace IOVAs are not supported, like on KGSL, we have to
* pre-allocate a single buffer and hope it is always allocated starting
* from the same address.
*
* TODO: Misrendering, would require marking framebuffer images
* at each renderpass in order to fetch and decode them.
@ -56,6 +64,8 @@
static const char *exename = NULL;
static const uint64_t FAKE_ADDRESS_SPACE_SIZE = 1024 * 1024 * 1024;
static int handle_file(const char *filename, uint32_t first_submit,
uint32_t last_submit, uint32_t submit_to_override,
const char *cmdstreamgen);
@ -166,6 +176,16 @@ struct device {
struct util_vma_heap vma;
struct u_vector cmdstreams;
bool has_set_iova;
uint32_t va_id;
void *va_map;
uint64_t va_iova;
#ifdef FD_REPLAY_KGSL
uint32_t context_id;
#endif
};
void buffer_mem_free(struct device *dev, struct buffer *buf);
@ -195,43 +215,6 @@ rb_buffer_search_cmp(const struct rb_node *node, const void *addrptr)
return 0;
}
/* Open the MSM render node and build the replay device state.
 *
 * Queries MSM_PARAM_VA_START and MSM_PARAM_VA_SIZE and uses them as the
 * range of the VMA heap.  If either query fails the process exits via
 * err(); if the node cannot be opened it exits via errx().
 */
static struct device *
device_create()
{
struct device *dev = calloc(sizeof(struct device), 1);
dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
if (dev->fd < 0) {
errx(1, "Cannot open MSM fd!");
}
uint64_t va_start, va_size;
struct drm_msm_param req = {
.pipe = MSM_PIPE_3D0,
.param = MSM_PARAM_VA_START,
};
int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
va_start = req.value;
/* Only ask for the size when the start query succeeded. */
if (!ret) {
req.param = MSM_PARAM_VA_SIZE;
ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
va_size = req.value;
}
/* A failure of either query is fatal in this version. */
if (ret) {
err(1, "MSM_INFO_SET_IOVA is unsupported");
}
rb_tree_init(&dev->buffers);
/* The heap size is rounded down to a multiple of 4096. */
util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
return dev;
}
static struct buffer *
device_get_buffer(struct device *dev, uint64_t iova)
{
@ -261,6 +244,7 @@ device_free_unused_buffers(struct device *dev)
}
}
#if !FD_REPLAY_KGSL
static inline void
get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
{
@ -270,6 +254,76 @@ get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
tv->tv_nsec = t.tv_nsec + ns % 1000000000;
}
/**
 * Open the MSM render node and set up the replay address space.
 *
 * If the kernel answers both MSM_PARAM_VA_START and MSM_PARAM_VA_SIZE,
 * dev->has_set_iova is set and the VMA heap spans the reported range.
 * Otherwise a single FAKE_ADDRESS_SPACE_SIZE buffer is allocated and mapped
 * (dev->va_id / dev->va_iova / dev->va_map) and the heap spans only that
 * buffer's IOVA range.
 *
 * Never returns NULL: every failure terminates the process via err()/errx().
 */
static struct device *
device_create()
{
   struct device *dev = calloc(sizeof(struct device), 1);
   if (!dev) {
      err(1, "calloc failure");
   }

   dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
   if (dev->fd < 0) {
      errx(1, "Cannot open MSM fd!");
   }

   uint64_t va_start, va_size;

   struct drm_msm_param req = {
      .pipe = MSM_PIPE_3D0,
      .param = MSM_PARAM_VA_START,
   };

   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
   va_start = req.value;

   if (!ret) {
      req.param = MSM_PARAM_VA_SIZE;
      ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
      va_size = req.value;
   }

   /* Both queries must succeed; otherwise the fallback buffer below is used
    * and the rest of the tool must not take the per-buffer GEM path.
    */
   dev->has_set_iova = !ret;

   if (ret) {
      printf("MSM_INFO_SET_IOVA is not supported!\n");

      struct drm_msm_gem_new req_new = {.size = FAKE_ADDRESS_SPACE_SIZE, .flags = MSM_BO_CACHED_COHERENT};
      ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req_new, sizeof(req_new));
      if (ret) {
         err(1, "DRM_MSM_GEM_NEW failure");
      }
      dev->va_id = req_new.handle;

      struct drm_msm_gem_info req_info = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_IOVA,
      };
      ret = drmCommandWriteRead(dev->fd,
                                DRM_MSM_GEM_INFO, &req_info, sizeof(req_info));
      if (ret) {
         err(1, "MSM_INFO_GET_IOVA failure");
      }
      dev->va_iova = req_info.value;

      struct drm_msm_gem_info req_offset = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_OFFSET,
      };
      ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_offset, sizeof(req_offset));
      if (ret) {
         err(1, "MSM_INFO_GET_OFFSET failure");
      }

      dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                         dev->fd, req_offset.value);
      if (dev->va_map == MAP_FAILED) {
         err(1, "mmap failure");
      }

      /* The heap hands out addresses only inside the pre-allocated buffer. */
      va_start = dev->va_iova;
      va_size = FAKE_ADDRESS_SPACE_SIZE;

      printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
   }

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   return dev;
}
static void
device_submit_cmdstreams(struct device *dev)
{
@ -285,7 +339,6 @@ device_submit_cmdstreams(struct device *dev)
struct cmdstream *cmd;
u_vector_foreach(cmd, &dev->cmdstreams) {
struct buffer *cmdstream_buf = device_get_buffer(dev, cmd->iova);
cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;
uint32_t bo_idx = 0;
rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
@ -295,10 +348,17 @@ device_submit_cmdstreams(struct device *dev)
bo_idx++;
}
if (cmdstream_buf)
cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;
struct drm_msm_gem_submit_cmd *submit_cmd = &cmds[idx];
submit_cmd->type = MSM_SUBMIT_CMD_BUF;
submit_cmd->submit_idx = bo_idx;
submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
if (dev->has_set_iova) {
submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
} else {
submit_cmd->submit_offset = cmd->iova - dev->va_iova;
}
submit_cmd->size = cmd->size;
submit_cmd->pad = 0;
submit_cmd->nr_relocs = 0;
@ -313,17 +373,29 @@ device_submit_cmdstreams(struct device *dev)
bo_count++;
}
if (!dev->has_set_iova) {
bo_count = 1;
}
struct drm_msm_gem_submit_bo *bo_list =
calloc(sizeof(struct drm_msm_gem_submit_bo), bo_count);
uint32_t bo_idx = 0;
rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
submit_bo->handle = buf->gem_handle;
submit_bo->flags = buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
submit_bo->presumed = buf->iova;
if (dev->has_set_iova) {
uint32_t bo_idx = 0;
rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
submit_bo->handle = buf->gem_handle;
submit_bo->flags =
buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
submit_bo->presumed = buf->iova;
buf->flags = 0;
buf->flags = 0;
}
} else {
bo_list[0].handle = dev->va_id;
bo_list[0].flags =
MSM_SUBMIT_BO_DUMP | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
bo_list[0].presumed = dev->va_iova;
}
struct drm_msm_gem_submit submit_req = {
@ -372,6 +444,13 @@ buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
if (!dev->has_set_iova) {
uint64_t offset = buf->iova - dev->va_iova;
assert(offset < FAKE_ADDRESS_SPACE_SIZE && (offset + buf->size) <= FAKE_ADDRESS_SPACE_SIZE);
buf->map = ((uint8_t*)dev->va_map) + offset;
return;
}
{
struct drm_msm_gem_new req = {.size = buf->size, .flags = MSM_BO_WC};
@ -424,16 +503,147 @@ buffer_mem_alloc(struct device *dev, struct buffer *buf)
/**
 * Release the backing storage of a replay buffer and return its IOVA range
 * to the device VMA heap.
 *
 * With has_set_iova the buffer has its own mapping and GEM handle, which are
 * unmapped and closed exactly once.  Without it, buf->map points into the
 * shared dev->va_map mapping, so nothing is unmapped or closed here.
 */
void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   if (dev->has_set_iova) {
      munmap(buf->map, buf->size);

      struct drm_gem_close req = {
         .handle = buf->gem_handle,
      };
      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}
#else
/**
 * ioctl() wrapper that transparently restarts the call when it fails with
 * EINTR or EAGAIN.
 *
 * Returns the result of the first ioctl() call that either succeeds or
 * fails with a different errno.
 */
static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   for (;;) {
      const int rc = ioctl(fd, request, arg);

      if (rc != -1)
         return rc;

      /* Anything other than a transient failure is reported to the caller. */
      if (errno != EINTR && errno != EAGAIN)
         return rc;
   }
}
/**
 * Open the KGSL device and set up the replay address space.
 *
 * A single FAKE_ADDRESS_SPACE_SIZE buffer is allocated and mapped; its id,
 * GPU address and CPU mapping are stored in dev->va_id, dev->va_iova and
 * dev->va_map, and the VMA heap spans exactly that buffer.  A draw context
 * is then created and its id stored in dev->context_id.
 *
 * Never returns NULL: every failure terminates the process via err()/errx().
 */
static struct device *
device_create()
{
   struct device *dev = calloc(sizeof(struct device), 1);
   if (!dev) {
      err(1, "calloc failure");
   }

   static const char path[] = "/dev/kgsl-3d0";

   dev->fd = open(path, O_RDWR | O_CLOEXEC);
   if (dev->fd < 0) {
      errx(1, "Cannot open KGSL fd!");
   }

   struct kgsl_gpumem_alloc_id req = {
      .size = FAKE_ADDRESS_SPACE_SIZE,
      .flags = KGSL_MEMFLAGS_IOCOHERENT,
   };

   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
   if (ret) {
      err(1, "IOCTL_KGSL_GPUMEM_ALLOC_ID failure");
   }

   dev->va_id = req.id;
   dev->va_iova = req.gpuaddr;

   /* The allocation is selected for mmap by passing id << 12 as the offset;
    * widen first so the shift happens in off_t.
    */
   dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE,
                      MAP_SHARED, dev->fd, (off_t)req.id << 12);
   if (dev->va_map == MAP_FAILED) {
      err(1, "mmap failure");
   }

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, req.gpuaddr, ROUND_DOWN_TO(FAKE_ADDRESS_SPACE_SIZE, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   struct kgsl_drawctxt_create drawctxt_req = {
      .flags = KGSL_CONTEXT_SAVE_GMEM |
               KGSL_CONTEXT_NO_GMEM_ALLOC |
               KGSL_CONTEXT_PREAMBLE,
   };

   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DRAWCTXT_CREATE, &drawctxt_req);
   if (ret) {
      err(1, "IOCTL_KGSL_DRAWCTXT_CREATE failure");
   }

   printf("Allocated iova %" PRIx64 "\n", dev->va_iova);

   dev->context_id = drawctxt_req.drawctxt_id;

   return dev;
}
static void
device_submit_cmdstreams(struct device *dev)
{
device_free_unused_buffers(dev);
device_mark_buffers(dev);
if (!u_vector_length(&dev->cmdstreams))
return;
struct kgsl_command_object cmds[u_vector_length(&dev->cmdstreams)];
uint32_t idx = 0;
struct cmdstream *cmd;
u_vector_foreach(cmd, &dev->cmdstreams) {
struct kgsl_command_object *submit_cmd = &cmds[idx++];
submit_cmd->gpuaddr = cmd->iova;
submit_cmd->size = cmd->size;
submit_cmd->flags = KGSL_CMDLIST_IB;
submit_cmd->id = dev->va_id;
}
struct kgsl_gpu_command submit_req = {
.flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
.cmdlist = (uintptr_t) &cmds,
.cmdsize = sizeof(struct kgsl_command_object),
.numcmds = u_vector_length(&dev->cmdstreams),
.numsyncs = 0,
.context_id = dev->context_id,
};
int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPU_COMMAND, &submit_req);
if (ret) {
err(1, "IOCTL_KGSL_GPU_COMMAND failure %d", ret);
}
struct kgsl_device_waittimestamp_ctxtid wait = {
.context_id = dev->context_id,
.timestamp = submit_req.timestamp,
.timeout = 3000,
};
ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
if (ret) {
err(1, "IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID failure %d", ret);
}
u_vector_finish(&dev->cmdstreams);
u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
}
static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
}
/**
 * Return buf's IOVA range to the device VMA heap.
 *
 * Only the heap bookkeeping is undone here; no mapping or kernel object is
 * released by this function.
 */
void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   struct util_vma_heap *heap = &dev->vma;

   util_vma_heap_free(heap, buf->iova, buf->size);
}
#endif
static void
upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
void *hostptr)
@ -444,7 +654,9 @@ upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
buf = calloc(sizeof(struct buffer), 1);
buf->iova = iova;
buf->size = size;
rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);
if (dev->has_set_iova)
rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);
buffer_mem_alloc(dev, buf);
} else if (buf->size != size) {
@ -462,14 +674,21 @@ static int
override_cmdstream(struct device *dev, struct cmdstream *cs,
const char *cmdstreamgen)
{
#if FD_REPLAY_KGSL
static const char *tmpfilename = "/sdcard/Download/cmdstream_override.rd";
#else
static const char *tmpfilename = "/tmp/cmdstream_override.rd";
#endif
/* Find a free space for the new cmdstreams and resources we will use
* when overriding existing cmdstream.
*/
/* TODO: should the size be configurable? */
uint64_t hole_size = 32 * 1024 * 1024;
dev->vma.alloc_high = true;
uint64_t hole_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
dev->vma.alloc_high = false;
util_vma_heap_free(&dev->vma, hole_iova, hole_size);
char cmd[2048];