diff --git a/src/asahi/drm-shim/asahi_noop.c b/src/asahi/drm-shim/asahi_noop.c index deebdda8b03..9407b408734 100644 --- a/src/asahi/drm-shim/asahi_noop.c +++ b/src/asahi/drm-shim/asahi_noop.c @@ -4,12 +4,118 @@ * SPDX-License-Identifier: MIT */ +#include + +#include "../lib/unstable_asahi_drm.h" #include "drm-shim/drm_shim.h" bool drm_shim_driver_prefers_first_render_node = true; +static const struct drm_asahi_params_global params = { + .unstable_uabi_version = DRM_ASAHI_UNSTABLE_UABI_VERSION, + .gpu_generation = 13, + .gpu_variant = 'G', + .gpu_revision = 0, + .vm_user_start = 0x1000000, + .vm_user_end = 0x5000000, + .vm_shader_start = 0x8000000, + .vm_shader_end = 0x9000000, + .vm_page_size = 4096, +}; + +struct asahi_bo { + struct shim_bo base; + uint32_t offset; +}; + +static struct asahi_bo * +asahi_bo(struct shim_bo *bo) +{ + return (struct asahi_bo *)bo; +} + +struct asahi_device { + uint64_t next_offset; +}; + +static struct asahi_device asahi = { + .next_offset = 0x1000, +}; + +static int +asahi_ioctl_noop(int fd, unsigned long request, void *arg) +{ + return 0; +} + +static int +asahi_ioctl_submit(int fd, unsigned long request, void *arg) +{ + return 0; +} + +static int +asahi_ioctl_gem_create(int fd, unsigned long request, void *arg) +{ + struct shim_fd *shim_fd = drm_shim_fd_lookup(fd); + struct drm_asahi_gem_create *create = arg; + struct asahi_bo *bo = calloc(1, sizeof(*bo)); + + drm_shim_bo_init(&bo->base, create->size); + + assert(UINT64_MAX - asahi.next_offset > create->size); + bo->offset = asahi.next_offset; + asahi.next_offset += create->size; + + create->handle = drm_shim_bo_get_handle(shim_fd, &bo->base); + + drm_shim_bo_put(&bo->base); + + return 0; +} + +static int +asahi_ioctl_gem_mmap_offset(int fd, unsigned long request, void *arg) +{ + struct shim_fd *shim_fd = drm_shim_fd_lookup(fd); + struct drm_asahi_gem_mmap_offset *map = arg; + struct shim_bo *bo = drm_shim_bo_lookup(shim_fd, map->handle); + + map->offset = drm_shim_bo_get_mmap_offset(shim_fd, bo); + + drm_shim_bo_put(bo); + + return 0; +} + +static int +asahi_ioctl_get_param(int fd, unsigned long request, void *arg) +{ + struct drm_asahi_get_params *gp = arg; + + switch (gp->param_group) { + case 0: + assert(gp->size == sizeof(struct drm_asahi_params_global)); + memcpy((void *)gp->pointer, ¶ms, gp->size); + return 0; + + default: + fprintf(stderr, "Unknown DRM_IOCTL_ASAHI_GET_PARAMS %d\n", + gp->param_group); + return -1; + } +} + static ioctl_fn_t driver_ioctls[] = { - /* The Asahi Linux UAPI is not yet upstream */ + [DRM_ASAHI_GET_PARAMS] = asahi_ioctl_get_param, + [DRM_ASAHI_VM_CREATE] = asahi_ioctl_noop, + [DRM_ASAHI_VM_DESTROY] = asahi_ioctl_noop, + [DRM_ASAHI_GEM_CREATE] = asahi_ioctl_gem_create, + [DRM_ASAHI_GEM_MMAP_OFFSET] = asahi_ioctl_gem_mmap_offset, + [DRM_ASAHI_GEM_BIND] = asahi_ioctl_noop, + [DRM_ASAHI_QUEUE_CREATE] = asahi_ioctl_noop, + [DRM_ASAHI_QUEUE_DESTROY] = asahi_ioctl_noop, + [DRM_ASAHI_SUBMIT] = asahi_ioctl_submit, }; void diff --git a/src/asahi/drm-shim/meson.build b/src/asahi/drm-shim/meson.build index ec0781d8754..cfc2541604f 100644 --- a/src/asahi/drm-shim/meson.build +++ b/src/asahi/drm-shim/meson.build @@ -4,8 +4,8 @@ libasahi_noop_drm_shim = shared_library( 'asahi_noop_drm_shim', 'asahi_noop.c', - include_directories: inc_src, - dependencies: dep_drm_shim, + include_directories: [inc_include, inc_src], + dependencies: [dep_drm_shim, dep_valgrind], gnu_symbol_visibility : 'hidden', install : true, ) diff --git a/src/asahi/lib/agx_bo.c b/src/asahi/lib/agx_bo.c index 
c51319efcab..a306d824f97 100644 --- a/src/asahi/lib/agx_bo.c +++ b/src/asahi/lib/agx_bo.c @@ -189,7 +189,7 @@ agx_bo_unreference(struct agx_bo *bo) * lock, let's make sure it's still not referenced before freeing it. */ if (p_atomic_read(&bo->refcnt) == 0) { - assert(!p_atomic_read_relaxed(&bo->writer_syncobj)); + assert(!p_atomic_read_relaxed(&bo->writer)); if (dev->debug & AGX_DBG_TRACE) agxdecode_track_free(dev->agxdecode, bo); @@ -225,12 +225,12 @@ agx_bo_create_aligned(struct agx_device *dev, unsigned size, unsigned align, * flush the cache to make space for the new allocation. */ if (!bo) - bo = agx_bo_alloc(dev, size, align, flags); + bo = dev->ops.bo_alloc(dev, size, align, flags); if (!bo) bo = agx_bo_cache_fetch(dev, size, align, flags, false); if (!bo) { agx_bo_cache_evict_all(dev); - bo = agx_bo_alloc(dev, size, align, flags); + bo = dev->ops.bo_alloc(dev, size, align, flags); } if (!bo) { diff --git a/src/asahi/lib/agx_bo.h b/src/asahi/lib/agx_bo.h index d92d74d94c8..665e1d68549 100644 --- a/src/asahi/lib/agx_bo.h +++ b/src/asahi/lib/agx_bo.h @@ -80,8 +80,8 @@ struct agx_bo { /* DMA-BUF fd clone for adding fences to imports/exports */ int prime_fd; - /* Syncobj handle of the current writer, if any */ - uint32_t writer_syncobj; + /* Current writer, if any (queue in upper 32 bits, syncobj in lower 32 bits) */ + uint64_t writer; /* Owner */ struct agx_device *dev; @@ -97,8 +97,30 @@ struct agx_bo { /* For debugging */ const char *label; + + /* virtio blob_id */ + uint32_t blob_id; + uint32_t vbo_res_id; }; +static inline uint32_t +agx_bo_writer_syncobj(uint64_t writer) +{ + return writer; +} + +static inline uint32_t +agx_bo_writer_queue(uint64_t writer) +{ + return writer >> 32; +} + +static inline uint64_t +agx_bo_writer(uint32_t queue, uint32_t syncobj) +{ + return (((uint64_t)queue) << 32) | syncobj; +} + struct agx_bo *agx_bo_create_aligned(struct agx_device *dev, unsigned size, unsigned align, enum agx_bo_flags flags, const char *label); @@ -115,8 +137,6 @@ struct agx_bo *agx_bo_import(struct agx_device *dev, int fd); int agx_bo_export(struct agx_bo *bo); void agx_bo_free(struct agx_device *dev, struct agx_bo *bo); -struct agx_bo *agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, - enum agx_bo_flags flags); struct agx_bo *agx_bo_cache_fetch(struct agx_device *dev, size_t size, size_t align, uint32_t flags, const bool dontwait); diff --git a/src/asahi/lib/agx_device.c b/src/asahi/lib/agx_device.c index efacc89a18c..193cc44d17d 100644 --- a/src/asahi/lib/agx_device.c +++ b/src/asahi/lib/agx_device.c @@ -1,6 +1,7 @@ /* * Copyright 2021 Alyssa Rosenzweig * Copyright 2019 Collabora, Ltd. + * Copyright 2020 Igalia S.L. * SPDX-License-Identifier: MIT */ @@ -10,6 +11,7 @@ #include "util/timespec.h" #include "agx_bo.h" #include "agx_compile.h" +#include "agx_device_virtio.h" #include "agx_scratch.h" #include "decode.h" #include "glsl_types.h" @@ -20,16 +22,25 @@ #include "drm-uapi/dma-buf.h" #include "util/blob.h" #include "util/log.h" +#include "util/mesa-sha1.h" #include "util/os_file.h" #include "util/os_mman.h" #include "util/os_time.h" #include "util/simple_mtx.h" #include "git_sha1.h" #include "nir_serialize.h" +#include "unstable_asahi_drm.h" +#include "vdrm.h" -/* TODO: Linux UAPI. Dummy defines to get some things to compile. 
*/ -#define ASAHI_BIND_READ 0 -#define ASAHI_BIND_WRITE 0 +static inline int +asahi_simple_ioctl(struct agx_device *dev, unsigned cmd, void *req) +{ + if (dev->is_virtio) { + return agx_virtio_simple_ioctl(dev, cmd, req); + } else { + return drmIoctl(dev->fd, cmd, req); + } +} /* clang-format off */ static const struct debug_named_value agx_debug_options[] = { @@ -101,10 +112,26 @@ static int agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, uint32_t flags) { - unreachable("Linux UAPI not yet upstream"); + struct drm_asahi_gem_bind gem_bind = { + .op = ASAHI_BIND_OP_BIND, + .flags = flags, + .handle = bo->handle, + .vm_id = dev->vm_id, + .offset = 0, + .range = bo->size, + .addr = addr, + }; + + int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND, &gem_bind); + if (ret) { + fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_BIND failed: %m (handle=%d)\n", + bo->handle); + } + + return ret; } -struct agx_bo * +static struct agx_bo * agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, enum agx_bo_flags flags) { @@ -117,7 +144,23 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, /* executable implies low va */ assert(!(flags & AGX_BO_EXEC) || (flags & AGX_BO_LOW_VA)); - unreachable("Linux UAPI not yet upstream"); + struct drm_asahi_gem_create gem_create = {.size = size}; + + if (flags & AGX_BO_WRITEBACK) + gem_create.flags |= ASAHI_GEM_WRITEBACK; + + if (!(flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))) { + gem_create.flags |= ASAHI_GEM_VM_PRIVATE; + gem_create.vm_id = dev->vm_id; + } + + int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_CREATE, &gem_create); + if (ret) { + fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_CREATE failed: %m\n"); + return NULL; + } + + handle = gem_create.handle; pthread_mutex_lock(&dev->bo_map_lock); bo = agx_lookup_bo(dev, handle); @@ -128,7 +171,7 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, assert(!memcmp(bo, &((struct agx_bo){}), sizeof(*bo))); bo->type = AGX_ALLOC_REGULAR; - bo->size = size; /* TODO: gem_create.size */ + bo->size = gem_create.size; bo->align = MAX2(dev->params.vm_page_size, align); bo->flags = flags; bo->dev = dev; @@ -157,13 +200,13 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, bind |= ASAHI_BIND_WRITE; } - int ret = agx_bo_bind(dev, bo, bo->ptr.gpu, bind); + ret = dev->ops.bo_bind(dev, bo, bo->ptr.gpu, bind); if (ret) { agx_bo_free(dev, bo); return NULL; } - agx_bo_mmap(bo); + dev->ops.bo_mmap(bo); if (flags & AGX_BO_LOW_VA) bo->ptr.gpu -= dev->shader_base; @@ -173,10 +216,31 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, return bo; } -void +static void agx_bo_mmap(struct agx_bo *bo) { - unreachable("Linux UAPI not yet upstream"); + struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle = bo->handle}; + int ret; + + if (bo->ptr.cpu) + return; + + ret = + drmIoctl(bo->dev->fd, DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET, &gem_mmap_offset); + if (ret) { + fprintf(stderr, "DRM_IOCTL_ASAHI_MMAP_BO failed: %m\n"); + assert(0); + } + + bo->ptr.cpu = os_mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, + bo->dev->fd, gem_mmap_offset.offset); + if (bo->ptr.cpu == MAP_FAILED) { + bo->ptr.cpu = NULL; + fprintf(stderr, + "mmap failed: result=%p size=0x%llx fd=%i offset=0x%llx %m\n", + bo->ptr.cpu, (long long)bo->size, bo->dev->fd, + (long long)gem_mmap_offset.offset); + } } struct agx_bo * @@ -239,8 +303,12 @@ agx_bo_import(struct agx_device *dev, int fd) abort(); } - ret = - agx_bo_bind(dev, bo, bo->ptr.gpu, ASAHI_BIND_READ | ASAHI_BIND_WRITE); + if (dev->is_virtio) 
{ + bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, bo->handle); + } + + ret = dev->ops.bo_bind(dev, bo, bo->ptr.gpu, + ASAHI_BIND_READ | ASAHI_BIND_WRITE); if (ret) { fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n", (long long)bo->ptr.gpu); @@ -293,11 +361,11 @@ agx_bo_export(struct agx_bo *bo) /* If there is a pending writer to this BO, import it into the buffer * for implicit sync. */ - uint32_t writer_syncobj = p_atomic_read_relaxed(&bo->writer_syncobj); - if (writer_syncobj) { + uint64_t writer = p_atomic_read_relaxed(&bo->writer); + if (writer) { int out_sync_fd = -1; - int ret = - drmSyncobjExportSyncFile(bo->dev->fd, writer_syncobj, &out_sync_fd); + int ret = drmSyncobjExportSyncFile( + bo->dev->fd, agx_bo_writer_syncobj(writer), &out_sync_fd); assert(ret >= 0); assert(out_sync_fd >= 0); @@ -331,10 +399,38 @@ agx_get_global_id(struct agx_device *dev) static ssize_t agx_get_params(struct agx_device *dev, void *buf, size_t size) { - /* TODO: Linux UAPI */ - unreachable("Linux UAPI not yet upstream"); + struct drm_asahi_get_params get_param = { + .param_group = 0, + .pointer = (uint64_t)(uintptr_t)buf, + .size = size, + }; + + memset(buf, 0, size); + + int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GET_PARAMS, &get_param); + if (ret) { + fprintf(stderr, "DRM_IOCTL_ASAHI_GET_PARAMS failed: %m\n"); + return -EINVAL; + } + + return get_param.size; } +static int +agx_submit(struct agx_device *dev, struct drm_asahi_submit *submit, + uint32_t vbo_res_id) +{ + return drmIoctl(dev->fd, DRM_IOCTL_ASAHI_SUBMIT, submit); +} + +const agx_device_ops_t agx_device_drm_ops = { + .bo_alloc = agx_bo_alloc, + .bo_bind = agx_bo_bind, + .bo_mmap = agx_bo_mmap, + .get_params = agx_get_params, + .submit = agx_submit, +}; + bool agx_open_device(void *memctx, struct agx_device *dev) { @@ -342,21 +438,119 @@ agx_open_device(void *memctx, struct agx_device *dev) debug_get_flags_option("ASAHI_MESA_DEBUG", agx_debug_options, 0); dev->agxdecode = agxdecode_new_context(); + dev->ops = agx_device_drm_ops; ssize_t params_size = -1; + drmVersionPtr version; - /* TODO: Linux UAPI */ - return false; + version = drmGetVersion(dev->fd); + if (!version) { + fprintf(stderr, "cannot get version: %s", strerror(errno)); + return NULL; + } - params_size = agx_get_params(dev, &dev->params, sizeof(dev->params)); + if (!strcmp(version->name, "asahi")) { + dev->is_virtio = false; + dev->ops = agx_device_drm_ops; + } else if (!strcmp(version->name, "virtio_gpu")) { + dev->is_virtio = true; + if (!agx_virtio_open_device(dev)) { + fprintf(stderr, + "Error opening virtio-gpu device for Asahi native context\n"); + return false; + } + } else { + return false; + } + + params_size = dev->ops.get_params(dev, &dev->params, sizeof(dev->params)); if (params_size <= 0) { assert(0); return false; } assert(params_size >= sizeof(dev->params)); - /* TODO: Linux UAPI: Params */ - unreachable("Linux UAPI not yet upstream"); + /* Refuse to probe. 
*/ + if (dev->params.unstable_uabi_version != DRM_ASAHI_UNSTABLE_UABI_VERSION) { + fprintf( + stderr, + "You are attempting to use upstream Mesa with a downstream kernel!\n" + "This WILL NOT work.\n" + "The Asahi UABI is unstable and NOT SUPPORTED in upstream Mesa.\n" + "UABI related code in upstream Mesa is not for use!\n" + "\n" + "Do NOT attempt to patch out checks, you WILL break your system.\n" + "Do NOT report bugs.\n" + "Do NOT ask Mesa developers for support.\n" + "Do NOT write guides about how to patch out these checks.\n" + "Do NOT package patches to Mesa to bypass this.\n" + "\n" + "~~~\n" + "This is not a place of honor.\n" + "No highly esteemed deed is commemorated here.\n" + "Nothing valued is here.\n" + "\n" + "What is here was dangerous and repulsive to us.\n" + "This message is a warning about danger.\n" + "\n" + "The danger is still present, in your time, as it was in ours.\n" + "The danger is unleashed only if you substantially disturb this place physically.\n" + "This place is best shunned and left uninhabited.\n" + "~~~\n" + "\n" + "THIS IS NOT A BUG. THIS IS YOU DOING SOMETHING BROKEN!\n"); + abort(); + } + + uint64_t incompat = + dev->params.feat_incompat & (~AGX_SUPPORTED_INCOMPAT_FEATURES); + if (incompat) { + fprintf(stderr, "Missing GPU incompat features: 0x%" PRIx64 "\n", + incompat); + assert(0); + return false; + } + + if (dev->params.gpu_generation >= 13 && dev->params.gpu_variant != 'P') { + const char *variant = " Unknown"; + switch (dev->params.gpu_variant) { + case 'G': + variant = ""; + break; + case 'S': + variant = " Pro"; + break; + case 'C': + variant = " Max"; + break; + case 'D': + variant = " Ultra"; + break; + } + snprintf(dev->name, sizeof(dev->name), "Apple M%d%s (G%d%c %02X)", + dev->params.gpu_generation - 12, variant, + dev->params.gpu_generation, dev->params.gpu_variant, + dev->params.gpu_revision + 0xA0); + } else { + // Note: untested, theoretically this is the logic for at least a few + // generations back. 
+ const char *variant = " Unknown"; + switch (dev->params.gpu_variant) { + case 'P': + variant = ""; + break; + case 'G': + variant = "X"; + break; + } + snprintf(dev->name, sizeof(dev->name), "Apple A%d%s (G%d%c %02X)", + dev->params.gpu_generation + 1, variant, + dev->params.gpu_generation, dev->params.gpu_variant, + dev->params.gpu_revision + 0xA0); + } + + dev->guard_size = dev->params.vm_page_size; + dev->shader_base = dev->params.vm_shader_start; util_sparse_array_init(&dev->bo_map, sizeof(struct agx_bo), 512); pthread_mutex_init(&dev->bo_map_lock, NULL); @@ -367,7 +561,14 @@ agx_open_device(void *memctx, struct agx_device *dev) for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) list_inithead(&dev->bo_cache.buckets[i]); - /* TODO: Linux UAPI: Create VM */ + struct drm_asahi_vm_create vm_create = {}; + + int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_VM_CREATE, &vm_create); + if (ret) { + fprintf(stderr, "DRM_IOCTL_ASAHI_VM_CREATE failed: %m\n"); + assert(0); + return false; + } simple_mtx_init(&dev->vma_lock, mtx_plain); util_vma_heap_init(&dev->main_heap, dev->params.vm_user_start, @@ -376,6 +577,8 @@ agx_open_device(void *memctx, struct agx_device *dev) &dev->usc_heap, dev->params.vm_shader_start, dev->params.vm_shader_end - dev->params.vm_shader_start + 1); + dev->vm_id = vm_create.vm_id; + agx_get_global_ids(dev); glsl_type_singleton_init_or_ref(); @@ -406,9 +609,34 @@ agx_close_device(struct agx_device *dev) } uint32_t -agx_create_command_queue(struct agx_device *dev, uint32_t caps) +agx_create_command_queue(struct agx_device *dev, uint32_t caps, + uint32_t priority) { - unreachable("Linux UAPI not yet upstream"); + struct drm_asahi_queue_create queue_create = { + .vm_id = dev->vm_id, + .queue_caps = caps, + .priority = priority, + .flags = 0, + }; + + int ret = + asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_QUEUE_CREATE, &queue_create); + if (ret) { + fprintf(stderr, "DRM_IOCTL_ASAHI_QUEUE_CREATE failed: %m\n"); + assert(0); + } + + return queue_create.queue_id; +} + +int +agx_destroy_command_queue(struct agx_device *dev, uint32_t queue_id) +{ + struct drm_asahi_queue_destroy queue_destroy = { + .queue_id = queue_id, + }; + + return drmIoctl(dev->fd, DRM_IOCTL_ASAHI_QUEUE_DESTROY, &queue_destroy); } int @@ -507,3 +735,56 @@ agx_get_gpu_timestamp(struct agx_device *dev) #error "invalid architecture for asahi" #endif } + +/* (Re)define UUID_SIZE to avoid including vulkan.h (or p_defines.h) here. */ +#define UUID_SIZE 16 + +void +agx_get_device_uuid(const struct agx_device *dev, void *uuid) +{ + struct mesa_sha1 sha1_ctx; + _mesa_sha1_init(&sha1_ctx); + + /* The device UUID uniquely identifies the given device within the machine. + * Since we never have more than one device, this doesn't need to be a real + * UUID, so we use SHA1("agx" + gpu_generation + gpu_variant + gpu_revision). 
+ */ + static const char *device_name = "agx"; + _mesa_sha1_update(&sha1_ctx, device_name, strlen(device_name)); + + _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_generation, + sizeof(dev->params.gpu_generation)); + _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_variant, + sizeof(dev->params.gpu_variant)); + _mesa_sha1_update(&sha1_ctx, &dev->params.gpu_revision, + sizeof(dev->params.gpu_revision)); + + uint8_t sha1[SHA1_DIGEST_LENGTH]; + _mesa_sha1_final(&sha1_ctx, sha1); + + assert(SHA1_DIGEST_LENGTH >= UUID_SIZE); + memcpy(uuid, sha1, UUID_SIZE); +} + +void +agx_get_driver_uuid(void *uuid) +{ + const char *driver_id = PACKAGE_VERSION MESA_GIT_SHA1; + + /* The driver UUID is used for determining sharability of images and memory + * between two Vulkan instances in separate processes, but also to + * determining memory objects and sharability between Vulkan and OpenGL + * driver. People who want to share memory need to also check the device + * UUID. + */ + struct mesa_sha1 sha1_ctx; + _mesa_sha1_init(&sha1_ctx); + + _mesa_sha1_update(&sha1_ctx, driver_id, strlen(driver_id)); + + uint8_t sha1[SHA1_DIGEST_LENGTH]; + _mesa_sha1_final(&sha1_ctx, sha1); + + assert(SHA1_DIGEST_LENGTH >= UUID_SIZE); + memcpy(uuid, sha1, UUID_SIZE); +} diff --git a/src/asahi/lib/agx_device.h b/src/asahi/lib/agx_device.h index 65cb918a793..c061f938647 100644 --- a/src/asahi/lib/agx_device.h +++ b/src/asahi/lib/agx_device.h @@ -5,6 +5,7 @@ #pragma once +#include #include "util/simple_mtx.h" #include "util/sparse_array.h" #include "util/timespec.h" @@ -12,6 +13,11 @@ #include "agx_bo.h" #include "agx_formats.h" #include "decode.h" +#include "unstable_asahi_drm.h" + +// TODO: this is a lie right now +static const uint64_t AGX_SUPPORTED_INCOMPAT_FEATURES = + DRM_ASAHI_FEAT_MANDATORY_ZS_COMPRESSION; enum agx_dbg { AGX_DBG_TRACE = BITFIELD_BIT(0), @@ -37,29 +43,6 @@ enum agx_dbg { AGX_DBG_FEEDBACK = BITFIELD_BIT(20), }; -/* Dummy partial declarations, pending real UAPI */ -enum drm_asahi_cmd_type { DRM_ASAHI_CMD_TYPE_PLACEHOLDER_FOR_DOWNSTREAM_UAPI }; -enum drm_asahi_sync_type { DRM_ASAHI_SYNC_SYNCOBJ }; -struct drm_asahi_sync { - uint32_t sync_type; - uint32_t handle; -}; -struct drm_asahi_params_global { - uint64_t vm_page_size; - uint64_t vm_user_start; - uint64_t vm_user_end; - uint64_t vm_shader_start; - uint64_t vm_shader_end; - uint32_t chip_id; - uint32_t num_clusters_total; - uint32_t gpu_generation; - uint32_t gpu_variant; - uint32_t num_dies; - uint32_t timer_frequency_hz; - uint32_t num_cores_per_cluster; - uint64_t core_masks[32]; -}; - /* How many power-of-two levels in the BO cache do we want? 
2^14 minimum chosen * as it is the page size that all allocations are rounded to */ @@ -72,6 +55,20 @@ struct drm_asahi_params_global { /* Forward decl only, do not pull in all of NIR */ struct nir_shader; +#define BARRIER_RENDER (1 << DRM_ASAHI_SUBQUEUE_RENDER) +#define BARRIER_COMPUTE (1 << DRM_ASAHI_SUBQUEUE_COMPUTE) + +typedef struct { + struct agx_bo *(*bo_alloc)(struct agx_device *dev, size_t size, size_t align, + enum agx_bo_flags flags); + int (*bo_bind)(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, + uint32_t flags); + void (*bo_mmap)(struct agx_bo *bo); + ssize_t (*get_params)(struct agx_device *dev, void *buf, size_t size); + int (*submit)(struct agx_device *dev, struct drm_asahi_submit *submit, + uint32_t vbo_res_id); +} agx_device_ops_t; + struct agx_device { uint32_t debug; @@ -81,6 +78,12 @@ struct agx_device { char name[64]; struct drm_asahi_params_global params; uint64_t next_global_id, last_global_id; + bool is_virtio; + agx_device_ops_t ops; + + /* vdrm device */ + struct vdrm_device *vdrm; + uint32_t next_blob_id; /* Device handle */ int fd; @@ -136,11 +139,11 @@ agx_lookup_bo(struct agx_device *dev, uint32_t handle) return util_sparse_array_get(&dev->bo_map, handle); } -void agx_bo_mmap(struct agx_bo *bo); - uint64_t agx_get_global_id(struct agx_device *dev); -uint32_t agx_create_command_queue(struct agx_device *dev, uint32_t caps); +uint32_t agx_create_command_queue(struct agx_device *dev, uint32_t caps, + uint32_t priority); +int agx_destroy_command_queue(struct agx_device *dev, uint32_t queue_id); int agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd); int agx_export_sync_file(struct agx_device *dev, struct agx_bo *bo); @@ -154,3 +157,6 @@ agx_gpu_time_to_ns(struct agx_device *dev, uint64_t gpu_time) { return (gpu_time * NSEC_PER_SEC) / dev->params.timer_frequency_hz; } + +void agx_get_device_uuid(const struct agx_device *dev, void *uuid); +void agx_get_driver_uuid(void *uuid); diff --git a/src/asahi/lib/agx_device_virtio.c b/src/asahi/lib/agx_device_virtio.c new file mode 100644 index 00000000000..8a31b8bb383 --- /dev/null +++ b/src/asahi/lib/agx_device_virtio.c @@ -0,0 +1,326 @@ +/* + * Copyright 2024 Sergio Lopez + * SPDX-License-Identifier: MIT + */ + +#include "agx_device_virtio.h" + +#include +#include + +#include "drm-uapi/virtgpu_drm.h" + +#define VIRGL_RENDERER_UNSTABLE_APIS 1 +#include "vdrm.h" +#include "virglrenderer_hw.h" + +#include "asahi_proto.h" + +/** + * Helper for simple pass-thru ioctls + */ +int +agx_virtio_simple_ioctl(struct agx_device *dev, unsigned cmd, void *_req) +{ + struct vdrm_device *vdrm = dev->vdrm; + unsigned req_len = sizeof(struct asahi_ccmd_ioctl_simple_req); + unsigned rsp_len = sizeof(struct asahi_ccmd_ioctl_simple_rsp); + + req_len += _IOC_SIZE(cmd); + if (cmd & IOC_OUT) + rsp_len += _IOC_SIZE(cmd); + + uint8_t buf[req_len]; + struct asahi_ccmd_ioctl_simple_req *req = (void *)buf; + struct asahi_ccmd_ioctl_simple_rsp *rsp; + + req->hdr = ASAHI_CCMD(IOCTL_SIMPLE, req_len); + req->cmd = cmd; + memcpy(req->payload, _req, _IOC_SIZE(cmd)); + + rsp = vdrm_alloc_rsp(vdrm, &req->hdr, rsp_len); + + int ret = vdrm_send_req(vdrm, &req->hdr, true); + if (ret) { + fprintf(stderr, "simple_ioctl: vdrm_send_req failed\n"); + return ret; + } + + if (cmd & IOC_OUT) + memcpy(_req, rsp->payload, _IOC_SIZE(cmd)); + + return rsp->ret; +} + +static struct agx_bo * +agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align, + enum agx_bo_flags flags) +{ + struct agx_bo *bo; + unsigned handle = 0; + 
uint64_t ptr_gpu; + + size = ALIGN_POT(size, dev->params.vm_page_size); + + /* executable implies low va */ + assert(!(flags & AGX_BO_EXEC) || (flags & AGX_BO_LOW_VA)); + + struct asahi_ccmd_gem_new_req req = { + .hdr = ASAHI_CCMD(GEM_NEW, sizeof(req)), + .size = size, + }; + + if (flags & AGX_BO_WRITEBACK) + req.flags |= ASAHI_GEM_WRITEBACK; + + uint32_t blob_flags = + VIRTGPU_BLOB_FLAG_USE_MAPPABLE | VIRTGPU_BLOB_FLAG_USE_SHAREABLE; + + req.bind_flags = ASAHI_BIND_READ; + if (!(flags & AGX_BO_READONLY)) { + req.bind_flags |= ASAHI_BIND_WRITE; + } + + uint32_t blob_id = p_atomic_inc_return(&dev->next_blob_id); + + ASSERTED bool lo = (flags & AGX_BO_LOW_VA); + + struct util_vma_heap *heap; + if (lo) + heap = &dev->usc_heap; + else + heap = &dev->main_heap; + + simple_mtx_lock(&dev->vma_lock); + ptr_gpu = util_vma_heap_alloc(heap, size + dev->guard_size, + dev->params.vm_page_size); + simple_mtx_unlock(&dev->vma_lock); + if (!ptr_gpu) { + fprintf(stderr, "Failed to allocate BO VMA\n"); + return NULL; + } + + req.addr = ptr_gpu; + req.blob_id = blob_id; + req.vm_id = dev->vm_id; + + handle = vdrm_bo_create(dev->vdrm, size, blob_flags, blob_id, &req.hdr); + if (!handle) { + fprintf(stderr, "vdrm_bo_create failed\n"); + return NULL; + } + + pthread_mutex_lock(&dev->bo_map_lock); + bo = agx_lookup_bo(dev, handle); + dev->max_handle = MAX2(dev->max_handle, handle); + pthread_mutex_unlock(&dev->bo_map_lock); + + /* Fresh handle */ + assert(!memcmp(bo, &((struct agx_bo){}), sizeof(*bo))); + + bo->type = AGX_ALLOC_REGULAR; + bo->size = size; + bo->align = MAX2(dev->params.vm_page_size, align); + bo->flags = flags; + bo->dev = dev; + bo->handle = handle; + bo->prime_fd = -1; + bo->blob_id = blob_id; + bo->ptr.gpu = ptr_gpu; + bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, handle); + + dev->ops.bo_mmap(bo); + + if (flags & AGX_BO_LOW_VA) + bo->ptr.gpu -= dev->shader_base; + + assert(bo->ptr.gpu < (1ull << (lo ? 
32 : 40))); + + return bo; +} + +static int +agx_virtio_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, + uint32_t flags) +{ + struct asahi_ccmd_gem_bind_req req = { + .op = ASAHI_BIND_OP_BIND, + .flags = flags, + .vm_id = dev->vm_id, + .res_id = bo->vbo_res_id, + .size = bo->size, + .addr = addr, + .hdr.cmd = ASAHI_CCMD_GEM_BIND, + .hdr.len = sizeof(struct asahi_ccmd_gem_bind_req), + }; + + int ret = vdrm_send_req(dev->vdrm, &req.hdr, false); + if (ret) { + fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_BIND failed: %d (handle=%d)\n", ret, + bo->handle); + } + + return ret; +} + +static void +agx_virtio_bo_mmap(struct agx_bo *bo) +{ + if (bo->ptr.cpu) { + return; + } + + bo->ptr.cpu = vdrm_bo_map(bo->dev->vdrm, bo->handle, bo->size, NULL); + if (bo->ptr.cpu == MAP_FAILED) { + bo->ptr.cpu = NULL; + fprintf(stderr, "mmap failed: result=%p size=0x%llx fd=%i\n", bo->ptr.cpu, + (long long)bo->size, bo->dev->fd); + } +} + +static ssize_t +agx_virtio_get_params(struct agx_device *dev, void *buf, size_t size) +{ + struct vdrm_device *vdrm = dev->vdrm; + struct asahi_ccmd_get_params_req req = { + .params.size = size, + .hdr.cmd = ASAHI_CCMD_GET_PARAMS, + .hdr.len = sizeof(struct asahi_ccmd_get_params_req), + }; + struct asahi_ccmd_get_params_rsp *rsp; + + rsp = + vdrm_alloc_rsp(vdrm, &req.hdr, sizeof(struct asahi_ccmd_get_params_rsp)); + + int ret = vdrm_send_req(vdrm, &req.hdr, true); + if (ret) + goto out; + + ret = rsp->ret; + if (!ret) { + memcpy(buf, &rsp->params, size); + return size; + } + +out: + return ret; +} + +static int +agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit, + uint32_t vbo_res_id) +{ + struct drm_asahi_command *commands = + (struct drm_asahi_command *)submit->commands; + struct drm_asahi_sync *in_syncs = (struct drm_asahi_sync *)submit->in_syncs; + struct drm_asahi_sync *out_syncs = + (struct drm_asahi_sync *)submit->out_syncs; + size_t req_len = sizeof(struct asahi_ccmd_submit_req); + + for (int i = 0; i < submit->command_count; i++) { + switch (commands[i].cmd_type) { + case DRM_ASAHI_CMD_COMPUTE: { + req_len += sizeof(struct drm_asahi_command) + + sizeof(struct drm_asahi_cmd_compute); + break; + } + + case DRM_ASAHI_CMD_RENDER: { + struct drm_asahi_cmd_render *render = + (struct drm_asahi_cmd_render *)commands[i].cmd_buffer; + req_len += sizeof(struct drm_asahi_command) + + sizeof(struct drm_asahi_cmd_render); + req_len += render->fragment_attachment_count * + sizeof(struct drm_asahi_attachment); + break; + } + + default: + return EINVAL; + } + } + + struct asahi_ccmd_submit_req *req = + (struct asahi_ccmd_submit_req *)calloc(1, req_len); + + req->queue_id = submit->queue_id; + req->result_res_id = vbo_res_id; + req->command_count = submit->command_count; + + char *ptr = (char *)&req->payload; + + for (int i = 0; i < submit->command_count; i++) { + memcpy(ptr, &commands[i], sizeof(struct drm_asahi_command)); + ptr += sizeof(struct drm_asahi_command); + + memcpy(ptr, (char *)commands[i].cmd_buffer, commands[i].cmd_buffer_size); + ptr += commands[i].cmd_buffer_size; + + if (commands[i].cmd_type == DRM_ASAHI_CMD_RENDER) { + struct drm_asahi_cmd_render *render = + (struct drm_asahi_cmd_render *)commands[i].cmd_buffer; + size_t fragments_size = sizeof(struct drm_asahi_attachment) * + render->fragment_attachment_count; + memcpy(ptr, (char *)render->fragment_attachments, fragments_size); + ptr += fragments_size; + } + } + + req->hdr.cmd = ASAHI_CCMD_SUBMIT; + req->hdr.len = req_len; + + struct drm_virtgpu_execbuffer_syncobj *vdrm_in_syncs = 
calloc( + submit->in_sync_count, sizeof(struct drm_virtgpu_execbuffer_syncobj)); + for (int i = 0; i < submit->in_sync_count; i++) { + vdrm_in_syncs[i].handle = in_syncs[i].handle; + vdrm_in_syncs[i].point = in_syncs[i].timeline_value; + } + + struct drm_virtgpu_execbuffer_syncobj *vdrm_out_syncs = calloc( + submit->out_sync_count, sizeof(struct drm_virtgpu_execbuffer_syncobj)); + for (int i = 0; i < submit->out_sync_count; i++) { + vdrm_out_syncs[i].handle = out_syncs[i].handle; + vdrm_out_syncs[i].point = out_syncs[i].timeline_value; + } + + struct vdrm_execbuf_params p = { + /* Signal the host we want to wait for the command to complete */ + .ring_idx = 1, + .req = &req->hdr, + .num_in_syncobjs = submit->in_sync_count, + .in_syncobjs = vdrm_in_syncs, + .num_out_syncobjs = submit->out_sync_count, + .out_syncobjs = vdrm_out_syncs, + }; + + int ret = vdrm_execbuf(dev->vdrm, &p); + + free(vdrm_out_syncs); + free(vdrm_in_syncs); + free(req); + return ret; +} + +const agx_device_ops_t agx_virtio_device_ops = { + .bo_alloc = agx_virtio_bo_alloc, + .bo_bind = agx_virtio_bo_bind, + .bo_mmap = agx_virtio_bo_mmap, + .get_params = agx_virtio_get_params, + .submit = agx_virtio_submit, +}; + +bool +agx_virtio_open_device(struct agx_device *dev) +{ + struct vdrm_device *vdrm; + + vdrm = vdrm_device_connect(dev->fd, 2); + if (!vdrm) { + fprintf(stderr, "could not connect vdrm\n"); + return false; + } + + dev->vdrm = vdrm; + dev->ops = agx_virtio_device_ops; + return true; +} diff --git a/src/asahi/lib/agx_device_virtio.h b/src/asahi/lib/agx_device_virtio.h new file mode 100644 index 00000000000..895fa311e1e --- /dev/null +++ b/src/asahi/lib/agx_device_virtio.h @@ -0,0 +1,13 @@ +/* + * Copyright 2024 Sergio Lopez + * SPDX-License-Identifier: MIT + */ + +#pragma once + +#include +#include "agx_device.h" + +int agx_virtio_simple_ioctl(struct agx_device *dev, unsigned cmd, void *_req); + +bool agx_virtio_open_device(struct agx_device *dev); diff --git a/src/asahi/lib/asahi_proto.h b/src/asahi/lib/asahi_proto.h new file mode 100644 index 00000000000..8820b46a5e7 --- /dev/null +++ b/src/asahi/lib/asahi_proto.h @@ -0,0 +1,133 @@ +/* + * Copyright 2024 Sergio Lopez + * Copyright 2022 Google LLC + * SPDX-License-Identifier: MIT + */ + +#ifndef ASAHI_PROTO_H_ +#define ASAHI_PROTO_H_ + +/** + * Defines the layout of shmem buffer used for host->guest communication. + */ +struct asahi_shmem { + struct vdrm_shmem base; + + /** + * Counter that is incremented on asynchronous errors, like SUBMIT + * or GEM_NEW failures. The guest should treat errors as context- + * lost. + */ + uint32_t async_error; + + /** + * Counter that is incremented on global fault (see MSM_PARAM_FAULTS) + */ + uint32_t global_faults; +}; +DEFINE_CAST(vdrm_shmem, asahi_shmem) + +/* + * Possible cmd types for "command stream", ie. payload of EXECBUF ioctl: + */ +enum asahi_ccmd { + ASAHI_CCMD_NOP = 1, /* No payload, can be used to sync with host */ + ASAHI_CCMD_IOCTL_SIMPLE, + ASAHI_CCMD_GET_PARAMS, + ASAHI_CCMD_GEM_NEW, + ASAHI_CCMD_GEM_BIND, + ASAHI_CCMD_SUBMIT, +}; + +#define ASAHI_CCMD(_cmd, _len) \ + (struct vdrm_ccmd_req) \ + { \ + .cmd = ASAHI_CCMD_##_cmd, .len = (_len), \ + } + +/* + * ASAHI_CCMD_NOP + */ +struct asahi_ccmd_nop_req { + struct vdrm_ccmd_req hdr; +}; + +/* + * ASAHI_CCMD_IOCTL_SIMPLE + * + * Forward simple/flat IOC_RW or IOC_W ioctls. Limited ioctls are supported. 
+ */ +struct asahi_ccmd_ioctl_simple_req { + struct vdrm_ccmd_req hdr; + + uint32_t cmd; + uint8_t payload[]; +}; +DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_ioctl_simple_req) + +struct asahi_ccmd_ioctl_simple_rsp { + struct vdrm_ccmd_rsp hdr; + + /* ioctl return value, interrupted syscalls are handled on the host without + * returning to the guest. + */ + int32_t ret; + + /* The output payload for IOC_RW ioctls, the payload is the same size as + * asahi_context_cmd_ioctl_simple_req. + * + * For IOC_W ioctls (userspace writes, kernel reads) this is zero length. + */ + uint8_t payload[]; +}; + +struct asahi_ccmd_get_params_req { + struct vdrm_ccmd_req hdr; + struct drm_asahi_get_params params; +}; +DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_get_params_req) + +struct asahi_ccmd_get_params_rsp { + struct vdrm_ccmd_rsp hdr; + int32_t ret; + struct drm_asahi_params_global params; +}; + +struct asahi_ccmd_gem_new_req { + struct vdrm_ccmd_req hdr; + uint32_t flags; + uint32_t bind_flags; + uint32_t vm_id; + uint32_t blob_id; + uint64_t size; + uint64_t addr; +}; +DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_gem_new_req) + +struct asahi_ccmd_gem_bind_req { + struct vdrm_ccmd_req hdr; + uint32_t op; + uint32_t flags; + uint32_t vm_id; + uint32_t res_id; + uint64_t size; + uint64_t addr; +}; +DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_gem_bind_req) + +struct asahi_ccmd_gem_bind_rsp { + struct vdrm_ccmd_rsp hdr; + int32_t ret; +}; + +struct asahi_ccmd_submit_req { + struct vdrm_ccmd_req hdr; + uint32_t queue_id; + uint32_t result_res_id; + uint32_t command_count; + + uint8_t payload[]; +}; +DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_submit_req) + +#endif // ASAHI_PROTO_H_ diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index f5d32155bfc..e981e6ed4de 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -18,18 +18,11 @@ #include "util/u_hexdump.h" #include "decode.h" +#include "unstable_asahi_drm.h" #ifdef __APPLE__ #include "agx_iokit.h" #endif -/* Pending UAPI */ -struct drm_asahi_params_global { - int gpu_generation; - int gpu_variant; - int chip_id; - int num_clusters_total; -}; - struct libagxdecode_config lib_config; UNUSED static const char *agx_alloc_types[AGX_NUM_ALLOC] = {"mem", "map", @@ -283,6 +276,11 @@ agxdecode_map_read_write(struct agxdecode_ctx *ctx) DUMP_UNPACKED(T, temp, str "\n"); \ } +#define DUMP_FIELD(struct, fmt, field) \ + { \ + fprintf(agxdecode_dump_stream, #field " = " fmt "\n", struct->field); \ + } + #define agxdecode_log(str) fputs(str, agxdecode_dump_stream) #define agxdecode_msg(str) fprintf(agxdecode_dump_stream, "// %s", str) @@ -980,6 +978,116 @@ agxdecode_image_heap(struct agxdecode_ctx *ctx, uint64_t heap, agxdecode_map_read_write(ctx); } +void +agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx, + struct drm_asahi_params_global *params, + struct drm_asahi_cmd_render *c, bool verbose) +{ + agxdecode_dump_file_open(); + + DUMP_FIELD(c, "%llx", flags); + DUMP_FIELD(c, "0x%llx", encoder_ptr); + agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_vdm, verbose, + params, NULL); + DUMP_FIELD(c, "0x%x", encoder_id); + DUMP_FIELD(c, "0x%x", cmd_ta_id); + DUMP_FIELD(c, "0x%x", cmd_3d_id); + DUMP_FIELD(c, "0x%x", ppp_ctrl); + DUMP_FIELD(c, "0x%llx", ppp_multisamplectl); + DUMP_CL(ZLS_CONTROL, &c->zls_ctrl, "ZLS Control"); + DUMP_FIELD(c, "0x%llx", depth_buffer_load); + DUMP_FIELD(c, "0x%llx", depth_buffer_store); + DUMP_FIELD(c, "0x%llx", depth_buffer_partial); + DUMP_FIELD(c, "0x%llx", stencil_buffer_load); + DUMP_FIELD(c, "0x%llx", stencil_buffer_store); + 
DUMP_FIELD(c, "0x%llx", stencil_buffer_partial); + DUMP_FIELD(c, "0x%llx", scissor_array); + DUMP_FIELD(c, "0x%llx", depth_bias_array); + DUMP_FIELD(c, "%d", fb_width); + DUMP_FIELD(c, "%d", fb_height); + DUMP_FIELD(c, "%d", layers); + DUMP_FIELD(c, "%d", samples); + DUMP_FIELD(c, "%d", sample_size); + DUMP_FIELD(c, "%d", tib_blocks); + DUMP_FIELD(c, "%d", utile_width); + DUMP_FIELD(c, "%d", utile_height); + DUMP_FIELD(c, "0x%x", load_pipeline); + DUMP_FIELD(c, "0x%x", load_pipeline_bind); + agxdecode_stateful(ctx, c->load_pipeline & ~0x7, "Load pipeline", + agxdecode_usc, verbose, params, NULL); + DUMP_FIELD(c, "0x%x", store_pipeline); + DUMP_FIELD(c, "0x%x", store_pipeline_bind); + agxdecode_stateful(ctx, c->store_pipeline & ~0x7, "Store pipeline", + agxdecode_usc, verbose, params, NULL); + DUMP_FIELD(c, "0x%x", partial_reload_pipeline); + DUMP_FIELD(c, "0x%x", partial_reload_pipeline_bind); + agxdecode_stateful(ctx, c->partial_reload_pipeline & ~0x7, + "Partial reload pipeline", agxdecode_usc, verbose, params, + NULL); + DUMP_FIELD(c, "0x%x", partial_store_pipeline); + DUMP_FIELD(c, "0x%x", partial_store_pipeline_bind); + agxdecode_stateful(ctx, c->partial_store_pipeline & ~0x7, + "Partial store pipeline", agxdecode_usc, verbose, params, + NULL); + + DUMP_FIELD(c, "0x%x", depth_dimensions); + DUMP_FIELD(c, "0x%x", isp_bgobjdepth); + DUMP_FIELD(c, "0x%x", isp_bgobjvals); + + agxdecode_sampler_heap(ctx, c->vertex_sampler_array, + c->vertex_sampler_count); + + /* Linux driver doesn't use this, at least for now */ + assert(c->fragment_sampler_array == c->vertex_sampler_array); + assert(c->fragment_sampler_count == c->vertex_sampler_count); + + DUMP_FIELD(c, "%d", vertex_attachment_count); + struct drm_asahi_attachment *vertex_attachments = + (void *)c->vertex_attachments; + for (unsigned i = 0; i < c->vertex_attachment_count; i++) { + DUMP_FIELD((&vertex_attachments[i]), "0x%x", order); + DUMP_FIELD((&vertex_attachments[i]), "0x%llx", size); + DUMP_FIELD((&vertex_attachments[i]), "0x%llx", pointer); + } + DUMP_FIELD(c, "%d", fragment_attachment_count); + struct drm_asahi_attachment *fragment_attachments = + (void *)c->fragment_attachments; + for (unsigned i = 0; i < c->fragment_attachment_count; i++) { + DUMP_FIELD((&fragment_attachments[i]), "0x%x", order); + DUMP_FIELD((&fragment_attachments[i]), "0x%llx", size); + DUMP_FIELD((&fragment_attachments[i]), "0x%llx", pointer); + } + + agxdecode_map_read_write(ctx); +} + +void +agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx, + struct drm_asahi_params_global *params, + struct drm_asahi_cmd_compute *c, bool verbose) +{ + agxdecode_dump_file_open(); + + DUMP_FIELD(c, "%llx", flags); + DUMP_FIELD(c, "0x%llx", encoder_ptr); + agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_cdm, verbose, + params, NULL); + DUMP_FIELD(c, "0x%x", encoder_id); + DUMP_FIELD(c, "0x%x", cmd_id); + + agxdecode_sampler_heap(ctx, c->sampler_array, c->sampler_count); + + agxdecode_map_read_write(ctx); + + if (c->helper_program & 1) { + fprintf(agxdecode_dump_stream, "Helper program:\n"); + uint8_t buf[1024]; + agx_disassemble( + buf, agxdecode_fetch_gpu_array(ctx, c->helper_program & ~1, buf), + agxdecode_dump_stream); + } +} + static void chip_id_to_params(decoder_params *params, uint32_t chip_id) { diff --git a/src/asahi/lib/decode.h b/src/asahi/lib/decode.h index 6ae6d8ec5d3..0e91be03376 100644 --- a/src/asahi/lib/decode.h +++ b/src/asahi/lib/decode.h @@ -10,6 +10,8 @@ #include #include "agx_bo.h" +#include "unstable_asahi_drm.h" + struct agxdecode_ctx; 
struct agxdecode_ctx *agxdecode_new_context(void); @@ -26,6 +28,16 @@ void agxdecode_cmdstream(struct agxdecode_ctx *ctx, unsigned cmdbuf_index, void agxdecode_image_heap(struct agxdecode_ctx *ctx, uint64_t heap, unsigned nr_entries); +void agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx, + struct drm_asahi_params_global *params, + struct drm_asahi_cmd_render *cmdbuf, + bool verbose); + +void agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx, + struct drm_asahi_params_global *params, + struct drm_asahi_cmd_compute *cmdbuf, + bool verbose); + void agxdecode_dump_file_open(void); void agxdecode_track_alloc(struct agxdecode_ctx *ctx, struct agx_bo *alloc); diff --git a/src/asahi/lib/meson.build b/src/asahi/lib/meson.build index fefdbafcf10..7a99b95c689 100644 --- a/src/asahi/lib/meson.build +++ b/src/asahi/lib/meson.build @@ -9,6 +9,7 @@ libasahi_lib_files = files( 'agx_bo.c', 'agx_border.c', 'agx_device.c', + 'agx_device_virtio.c', 'agx_formats.c', 'agx_linker.c', 'agx_bg_eot.c', @@ -86,10 +87,10 @@ libagx_shaders = custom_target( libasahi_lib = static_library( 'asahi_lib', [libasahi_lib_files, libagx_shaders, agx_pack], - include_directories : inc_asahi, + include_directories : [inc_asahi, inc_virtio_gpu, inc_virtio_vdrm], c_args : [no_override_init_args], gnu_symbol_visibility : 'hidden', - link_with: [libasahi_decode], + link_with: [libasahi_decode, libvdrm], dependencies: [dep_libdrm, dep_valgrind, idep_nir], build_by_default : false, ) diff --git a/src/asahi/lib/unstable_asahi_drm.h b/src/asahi/lib/unstable_asahi_drm.h new file mode 100644 index 00000000000..09914724fb3 --- /dev/null +++ b/src/asahi/lib/unstable_asahi_drm.h @@ -0,0 +1,666 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) The Asahi Linux Contributors + * + * Based on asahi_drm.h which is + * + * Copyright © 2014-2018 Broadcom + * Copyright © 2019 Collabora ltd. + */ +#ifndef _ASAHI_DRM_H_ +#define _ASAHI_DRM_H_ + +#include "drm-uapi/drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/* + * The UAPI defined in this file MUST NOT BE USED. End users, DO NOT attempt to + * use upstream Mesa with asahi kernels, it will blow up. Distro packagers, DO + * NOT patch upstream Mesa to do the same. 
+ */ +#define DRM_ASAHI_UNSTABLE_UABI_VERSION (0xDEADBEEF) + +#define DRM_ASAHI_GET_PARAMS 0x00 +#define DRM_ASAHI_VM_CREATE 0x01 +#define DRM_ASAHI_VM_DESTROY 0x02 +#define DRM_ASAHI_GEM_CREATE 0x03 +#define DRM_ASAHI_GEM_MMAP_OFFSET 0x04 +#define DRM_ASAHI_GEM_BIND 0x05 +#define DRM_ASAHI_QUEUE_CREATE 0x06 +#define DRM_ASAHI_QUEUE_DESTROY 0x07 +#define DRM_ASAHI_SUBMIT 0x08 +#define DRM_ASAHI_GET_TIME 0x09 + +#define DRM_ASAHI_MAX_CLUSTERS 32 + +struct drm_asahi_params_global { + __u32 unstable_uabi_version; + __u32 pad0; + + __u64 feat_compat; + __u64 feat_incompat; + + __u32 gpu_generation; + __u32 gpu_variant; + __u32 gpu_revision; + __u32 chip_id; + + __u32 num_dies; + __u32 num_clusters_total; + __u32 num_cores_per_cluster; + __u32 num_frags_per_cluster; + __u32 num_gps_per_cluster; + __u32 num_cores_total_active; + __u64 core_masks[DRM_ASAHI_MAX_CLUSTERS]; + + __u32 vm_page_size; + __u32 pad1; + __u64 vm_user_start; + __u64 vm_user_end; + __u64 vm_shader_start; + __u64 vm_shader_end; + + __u32 max_syncs_per_submission; + __u32 max_commands_per_submission; + __u32 max_commands_in_flight; + __u32 max_attachments; + + __u32 timer_frequency_hz; + __u32 min_frequency_khz; + __u32 max_frequency_khz; + __u32 max_power_mw; + + __u32 result_render_size; + __u32 result_compute_size; + + __u32 firmware_version[4]; +}; + +/* +enum drm_asahi_feat_compat { +}; +*/ + +enum drm_asahi_feat_incompat { + DRM_ASAHI_FEAT_MANDATORY_ZS_COMPRESSION = (1UL) << 0, +}; + +struct drm_asahi_get_params { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @param: Parameter group to fetch (MBZ) */ + __u32 param_group; + + /** @pad: MBZ */ + __u32 pad; + + /** @value: User pointer to write parameter struct */ + __u64 pointer; + + /** @value: Size of user buffer, max size supported on return */ + __u64 size; +}; + +struct drm_asahi_vm_create { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @value: Returned VM ID */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +struct drm_asahi_vm_destroy { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @value: VM ID to be destroyed */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +#define ASAHI_GEM_WRITEBACK (1L << 0) +#define ASAHI_GEM_VM_PRIVATE (1L << 1) + +struct drm_asahi_gem_create { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @size: Size of the BO */ + __u64 size; + + /** @flags: BO creation flags */ + __u32 flags; + + /** @handle: VM ID to assign to the BO, if ASAHI_GEM_VM_PRIVATE is set. */ + __u32 vm_id; + + /** @handle: Returned GEM handle for the BO */ + __u32 handle; +}; + +struct drm_asahi_gem_mmap_offset { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @handle: Handle for the object being mapped. 
*/ + __u32 handle; + + /** @flags: Must be zero */ + __u32 flags; + + /** @offset: The fake offset to use for subsequent mmap call */ + __u64 offset; +}; + +enum drm_asahi_bind_op { + ASAHI_BIND_OP_BIND = 0, + ASAHI_BIND_OP_UNBIND = 1, + ASAHI_BIND_OP_UNBIND_ALL = 2, +}; + +#define ASAHI_BIND_READ (1L << 0) +#define ASAHI_BIND_WRITE (1L << 1) + +struct drm_asahi_gem_bind { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @op: Bind operation */ + __u32 op; + + /** @flags: One or more of ASAHI_BIND_* */ + __u32 flags; + + /** @handle: GEM object to bind */ + __u32 handle; + + /** @vm_id: The ID of the VM to bind to */ + __u32 vm_id; + + /** @offset: Offset into the object */ + __u64 offset; + + /** @range: Number of bytes from the object to bind to addr */ + __u64 range; + + /** @addr: Address to bind to */ + __u64 addr; +}; + +enum drm_asahi_cmd_type { + DRM_ASAHI_CMD_RENDER = 0, + DRM_ASAHI_CMD_BLIT = 1, + DRM_ASAHI_CMD_COMPUTE = 2, +}; + +/* Note: this is an enum so that it can be resolved by Rust bindgen. */ +enum drm_asahi_queue_cap { + DRM_ASAHI_QUEUE_CAP_RENDER = (1UL << DRM_ASAHI_CMD_RENDER), + DRM_ASAHI_QUEUE_CAP_BLIT = (1UL << DRM_ASAHI_CMD_BLIT), + DRM_ASAHI_QUEUE_CAP_COMPUTE = (1UL << DRM_ASAHI_CMD_COMPUTE), +}; + +struct drm_asahi_queue_create { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @flags: MBZ */ + __u32 flags; + + /** @vm_id: The ID of the VM this queue is bound to */ + __u32 vm_id; + + /** @queue_caps: Bitmask of DRM_ASAHI_QUEUE_CAP_* */ + __u32 queue_caps; + + /** @priority: Queue priority, 0-3 */ + __u32 priority; + + /** @queue_id: The returned queue ID */ + __u32 queue_id; +}; + +struct drm_asahi_queue_destroy { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @queue_id: The queue ID to be destroyed */ + __u32 queue_id; +}; + +enum drm_asahi_sync_type { + DRM_ASAHI_SYNC_SYNCOBJ = 0, + DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ = 1, +}; + +struct drm_asahi_sync { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @sync_type: One of drm_asahi_sync_type */ + __u32 sync_type; + + /** @handle: The sync object handle */ + __u32 handle; + + /** @timeline_value: Timeline value for timeline sync objects */ + __u64 timeline_value; +}; + +enum drm_asahi_subqueue { + DRM_ASAHI_SUBQUEUE_RENDER = 0, /* Also blit */ + DRM_ASAHI_SUBQUEUE_COMPUTE = 1, + DRM_ASAHI_SUBQUEUE_COUNT = 2, +}; + +#define DRM_ASAHI_BARRIER_NONE ~(0U) + +struct drm_asahi_command { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @cmd_type: One of drm_asahi_cmd_type */ + __u32 cmd_type; + + /** @flags: Flags for command submission */ + __u32 flags; + + /** @cmd_buffer: Pointer to the appropriate command buffer structure */ + __u64 cmd_buffer; + + /** @cmd_buffer_size: Size of the command buffer structure */ + __u64 cmd_buffer_size; + + /** @result_offset: Offset into the result BO to return information about this command */ + __u64 result_offset; + + /** @result_size: Size of the result data structure */ + __u64 result_size; + + /** @barriers: Array of command indices per subqueue to wait on */ + __u32 barriers[DRM_ASAHI_SUBQUEUE_COUNT]; +}; + +struct drm_asahi_submit { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @in_syncs: An optional array of drm_asahi_sync to wait on before starting this job. 
*/ + __u64 in_syncs; + + /** @in_syncs: An optional array of drm_asahi_sync objects to signal upon completion. */ + __u64 out_syncs; + + /** @commands: Pointer to the drm_asahi_command array of commands to submit. */ + __u64 commands; + + /** @flags: Flags for command submission (MBZ) */ + __u32 flags; + + /** @queue_id: The queue ID to be submitted to */ + __u32 queue_id; + + /** @result_handle: An optional BO handle to place result data in */ + __u32 result_handle; + + /** @in_sync_count: Number of sync objects to wait on before starting this job. */ + __u32 in_sync_count; + + /** @in_sync_count: Number of sync objects to signal upon completion of this job. */ + __u32 out_sync_count; + + /** @pad: Number of commands to be submitted */ + __u32 command_count; +}; + +struct drm_asahi_attachment { + /** @pointer: Base address of the attachment */ + __u64 pointer; + /** @size: Size of the attachment in bytes */ + __u64 size; + /** @order: Power of 2 exponent related to attachment size (?) */ + __u32 order; + /** @flags: MBZ */ + __u32 flags; +}; + +#define ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES (1UL << 0) +#define ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S (1UL << 1) +#define ASAHI_RENDER_VERTEX_SPILLS (1UL << 2) +#define ASAHI_RENDER_PROCESS_EMPTY_TILES (1UL << 3) +#define ASAHI_RENDER_NO_VERTEX_CLUSTERING (1UL << 4) +#define ASAHI_RENDER_MSAA_ZS (1UL << 5) +/* XXX check */ +#define ASAHI_RENDER_NO_PREEMPTION (1UL << 6) + +struct drm_asahi_cmd_render { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + __u64 flags; + + __u64 encoder_ptr; + + __u64 vertex_attachments; + __u64 fragment_attachments; + __u32 vertex_attachment_count; + __u32 fragment_attachment_count; + + __u32 vertex_helper_program; + __u32 fragment_helper_program; + __u32 vertex_helper_cfg; + __u32 fragment_helper_cfg; + __u64 vertex_helper_arg; + __u64 fragment_helper_arg; + + __u64 depth_buffer_load; + __u64 depth_buffer_load_stride; + __u64 depth_buffer_store; + __u64 depth_buffer_store_stride; + __u64 depth_buffer_partial; + __u64 depth_buffer_partial_stride; + __u64 depth_meta_buffer_load; + __u64 depth_meta_buffer_load_stride; + __u64 depth_meta_buffer_store; + __u64 depth_meta_buffer_store_stride; + __u64 depth_meta_buffer_partial; + __u64 depth_meta_buffer_partial_stride; + + __u64 stencil_buffer_load; + __u64 stencil_buffer_load_stride; + __u64 stencil_buffer_store; + __u64 stencil_buffer_store_stride; + __u64 stencil_buffer_partial; + __u64 stencil_buffer_partial_stride; + __u64 stencil_meta_buffer_load; + __u64 stencil_meta_buffer_load_stride; + __u64 stencil_meta_buffer_store; + __u64 stencil_meta_buffer_store_stride; + __u64 stencil_meta_buffer_partial; + __u64 stencil_meta_buffer_partial_stride; + + __u64 scissor_array; + __u64 depth_bias_array; + __u64 visibility_result_buffer; + + __u64 vertex_sampler_array; + __u32 vertex_sampler_count; + __u32 vertex_sampler_max; + + __u64 fragment_sampler_array; + __u32 fragment_sampler_count; + __u32 fragment_sampler_max; + + __u64 zls_ctrl; + __u64 ppp_multisamplectl; + __u32 ppp_ctrl; + + __u32 fb_width; + __u32 fb_height; + + __u32 utile_width; + __u32 utile_height; + + __u32 samples; + __u32 layers; + + __u32 encoder_id; + __u32 cmd_ta_id; + __u32 cmd_3d_id; + + __u32 sample_size; + __u32 tib_blocks; + __u32 iogpu_unk_214; + + __u32 merge_upper_x; + __u32 merge_upper_y; + + __u32 load_pipeline; + __u32 load_pipeline_bind; + + __u32 store_pipeline; + __u32 store_pipeline_bind; + + __u32 partial_reload_pipeline; + __u32 
partial_reload_pipeline_bind; + + __u32 partial_store_pipeline; + __u32 partial_store_pipeline_bind; + + __u32 depth_dimensions; + __u32 isp_bgobjdepth; + __u32 isp_bgobjvals; +}; + +#define ASAHI_RENDER_UNK_UNK1 (1UL << 0) +#define ASAHI_RENDER_UNK_SET_TILE_CONFIG (1UL << 1) +#define ASAHI_RENDER_UNK_SET_UTILE_CONFIG (1UL << 2) +#define ASAHI_RENDER_UNK_SET_AUX_FB_UNK (1UL << 3) +#define ASAHI_RENDER_UNK_SET_G14_UNK (1UL << 4) + +#define ASAHI_RENDER_UNK_SET_FRG_UNK_140 (1UL << 20) +#define ASAHI_RENDER_UNK_SET_FRG_UNK_158 (1UL << 21) +#define ASAHI_RENDER_UNK_SET_FRG_TILECFG (1UL << 22) +#define ASAHI_RENDER_UNK_SET_LOAD_BGOBJVALS (1UL << 23) +#define ASAHI_RENDER_UNK_SET_FRG_UNK_38 (1UL << 24) +#define ASAHI_RENDER_UNK_SET_FRG_UNK_3C (1UL << 25) + +#define ASAHI_RENDER_UNK_SET_RELOAD_ZLSCTRL (1UL << 27) +#define ASAHI_RENDER_UNK_SET_UNK_BUF_10 (1UL << 28) +#define ASAHI_RENDER_UNK_SET_FRG_UNK_MASK (1UL << 29) + +#define ASAHI_RENDER_UNK_SET_IOGPU_UNK54 (1UL << 40) +#define ASAHI_RENDER_UNK_SET_IOGPU_UNK56 (1UL << 41) +#define ASAHI_RENDER_UNK_SET_TILING_CONTROL (1UL << 42) +#define ASAHI_RENDER_UNK_SET_TILING_CONTROL_2 (1UL << 43) +#define ASAHI_RENDER_UNK_SET_VTX_UNK_F0 (1UL << 44) +#define ASAHI_RENDER_UNK_SET_VTX_UNK_F8 (1UL << 45) +#define ASAHI_RENDER_UNK_SET_VTX_UNK_118 (1UL << 46) +#define ASAHI_RENDER_UNK_SET_VTX_UNK_MASK (1UL << 47) + +#define ASAHI_RENDER_EXT_UNKNOWNS 0xff00 + +/* XXX: Do not upstream this struct */ +struct drm_asahi_cmd_render_unknowns { + /** @type: Type ID of this extension */ + __u32 type; + __u32 pad; + /** @next: Pointer to the next extension struct, if any */ + __u64 next; + + __u64 flags; + + __u64 tile_config; + __u64 utile_config; + + __u64 aux_fb_unk; + __u64 g14_unk; + __u64 frg_unk_140; + __u64 frg_unk_158; + __u64 frg_tilecfg; + __u64 load_bgobjvals; + __u64 frg_unk_38; + __u64 frg_unk_3c; + __u64 reload_zlsctrl; + __u64 unk_buf_10; + __u64 frg_unk_mask; + + __u64 iogpu_unk54; + __u64 iogpu_unk56; + __u64 tiling_control; + __u64 tiling_control_2; + __u64 vtx_unk_f0; + __u64 vtx_unk_f8; + __u64 vtx_unk_118; + __u64 vtx_unk_mask; +}; + +/* XXX check */ +#define ASAHI_COMPUTE_NO_PREEMPTION (1UL << 0) + +struct drm_asahi_cmd_compute { + __u64 flags; + + __u64 encoder_ptr; + __u64 encoder_end; + + __u64 attachments; + __u32 attachment_count; + __u32 pad; + + __u32 helper_program; + __u32 helper_cfg; + __u64 helper_arg; + + __u32 encoder_id; + __u32 cmd_id; + + __u64 sampler_array; + __u32 sampler_count; + __u32 sampler_max; + + __u32 iogpu_unk_40; + __u32 unk_mask; +}; + +enum drm_asahi_status { + DRM_ASAHI_STATUS_PENDING = 0, + DRM_ASAHI_STATUS_COMPLETE, + DRM_ASAHI_STATUS_UNKNOWN_ERROR, + DRM_ASAHI_STATUS_TIMEOUT, + DRM_ASAHI_STATUS_FAULT, + DRM_ASAHI_STATUS_KILLED, + DRM_ASAHI_STATUS_NO_DEVICE, +}; + +enum drm_asahi_fault { + DRM_ASAHI_FAULT_NONE = 0, + DRM_ASAHI_FAULT_UNKNOWN, + DRM_ASAHI_FAULT_UNMAPPED, + DRM_ASAHI_FAULT_AF_FAULT, + DRM_ASAHI_FAULT_WRITE_ONLY, + DRM_ASAHI_FAULT_READ_ONLY, + DRM_ASAHI_FAULT_NO_ACCESS, +}; + +struct drm_asahi_result_info { + /** @status: One of enum drm_asahi_status */ + __u32 status; + + /** @reason: One of drm_asahi_fault_type */ + __u32 fault_type; + + /** @unit: Unit number, hardware dependent */ + __u32 unit; + + /** @sideband: Sideband information, hardware dependent */ + __u32 sideband; + + /** @level: Page table level at which the fault occurred, hardware dependent */ + __u8 level; + + /** @read: Fault was a read */ + __u8 is_read; + + /** @pad: MBZ */ + __u16 pad; + + /** @unk_5: Extra bits, hardware 
dependent */ + __u32 extra; + + /** @address: Fault address, cache line aligned */ + __u64 address; +}; + +#define DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF (1UL << 0) +#define DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN (1UL << 1) +#define DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED (1UL << 2) + +struct drm_asahi_result_render { + /** @info: Common result information */ + struct drm_asahi_result_info info; + + /** @flags: Zero or more of DRM_ASAHI_RESULT_RENDER_* */ + __u64 flags; + + /** @vertex_ts_start: Timestamp of the start of vertex processing */ + __u64 vertex_ts_start; + + /** @vertex_ts_end: Timestamp of the end of vertex processing */ + __u64 vertex_ts_end; + + /** @fragment_ts_start: Timestamp of the start of fragment processing */ + __u64 fragment_ts_start; + + /** @fragment_ts_end: Timestamp of the end of fragment processing */ + __u64 fragment_ts_end; + + /** @tvb_size_bytes: TVB size at the start of this render */ + __u64 tvb_size_bytes; + + /** @tvb_usage_bytes: Total TVB usage in bytes for this render */ + __u64 tvb_usage_bytes; + + /** @num_tvb_overflows: Number of TVB overflows that occurred for this render */ + __u32 num_tvb_overflows; +}; + +struct drm_asahi_result_compute { + /** @info: Common result information */ + struct drm_asahi_result_info info; + + /** @flags: Zero or more of DRM_ASAHI_RESULT_COMPUTE_* */ + __u64 flags; + + /** @ts_start: Timestamp of the start of this compute command */ + __u64 ts_start; + + /** @ts_end: Timestamp of the end of this compute command */ + __u64 ts_end; +}; + +struct drm_asahi_get_time { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @flags: MBZ. */ + __u64 flags; + + /** @tv_sec: On return, seconds part of a point in time */ + __s64 tv_sec; + + /** @tv_nsec: On return, nanoseconds part of a point in time */ + __s64 tv_nsec; + + /** @gpu_timestamp: On return, the GPU timestamp at that point in time */ + __u64 gpu_timestamp; +}; + +/* Note: this is an enum so that it can be resolved by Rust bindgen. 
*/ +enum { + DRM_IOCTL_ASAHI_GET_PARAMS = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GET_PARAMS, struct drm_asahi_get_params), + DRM_IOCTL_ASAHI_VM_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_VM_CREATE, struct drm_asahi_vm_create), + DRM_IOCTL_ASAHI_VM_DESTROY = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_VM_DESTROY, struct drm_asahi_vm_destroy), + DRM_IOCTL_ASAHI_GEM_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GEM_CREATE, struct drm_asahi_gem_create), + DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GEM_MMAP_OFFSET, struct drm_asahi_gem_mmap_offset), + DRM_IOCTL_ASAHI_GEM_BIND = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_GEM_BIND, struct drm_asahi_gem_bind), + DRM_IOCTL_ASAHI_QUEUE_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_QUEUE_CREATE, struct drm_asahi_queue_create), + DRM_IOCTL_ASAHI_QUEUE_DESTROY = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_QUEUE_DESTROY, struct drm_asahi_queue_destroy), + DRM_IOCTL_ASAHI_SUBMIT = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_SUBMIT, struct drm_asahi_submit), + DRM_IOCTL_ASAHI_GET_TIME = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GET_TIME, struct drm_asahi_get_time), +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _ASAHI_DRM_H_ */ diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c index ccf11c7fb59..6143ea028fe 100644 --- a/src/gallium/drivers/asahi/agx_batch.c +++ b/src/gallium/drivers/asahi/agx_batch.c @@ -5,11 +5,13 @@ */ #include +#include "asahi/lib/agx_device_virtio.h" #include "asahi/lib/decode.h" #include "util/bitset.h" #include "util/u_dynarray.h" #include "util/u_range.h" #include "agx_state.h" +#include "vdrm.h" #define foreach_active(ctx, idx) \ BITSET_FOREACH_SET(idx, ctx->batches.active, AGX_MAX_BATCHES) @@ -156,13 +158,162 @@ agx_batch_init(struct agx_context *ctx, assert(!ret && batch->syncobj); } + batch->result_off = + (2 * sizeof(union agx_batch_result)) * agx_batch_idx(batch); + batch->result = + (void *)(((uint8_t *)ctx->result_buf->ptr.cpu) + batch->result_off); + memset(batch->result, 0, sizeof(union agx_batch_result) * 2); + agx_batch_mark_active(batch); } +const char *status_str[] = { + [DRM_ASAHI_STATUS_PENDING] = "(pending)", + [DRM_ASAHI_STATUS_COMPLETE] = "Complete", + [DRM_ASAHI_STATUS_UNKNOWN_ERROR] = "UNKNOWN ERROR", + [DRM_ASAHI_STATUS_TIMEOUT] = "TIMEOUT", + [DRM_ASAHI_STATUS_FAULT] = "FAULT", + [DRM_ASAHI_STATUS_KILLED] = "KILLED", + [DRM_ASAHI_STATUS_NO_DEVICE] = "NO DEVICE", +}; + +const char *fault_type_str[] = { + [DRM_ASAHI_FAULT_NONE] = "(none)", + [DRM_ASAHI_FAULT_UNKNOWN] = "Unknown", + [DRM_ASAHI_FAULT_UNMAPPED] = "Unmapped", + [DRM_ASAHI_FAULT_AF_FAULT] = "AF Fault", + [DRM_ASAHI_FAULT_WRITE_ONLY] = "Write Only", + [DRM_ASAHI_FAULT_READ_ONLY] = "Read Only", + [DRM_ASAHI_FAULT_NO_ACCESS] = "No Access", +}; + +const char *low_unit_str[16] = { + "DCMP", "UL1C", "CMP", "GSL1", "IAP", "VCE", "TE", "RAS", + "VDM", "PPP", "IPF", "IPF_CPF", "VF", "VF_CPF", "ZLS", "UNK", +}; + +const char *mid_unit_str[16] = { + "UNK", "dPM", "dCDM_KS0", "dCDM_KS1", "dCDM_KS2", "dIPP", + "dIPP_CS", "dVDM_CSD", "dVDM_SSD", "dVDM_ILF", "dVDM_ILD", "dRDE0", + "dRDE1", "FC", "GSL2", "UNK", +}; + +const char *high_unit_str[16] = { + "gPM_SP", "gVDM_CSD_SP", "gVDM_SSD_SP", "gVDM_ILF_SP", + "gVDM_TFP_SP", "gVDM_MMB_SP", "gCDM_CS_KS0_SP", "gCDM_CS_KS1_SP", + "gCDM_CS_KS2_SP", "gCDM_KS0_SP", "gCDM_KS1_SP", "gCDM_KS2_SP", + "gIPP_SP", "gIPP_CS_SP", "gRDE0_SP", "gRDE1_SP", +}; + +static void +agx_print_result(struct agx_device *dev, struct agx_context *ctx, + struct drm_asahi_result_info *info, 
unsigned batch_idx, + bool is_compute) +{ + if (unlikely(info->status != DRM_ASAHI_STATUS_COMPLETE)) { + ctx->any_faults = true; + } + + if (likely(info->status == DRM_ASAHI_STATUS_COMPLETE && + !((dev)->debug & AGX_DBG_STATS))) + return; + + if (is_compute) { + struct drm_asahi_result_compute *r = (void *)info; + float time = (r->ts_end - r->ts_start) / + (float)dev->params.timer_frequency_hz; + + mesa_logw( + "[Batch %d] Compute %s: %.06f\n", batch_idx, + info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?", + time); + } else { + struct drm_asahi_result_render *r = (void *)info; + float time_vtx = (r->vertex_ts_end - r->vertex_ts_start) / + (float)dev->params.timer_frequency_hz; + float time_frag = (r->fragment_ts_end - r->fragment_ts_start) / + (float)dev->params.timer_frequency_hz; + mesa_logw( + "[Batch %d] Render %s: TVB %9ld/%9ld bytes (%d ovf) %c%c%c | vtx %.06f frag %.06f\n", + batch_idx, + info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?", + (long)r->tvb_usage_bytes, (long)r->tvb_size_bytes, + (int)r->num_tvb_overflows, + r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF ? 'G' : ' ', + r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN ? 'M' : ' ', + r->flags & DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED ? 'O' : ' ', + time_vtx, time_frag); + } + + if (info->fault_type != DRM_ASAHI_FAULT_NONE) { + const char *unit_name; + int unit_index; + + switch (info->unit) { + case 0x00 ... 0x9f: + unit_name = low_unit_str[info->unit & 0xf]; + unit_index = info->unit >> 4; + break; + case 0xa0 ... 0xaf: + unit_name = mid_unit_str[info->unit & 0xf]; + unit_index = 0; + break; + case 0xb0 ... 0xb7: + unit_name = "GL2CC_META"; + unit_index = info->unit & 0x7; + break; + case 0xb8: + unit_name = "GL2CC_MB"; + unit_index = 0; + break; + case 0xe0 ... 0xff: + unit_name = high_unit_str[info->unit & 0xf]; + unit_index = (info->unit >> 4) & 1; + break; + default: + unit_name = "UNK"; + unit_index = 0; + break; + } + + mesa_logw( + "[Batch %d] Fault: %s : Addr 0x%llx %c Unit %02x (%s/%d) SB 0x%02x L%d Extra 0x%x\n", + batch_idx, + info->fault_type < ARRAY_SIZE(fault_type_str) + ? fault_type_str[info->fault_type] + : "?", + (long long)info->address, info->is_read ? 'r' : 'W', info->unit, + unit_name, unit_index, info->sideband, info->level, info->extra); + + agx_debug_fault(dev, info->address); + } + + /* Obscurely, we need to tolerate faults to pass the robustness parts of the + * CTS, so we can't assert that we don't fault. But it's helpful for any sort + * of debugging to crash on fault.
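Stepping back to the ioctl numbers defined earlier in this header: from userspace they are driven through drmIoctl(). A minimal sketch of querying the global parameter block, assuming drm_asahi_get_params carries the param_group/size/pointer fields the driver relies on (error handling elided):

```c
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include "asahi/lib/unstable_asahi_drm.h"

/* Sketch: fetch struct drm_asahi_params_global from the kernel driver. */
static int
query_global_params(int fd, struct drm_asahi_params_global *out)
{
   struct drm_asahi_get_params get = {0};

   get.param_group = 0;                    /* group 0: global parameter block */
   get.size = sizeof(*out);
   get.pointer = (uint64_t)(uintptr_t)out; /* userspace destination buffer */

   memset(out, 0, sizeof(*out));
   return drmIoctl(fd, DRM_IOCTL_ASAHI_GET_PARAMS, &get);
}
```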
+ */ + if (dev->debug) { + assert(info->status == DRM_ASAHI_STATUS_COMPLETE || + info->status == DRM_ASAHI_STATUS_KILLED); + } +} + static void agx_batch_print_stats(struct agx_device *dev, struct agx_batch *batch) { - unreachable("Linux UAPI not yet upstream"); + unsigned batch_idx = agx_batch_idx(batch); + + if (!batch->result) + return; + + if (batch->cdm.bo) { + agx_print_result(dev, batch->ctx, &batch->result[0].compute.info, + batch_idx, true); + } + + if (batch->vdm.bo) { + agx_print_result(dev, batch->ctx, &batch->result[1].render.info, + batch_idx, false); + } } static void @@ -175,7 +326,18 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset) assert(ctx->batch != batch); uint64_t begin_ts = ~0, end_ts = 0; - /* TODO: UAPI pending */ + if (batch->result) { + if (batch->cdm.bo) { + begin_ts = MIN2(begin_ts, batch->result[0].compute.ts_start); + end_ts = MAX2(end_ts, batch->result[0].compute.ts_end); + } + + if (batch->vdm.bo) { + begin_ts = MIN2(begin_ts, batch->result[1].render.vertex_ts_start); + end_ts = MAX2(end_ts, batch->result[1].render.fragment_ts_end); + } + } + agx_finish_batch_queries(batch, begin_ts, end_ts); if (reset) { @@ -197,7 +359,8 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset) if (writer == batch) agx_writer_remove(ctx, handle); - p_atomic_cmpxchg(&bo->writer_syncobj, batch->syncobj, 0); + p_atomic_cmpxchg(&bo->writer, + agx_bo_writer(ctx->queue_id, batch->syncobj), 0); agx_bo_unreference(agx_lookup_bo(dev, handle)); } @@ -215,6 +378,9 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset) if (!(dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC))) { agx_batch_print_stats(dev, batch); } + + util_unreference_framebuffer_state(&batch->key); + agx_batch_mark_complete(batch); } int @@ -566,8 +732,8 @@ agx_add_sync(struct drm_asahi_sync *syncs, unsigned *count, uint32_t handle) void agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, - uint32_t barriers, enum drm_asahi_cmd_type cmd_type, - void *cmdbuf) + struct drm_asahi_cmd_compute *compute, + struct drm_asahi_cmd_render *render) { struct agx_device *dev = agx_device(ctx->base.screen); struct agx_screen *screen = agx_screen(ctx->base.screen); @@ -579,6 +745,9 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, feedback = true; #endif + /* Timer queries use the feedback timestamping */ + feedback |= (batch->timestamps.size > 0); + if (!feedback) batch->result = NULL; @@ -597,6 +766,29 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, .handle = batch->syncobj, }; + /* This lock protects against a subtle race scenario: + * - Context 1 submits and registers itself as writer for a BO + * - Context 2 runs the below loop, and finds the writer syncobj + * - Context 1 is destroyed, + * - flushing all batches, unregistering itself as a writer, and + * - Destroying syncobjs for all batches + * - Context 2 submits, with a now invalid syncobj ID + * + * Since batch syncobjs are only destroyed on context destruction, we can + * protect against this scenario with a screen-wide rwlock to ensure that + * the syncobj destroy code cannot run concurrently with any other + * submission. If a submit runs before the wrlock is taken, the syncobjs + * must still exist (even if the batch was flushed and no longer a writer). + * If it runs after the wrlock is released, then by definition the + * just-destroyed syncobjs cannot be writers for any BO at that point. 
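In short, the rule being described is: every submission path holds the screen's destroy lock for reading, and only context teardown takes it for writing around syncobj destruction. A condensed sketch of that pairing, using the same util/rwlock.h primitives the driver uses; the surrounding details are elided:

```c
#include "util/rwlock.h"

struct u_rwlock destroy_lock; /* one per screen, initialized at screen creation */

static void
submit_path(void)
{
   u_rwlock_rdlock(&destroy_lock);
   /* ... look up writer syncobjs of other contexts and submit; any syncobj
    * observed here is guaranteed to still exist ...
    */
   u_rwlock_rdunlock(&destroy_lock);
}

static void
context_destroy_path(void)
{
   u_rwlock_wrlock(&destroy_lock);
   /* ... destroy each batch syncobj; no submit can be racing here ... */
   u_rwlock_wrunlock(&destroy_lock);
}
```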
+ * + * A screen-wide (not device-wide) rwlock is sufficient because by definition + * resources can only be implicitly shared within a screen. Any shared + * resources across screens must have been imported and will go through the + * AGX_BO_SHARED path instead, which has no race (but is slower). + */ + u_rwlock_rdlock(&screen->destroy_lock); + int handle; AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) { struct agx_bo *bo = agx_lookup_bo(dev, handle); @@ -624,6 +816,29 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, /* And keep track of the BO for cloning the out_sync */ shared_bos[shared_bo_count++] = bo; + } else { + /* Deal with BOs which are not externally shared, but which have been + * written from another context within the same screen. We also need to + * wait on these using their syncobj. + */ + uint64_t writer = p_atomic_read_relaxed(&bo->writer); + if (writer && agx_bo_writer_queue(writer) != ctx->queue_id) { + batch_debug(batch, "Waits on inter-context BO @ 0x%" PRIx64, + bo->ptr.gpu); + + agx_add_sync(in_syncs, &in_sync_count, + agx_bo_writer_syncobj(writer)); + shared_bos[shared_bo_count++] = NULL; + } + } + } + + if (dev->debug & AGX_DBG_SCRATCH) { + if (compute) + agx_scratch_debug_pre(&ctx->scratch_cs); + if (render) { + agx_scratch_debug_pre(&ctx->scratch_vs); + agx_scratch_debug_pre(&ctx->scratch_fs); } } @@ -631,9 +846,71 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, agx_add_sync(in_syncs, &in_sync_count, agx_get_in_sync(ctx)); /* Submit! */ - /* TODO: UAPI */ - (void)screen; - (void)out_sync; + struct drm_asahi_command commands[2]; + unsigned command_count = 0; + + if (compute) { + commands[command_count++] = (struct drm_asahi_command){ + .cmd_type = DRM_ASAHI_CMD_COMPUTE, + .flags = 0, + .cmd_buffer = (uint64_t)(uintptr_t)compute, + .cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute), + .result_offset = feedback ? batch->result_off : 0, + .result_size = feedback ? sizeof(union agx_batch_result) : 0, + /* Barrier on previous submission */ + .barriers = {0, 0}, + }; + } + + if (render) { + commands[command_count++] = (struct drm_asahi_command){ + .cmd_type = DRM_ASAHI_CMD_RENDER, + .flags = 0, + .cmd_buffer = (uint64_t)(uintptr_t)render, + .cmd_buffer_size = sizeof(struct drm_asahi_cmd_render), + .result_offset = + feedback ? (batch->result_off + sizeof(union agx_batch_result)) : 0, + .result_size = feedback ? sizeof(union agx_batch_result) : 0, + /* Barrier on previous submission */ + .barriers = {compute ? DRM_ASAHI_BARRIER_NONE : 0, compute ? 1 : 0}, + }; + } + + struct drm_asahi_submit submit = { + .flags = 0, + .queue_id = ctx->queue_id, + .result_handle = feedback ? ctx->result_buf->handle : 0, + .in_sync_count = in_sync_count, + .out_sync_count = 1, + .command_count = command_count, + .in_syncs = (uint64_t)(uintptr_t)(in_syncs), + .out_syncs = (uint64_t)(uintptr_t)(&out_sync), + .commands = (uint64_t)(uintptr_t)(&commands[0]), + }; + + int ret = dev->ops.submit(dev, &submit, ctx->result_buf->vbo_res_id); + + u_rwlock_rdunlock(&screen->destroy_lock); + + if (ret) { + if (compute) { + fprintf(stderr, "DRM_IOCTL_ASAHI_SUBMIT compute failed: %m\n"); + } + + if (render) { + struct drm_asahi_cmd_render *c = render; + fprintf( + stderr, + "DRM_IOCTL_ASAHI_SUBMIT render failed: %m (%dx%d tile %dx%d layers %d samples %d)\n", + c->fb_width, c->fb_height, c->utile_width, c->utile_height, + c->layers, c->samples); + } + + assert(0); + } + + if (ret == ENODEV) + abort(); /* Now stash our batch fence into any shared BOs. 
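The result_offset/result_size values in the commands above index into the per-context result buffer, which reserves two agx_batch_result slots per batch: the first for the compute command, the second for the render command. A small sketch of that addressing, mirroring how batch->result_off is computed; it assumes the driver's union agx_batch_result type is in scope:

```c
/* Sketch: offset of a batch's result slot inside ctx->result_buf, which is
 * allocated as 2 * sizeof(union agx_batch_result) * AGX_MAX_BATCHES.
 */
static inline uint32_t
batch_result_offset(unsigned batch_idx, bool render)
{
   uint32_t base = (2 * sizeof(union agx_batch_result)) * batch_idx;

   /* Slot 0 holds the compute result, slot 1 the render result. */
   return base + (render ? sizeof(union agx_batch_result) : 0);
}
```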
*/ if (shared_bo_count) { @@ -644,6 +921,9 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, assert(out_sync_fd >= 0); for (unsigned i = 0; i < shared_bo_count; i++) { + if (!shared_bos[i]) + continue; + batch_debug(batch, "Signals shared BO @ 0x%" PRIx64, shared_bos[i]->ptr.gpu); @@ -674,7 +954,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, /* But any BOs written by active batches are ours */ assert(writer == batch && "exclusive writer"); - p_atomic_set(&bo->writer_syncobj, batch->syncobj); + p_atomic_set(&bo->writer, agx_bo_writer(ctx->queue_id, batch->syncobj)); } free(in_syncs); @@ -682,11 +962,16 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, if (dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_SCRATCH)) { if (dev->debug & AGX_DBG_TRACE) { - /* agxdecode DRM commands */ - switch (cmd_type) { - default: - unreachable("Linux UAPI not yet upstream"); + if (compute) { + agxdecode_drm_cmd_compute(dev->agxdecode, &dev->params, compute, + true); } + + if (render) { + agxdecode_drm_cmd_render(dev->agxdecode, &dev->params, render, + true); + } + agxdecode_next_frame(); } @@ -695,6 +980,19 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, assert(!ret); agx_batch_print_stats(dev, batch); + + if (dev->debug & AGX_DBG_SCRATCH) { + if (compute) { + fprintf(stderr, "CS scratch:\n"); + agx_scratch_debug_post(&ctx->scratch_cs); + } + if (render) { + fprintf(stderr, "VS scratch:\n"); + agx_scratch_debug_post(&ctx->scratch_vs); + fprintf(stderr, "FS scratch:\n"); + agx_scratch_debug_post(&ctx->scratch_fs); + } + } } agx_batch_mark_submitted(batch); @@ -767,6 +1065,9 @@ agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch) if (ctx->batch == batch) ctx->batch = NULL; + /* Elide printing stats */ + batch->result = NULL; + agx_batch_cleanup(ctx, batch, true); } diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index 5f49e69cfbb..b3b3d29850a 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -12,6 +12,7 @@ #include "asahi/layout/layout.h" #include "asahi/lib/agx_formats.h" #include "asahi/lib/decode.h" +#include "asahi/lib/unstable_asahi_drm.h" #include "drm-uapi/drm_fourcc.h" #include "frontend/winsys_handle.h" #include "gallium/auxiliary/renderonly/renderonly.h" @@ -25,6 +26,7 @@ #include "pipe/p_defines.h" #include "pipe/p_screen.h" #include "pipe/p_state.h" +#include "util/bitscan.h" #include "util/format/u_format.h" #include "util/half_float.h" #include "util/macros.h" @@ -933,6 +935,7 @@ agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource, { struct agx_context *ctx = agx_context(pctx); struct agx_resource *rsrc = agx_resource(resource); + struct agx_device *dev = agx_device(ctx->base.screen); /* Can't map tiled/compressed directly */ if ((usage & PIPE_MAP_DIRECTLY) && rsrc->modifier != DRM_FORMAT_MOD_LINEAR) @@ -996,11 +999,11 @@ agx_transfer_map(struct pipe_context *pctx, struct pipe_resource *resource, agx_sync_writer(ctx, staging, "GPU read staging blit"); } - agx_bo_mmap(staging->bo); + dev->ops.bo_mmap(staging->bo); return staging->bo->ptr.cpu; } - agx_bo_mmap(rsrc->bo); + dev->ops.bo_mmap(rsrc->bo); if (ail_is_level_twiddled_uncompressed(&rsrc->layout, level)) { /* Should never happen for buffers, and it's not safe */ @@ -1226,6 +1229,323 @@ agx_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres) } } +#define MAX_ATTACHMENTS 16 + +struct attachments { + struct 
drm_asahi_attachment list[MAX_ATTACHMENTS]; + size_t count; +}; + +static void +asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc, + struct pipe_surface *surf) +{ + assert(att->count < MAX_ATTACHMENTS); + int idx = att->count++; + + att->list[idx].size = rsrc->layout.size_B; + att->list[idx].pointer = rsrc->bo->ptr.gpu; + att->list[idx].order = 1; // TODO: What does this do? + att->list[idx].flags = 0; +} + +static bool +is_aligned(unsigned x, unsigned pot_alignment) +{ + assert(util_is_power_of_two_nonzero(pot_alignment)); + return (x & (pot_alignment - 1)) == 0; +} + +static void +agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, + struct attachments *att, struct agx_pool *pool, + struct agx_batch *batch, struct pipe_framebuffer_state *framebuffer, + uint64_t encoder_ptr, uint64_t encoder_id, uint64_t cmd_ta_id, + uint64_t cmd_3d_id, uint64_t scissor_ptr, uint64_t depth_bias_ptr, + uint64_t visibility_result_ptr, struct asahi_bg_eot pipeline_clear, + struct asahi_bg_eot pipeline_load, + struct asahi_bg_eot pipeline_store, bool clear_pipeline_textures, + double clear_depth, unsigned clear_stencil, + struct agx_tilebuffer_layout *tib) +{ + memset(c, 0, sizeof(*c)); + + c->encoder_ptr = encoder_ptr; + c->encoder_id = encoder_id; + c->cmd_3d_id = cmd_3d_id; + c->cmd_ta_id = cmd_ta_id; + + /* bit 0 specifies OpenGL clip behaviour. Since ARB_clip_control is + * advertised, we don't set it and lower in the vertex shader. + */ + c->ppp_ctrl = 0x202; + + c->fb_width = framebuffer->width; + c->fb_height = framebuffer->height; + + c->iogpu_unk_214 = 0xc000; + + c->isp_bgobjvals = 0x300; + + struct agx_resource *zres = NULL, *sres = NULL; + + agx_pack(&c->zls_ctrl, ZLS_CONTROL, zls_control) { + + if (framebuffer->zsbuf) { + struct pipe_surface *zsbuf = framebuffer->zsbuf; + struct agx_resource *zsres = agx_resource(zsbuf->texture); + + unsigned level = zsbuf->u.tex.level; + unsigned first_layer = zsbuf->u.tex.first_layer; + + const struct util_format_description *desc = util_format_description( + agx_resource(zsbuf->texture)->layout.format); + + assert(desc->format == PIPE_FORMAT_Z32_FLOAT || + desc->format == PIPE_FORMAT_Z16_UNORM || + desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || + desc->format == PIPE_FORMAT_S8_UINT); + + c->depth_dimensions = + (framebuffer->width - 1) | ((framebuffer->height - 1) << 15); + + if (util_format_has_depth(desc)) + zres = zsres; + else + sres = zsres; + + if (zsres->separate_stencil) + sres = zsres->separate_stencil; + + if (zres) { + bool clear = (batch->clear & PIPE_CLEAR_DEPTH); + bool load = (batch->load & PIPE_CLEAR_DEPTH); + + zls_control.z_store_enable = (batch->resolve & PIPE_CLEAR_DEPTH); + zls_control.z_load_enable = !clear && load; + + c->depth_buffer_load = agx_map_texture_gpu(zres, first_layer) + + ail_get_level_offset_B(&zres->layout, level); + + c->depth_buffer_store = c->depth_buffer_load; + c->depth_buffer_partial = c->depth_buffer_load; + + /* Main stride in pages */ + assert((zres->layout.depth_px == 1 || + is_aligned(zres->layout.layer_stride_B, AIL_PAGESIZE)) && + "Page aligned Z layers"); + + unsigned stride_pages = zres->layout.layer_stride_B / AIL_PAGESIZE; + c->depth_buffer_load_stride = ((stride_pages - 1) << 14) | 1; + c->depth_buffer_store_stride = c->depth_buffer_load_stride; + c->depth_buffer_partial_stride = c->depth_buffer_load_stride; + + assert(zres->layout.tiling != AIL_TILING_LINEAR && "must tile"); + + if (ail_is_compressed(&zres->layout)) { + c->depth_meta_buffer_load = + 
agx_map_texture_gpu(zres, 0) + + zres->layout.metadata_offset_B + + (first_layer * zres->layout.compression_layer_stride_B) + + zres->layout.level_offsets_compressed_B[level]; + + /* Meta stride in cache lines */ + assert(is_aligned(zres->layout.compression_layer_stride_B, + AIL_CACHELINE) && + "Cacheline aligned Z meta layers"); + unsigned stride_lines = + zres->layout.compression_layer_stride_B / AIL_CACHELINE; + c->depth_meta_buffer_load_stride = (stride_lines - 1) << 14; + + c->depth_meta_buffer_store = c->depth_meta_buffer_load; + c->depth_meta_buffer_store_stride = + c->depth_meta_buffer_load_stride; + c->depth_meta_buffer_partial = c->depth_meta_buffer_load; + c->depth_meta_buffer_partial_stride = + c->depth_meta_buffer_load_stride; + + zls_control.z_compress_1 = true; + zls_control.z_compress_2 = true; + } + + if (zres->base.format == PIPE_FORMAT_Z16_UNORM) { + const float scale = 0xffff; + c->isp_bgobjdepth = + (uint16_t)(SATURATE(clear_depth) * scale + 0.5f); + zls_control.z_format = AGX_ZLS_FORMAT_16; + c->iogpu_unk_214 |= 0x40000; + } else { + c->isp_bgobjdepth = fui(clear_depth); + zls_control.z_format = AGX_ZLS_FORMAT_32F; + } + } + + if (sres) { + bool clear = (batch->clear & PIPE_CLEAR_STENCIL); + bool load = (batch->load & PIPE_CLEAR_STENCIL); + + zls_control.s_store_enable = (batch->resolve & PIPE_CLEAR_STENCIL); + zls_control.s_load_enable = !clear && load; + + c->stencil_buffer_load = + agx_map_texture_gpu(sres, first_layer) + + ail_get_level_offset_B(&sres->layout, level); + + c->stencil_buffer_store = c->stencil_buffer_load; + c->stencil_buffer_partial = c->stencil_buffer_load; + + /* Main stride in pages */ + assert((sres->layout.depth_px == 1 || + is_aligned(sres->layout.layer_stride_B, AIL_PAGESIZE)) && + "Page aligned S layers"); + unsigned stride_pages = sres->layout.layer_stride_B / AIL_PAGESIZE; + c->stencil_buffer_load_stride = ((stride_pages - 1) << 14) | 1; + c->stencil_buffer_store_stride = c->stencil_buffer_load_stride; + c->stencil_buffer_partial_stride = c->stencil_buffer_load_stride; + + if (ail_is_compressed(&sres->layout)) { + c->stencil_meta_buffer_load = + agx_map_texture_gpu(sres, 0) + + sres->layout.metadata_offset_B + + (first_layer * sres->layout.compression_layer_stride_B) + + sres->layout.level_offsets_compressed_B[level]; + + /* Meta stride in cache lines */ + assert(is_aligned(sres->layout.compression_layer_stride_B, + AIL_CACHELINE) && + "Cacheline aligned S meta layers"); + unsigned stride_lines = + sres->layout.compression_layer_stride_B / AIL_CACHELINE; + c->stencil_meta_buffer_load_stride = (stride_lines - 1) << 14; + + c->stencil_meta_buffer_store = c->stencil_meta_buffer_load; + c->stencil_meta_buffer_store_stride = + c->stencil_meta_buffer_load_stride; + c->stencil_meta_buffer_partial = c->stencil_meta_buffer_load; + c->stencil_meta_buffer_partial_stride = + c->stencil_meta_buffer_load_stride; + + zls_control.s_compress_1 = true; + zls_control.s_compress_2 = true; + } + + c->isp_bgobjvals |= clear_stencil; + } + } + } + + if (clear_pipeline_textures) + c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S; + else + c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES; + + if (zres && !(batch->clear & PIPE_CLEAR_DEPTH)) + c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S; + + if (sres && !(batch->clear & PIPE_CLEAR_STENCIL)) + c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S; + + if (dev->debug & AGX_DBG_NOCLUSTER) + c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING; + + /* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? 
*/ + if (tib->nr_samples > 1 && framebuffer->zsbuf) + c->flags |= ASAHI_RENDER_MSAA_ZS; + + memcpy(&c->load_pipeline_bind, &pipeline_clear.counts, + sizeof(struct agx_counts_packed)); + + memcpy(&c->store_pipeline_bind, &pipeline_store.counts, + sizeof(struct agx_counts_packed)); + + memcpy(&c->partial_reload_pipeline_bind, &pipeline_load.counts, + sizeof(struct agx_counts_packed)); + + memcpy(&c->partial_store_pipeline_bind, &pipeline_store.counts, + sizeof(struct agx_counts_packed)); + + /* XXX is this correct? */ + c->load_pipeline = pipeline_clear.usc | (framebuffer->nr_cbufs >= 4 ? 8 : 4); + c->store_pipeline = pipeline_store.usc | 4; + c->partial_reload_pipeline = pipeline_load.usc | 4; + c->partial_store_pipeline = pipeline_store.usc | 4; + + c->utile_width = tib->tile_size.width; + c->utile_height = tib->tile_size.height; + + c->samples = tib->nr_samples; + c->layers = MAX2(util_framebuffer_get_num_layers(framebuffer), 1); + + c->ppp_multisamplectl = batch->uniforms.ppp_multisamplectl; + c->sample_size = tib->sample_size_B; + + /* XXX OR 0x80 with eMRT? */ + c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(tib), 2048) / 2048; + + float tan_60 = 1.732051f; + c->merge_upper_x = fui(tan_60 / framebuffer->width); + c->merge_upper_y = fui(tan_60 / framebuffer->height); + + c->scissor_array = scissor_ptr; + c->depth_bias_array = depth_bias_ptr; + c->visibility_result_buffer = visibility_result_ptr; + + c->vertex_sampler_array = + batch->sampler_heap.bo ? batch->sampler_heap.bo->ptr.gpu : 0; + c->vertex_sampler_count = batch->sampler_heap.count; + c->vertex_sampler_max = batch->sampler_heap.count + 1; + + /* In the future we could split the heaps if useful */ + c->fragment_sampler_array = c->vertex_sampler_array; + c->fragment_sampler_count = c->vertex_sampler_count; + c->fragment_sampler_max = c->vertex_sampler_max; + + /* If a tile is empty, we do not want to process it, as the redundant + * roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of + * memory bandwidth. Any draw marks a tile as non-empty, so we only need to + * process empty tiles if the background+EOT programs have a side effect. + * This is the case exactly when there is an attachment we are clearing (some + * attachment A in clear and in resolve <==> non-empty intersection). + * + * This case matters a LOT for performance in workloads that split batches. 
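A concrete instance of the rule in the comment above, spelled out with the PIPE_CLEAR_* masks the code tests; the particular values are purely illustrative:

```c
#include <stdbool.h>
#include "pipe/p_defines.h"

/* Worked example of the clear/resolve intersection rule. */
static bool
must_process_empty_tiles_example(void)
{
   unsigned clear   = PIPE_CLEAR_COLOR0 | PIPE_CLEAR_DEPTH; /* cleared this batch */
   unsigned resolve = PIPE_CLEAR_COLOR0;                    /* written out at end of batch */

   /* COLOR0 is both cleared and resolved, so even tiles with no draws must run
    * the background/EOT programs to produce the clear color in memory.
    */
   return (clear & resolve) != 0; /* true for this combination */
}
```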
+ */ + if (batch->clear & batch->resolve) + c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES; + + for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) { + if (!framebuffer->cbufs[i]) + continue; + + asahi_add_attachment(att, agx_resource(framebuffer->cbufs[i]->texture), + framebuffer->cbufs[i]); + } + + if (framebuffer->zsbuf) { + struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture); + + asahi_add_attachment(att, rsrc, framebuffer->zsbuf); + + if (rsrc->separate_stencil) { + asahi_add_attachment(att, rsrc->separate_stencil, framebuffer->zsbuf); + } + } + + c->fragment_attachments = (uint64_t)(uintptr_t)&att->list[0]; + c->fragment_attachment_count = att->count; + + if (batch->vs_scratch) { + c->flags |= ASAHI_RENDER_VERTEX_SPILLS; + c->vertex_helper_arg = batch->ctx->scratch_vs.buf->ptr.gpu; + c->vertex_helper_cfg = batch->vs_preamble_scratch << 16; + c->vertex_helper_program = dev->helper->ptr.gpu | 1; + } + if (batch->fs_scratch) { + c->fragment_helper_arg = batch->ctx->scratch_fs.buf->ptr.gpu; + c->fragment_helper_cfg = batch->fs_preamble_scratch << 16; + c->fragment_helper_program = dev->helper->ptr.gpu | 1; + } +} + /* * context */ @@ -1255,23 +1575,66 @@ agx_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, } } -void -agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch) +static void +agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch, + struct drm_asahi_cmd_compute *cmdbuf) { struct agx_device *dev = agx_device(ctx->base.screen); - assert(agx_batch_is_active(batch)); - assert(!agx_batch_is_submitted(batch)); + /* Finalize the encoder */ + agx_pack(batch->cdm.current, CDM_STREAM_TERMINATE, _) + ; - /* Make sure there's something to submit. */ - if (!batch->clear) { - agx_batch_reset(ctx, batch); - return; - } + agx_batch_add_bo(batch, batch->cdm.bo); if (batch->cs_scratch) agx_batch_add_bo(batch, ctx->scratch_cs.buf); + unsigned cmdbuf_id = agx_get_global_id(dev); + unsigned encoder_id = agx_get_global_id(dev); + + *cmdbuf = (struct drm_asahi_cmd_compute){ + .flags = 0, + .encoder_ptr = batch->cdm.bo->ptr.gpu, + .encoder_end = batch->cdm.bo->ptr.gpu + + (batch->cdm.current - (uint8_t *)batch->cdm.bo->ptr.cpu), + .helper_arg = 0, + .helper_cfg = 0, + .helper_program = 0, + .iogpu_unk_40 = 0, + .sampler_array = + batch->sampler_heap.bo ? batch->sampler_heap.bo->ptr.gpu : 0, + .sampler_count = batch->sampler_heap.count, + .sampler_max = batch->sampler_heap.count + 1, + .encoder_id = encoder_id, + .cmd_id = cmdbuf_id, + .unk_mask = 0xffffffff, + }; + + if (batch->cs_scratch) { + // The commented out lines *may* be related to subgroup-level preemption, + // which we can't support without implementing threadgroup memory in the + // helper. Disable them for now. 
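The compute path below configures the scratch helper the same way the vertex and fragment paths did earlier in this file: the argument is the scratch heap's GPU address, the config word carries the preamble scratch value in its upper half, and the program pointer is the helper shader address with bit 0 set (set exactly as the driver does; its meaning is not interpreted here). A consolidated sketch of that shared pattern, with an illustrative function name:

```c
#include <stdint.h>
#include "asahi/lib/agx_bo.h"

/* Illustrative consolidation of the helper-program setup shared by the
 * render (vertex/fragment) and compute paths. Field meanings mirror the
 * assignments in the surrounding code.
 */
static void
configure_scratch_helper(uint64_t *arg, uint32_t *cfg, uint64_t *program,
                         struct agx_bo *scratch_buf, uint32_t preamble_scratch,
                         struct agx_bo *helper_code)
{
   *arg = scratch_buf->ptr.gpu;   /* scratch heap base passed to the helper */
   *cfg = preamble_scratch << 16; /* preamble scratch in the upper 16 bits */
   *program = helper_code->ptr.gpu | 1;
}
```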
+ + // cmdbuf->iogpu_unk_40 = 0x1c; + cmdbuf->helper_arg = ctx->scratch_cs.buf->ptr.gpu; + cmdbuf->helper_cfg = batch->cs_preamble_scratch << 16; + // cmdbuf->helper_cfg |= 0x40; + cmdbuf->helper_program = dev->helper->ptr.gpu | 1; + } +} + +static void +agx_flush_render(struct agx_context *ctx, struct agx_batch *batch, + struct drm_asahi_cmd_render *cmdbuf, struct attachments *att) +{ + struct agx_device *dev = agx_device(ctx->base.screen); + + if (batch->vs_scratch) + agx_batch_add_bo(batch, ctx->scratch_vs.buf); + if (batch->fs_scratch) + agx_batch_add_bo(batch, ctx->scratch_fs.buf); + assert(batch->initialized); /* Finalize the encoder */ @@ -1313,22 +1676,46 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch) */ agx_batch_add_bo(batch, batch->vdm.bo); - if (batch->vs_scratch) - agx_batch_add_bo(batch, ctx->scratch_vs.buf); - if (batch->fs_scratch) - agx_batch_add_bo(batch, ctx->scratch_fs.buf); + unsigned cmd_ta_id = agx_get_global_id(dev); + unsigned cmd_3d_id = agx_get_global_id(dev); + unsigned encoder_id = agx_get_global_id(dev); - /* TODO: Linux UAPI submission */ - (void)dev; - (void)zbias; - (void)scissor; - (void)clear_pipeline_textures; - (void)pipeline_store; - (void)pipeline_background; - (void)pipeline_background_partial; + agx_cmdbuf(dev, cmdbuf, att, &batch->pool, batch, &batch->key, + batch->vdm.bo->ptr.gpu, encoder_id, cmd_ta_id, cmd_3d_id, scissor, + zbias, agx_get_occlusion_heap(batch), pipeline_background, + pipeline_background_partial, pipeline_store, + clear_pipeline_textures, batch->clear_depth, batch->clear_stencil, + &batch->tilebuffer_layout); +} - unreachable("Linux UAPI not yet upstream"); - agx_batch_submit(ctx, batch, 0, 0, NULL); +void +agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch) +{ + assert(agx_batch_is_active(batch)); + assert(!agx_batch_is_submitted(batch)); + + struct attachments att = {.count = 0}; + struct drm_asahi_cmd_render render; + struct drm_asahi_cmd_compute compute; + bool has_vdm = false, has_cdm = false; + + if (batch->cdm.bo) { + agx_flush_compute(ctx, batch, &compute); + has_cdm = true; + } + + if (batch->vdm.bo && (batch->clear || batch->initialized)) { + agx_flush_render(ctx, batch, &render, &att); + has_vdm = true; + } + + if (!has_cdm && !has_vdm) { + agx_batch_reset(ctx, batch); + return; + } + + agx_batch_submit(ctx, batch, has_cdm ? &compute : NULL, + has_vdm ? &render : NULL); } static void @@ -1336,6 +1723,7 @@ agx_destroy_context(struct pipe_context *pctx) { struct agx_device *dev = agx_device(pctx->screen); struct agx_context *ctx = agx_context(pctx); + struct agx_screen *screen = agx_screen(pctx->screen); /* Batch state needs to be freed on completion, and we don't want to yank * buffers out from in-progress GPU jobs to avoid faults, so just wait until @@ -1357,6 +1745,11 @@ agx_destroy_context(struct pipe_context *pctx) agx_bo_unreference(ctx->result_buf); + /* Lock around the syncobj destruction, to avoid racing + * command submission in another context. 
+ **/ + u_rwlock_wrlock(&screen->destroy_lock); + drmSyncobjDestroy(dev->fd, ctx->in_sync_obj); drmSyncobjDestroy(dev->fd, ctx->dummy_syncobj); if (ctx->in_sync_fd != -1) @@ -1367,12 +1760,16 @@ agx_destroy_context(struct pipe_context *pctx) drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj); } + u_rwlock_wrunlock(&screen->destroy_lock); + pipe_resource_reference(&ctx->heap, NULL); agx_scratch_fini(&ctx->scratch_vs); agx_scratch_fini(&ctx->scratch_fs); agx_scratch_fini(&ctx->scratch_cs); + agx_destroy_command_queue(dev, ctx->queue_id); + ralloc_free(ctx); } @@ -1426,6 +1823,20 @@ agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags) } pctx->const_uploader = pctx->stream_uploader; + uint32_t priority = 2; + if (flags & PIPE_CONTEXT_PRIORITY_LOW) + priority = 3; + else if (flags & PIPE_CONTEXT_PRIORITY_MEDIUM) + priority = 2; + else if (flags & PIPE_CONTEXT_PRIORITY_HIGH) + priority = 1; + + ctx->queue_id = agx_create_command_queue(agx_device(screen), + DRM_ASAHI_QUEUE_CAP_RENDER | + DRM_ASAHI_QUEUE_CAP_BLIT | + DRM_ASAHI_QUEUE_CAP_COMPUTE, + priority); + pctx->destroy = agx_destroy_context; pctx->flush = agx_flush; pctx->clear = agx_clear; @@ -1461,9 +1872,10 @@ agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags) ctx->blitter = util_blitter_create(pctx); - ctx->result_buf = agx_bo_create( - agx_device(screen), sizeof(union agx_batch_result) * AGX_MAX_BATCHES, - AGX_BO_WRITEBACK, "Batch result buffer"); + ctx->result_buf = + agx_bo_create(agx_device(screen), + (2 * sizeof(union agx_batch_result)) * AGX_MAX_BATCHES, + AGX_BO_WRITEBACK, "Batch result buffer"); assert(ctx->result_buf); /* Sync object/FD used for NATIVE_FENCE_FD. */ @@ -1764,6 +2176,10 @@ agx_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_TES_LAYER_VIEWPORT: return true; + case PIPE_CAP_CONTEXT_PRIORITY_MASK: + return PIPE_CONTEXT_PRIORITY_LOW | PIPE_CONTEXT_PRIORITY_MEDIUM | + PIPE_CONTEXT_PRIORITY_HIGH; + default: return u_pipe_screen_get_param_defaults(pscreen, param); } @@ -2179,6 +2595,18 @@ agx_get_timestamp(struct pipe_screen *pscreen) return agx_gpu_time_to_ns(dev, agx_get_gpu_timestamp(dev)); } +static void +agx_screen_get_device_uuid(struct pipe_screen *pscreen, char *uuid) +{ + agx_get_device_uuid(agx_device(pscreen), uuid); +} + +static void +agx_screen_get_driver_uuid(struct pipe_screen *pscreen, char *uuid) +{ + agx_get_driver_uuid(uuid); +} + struct pipe_screen * agx_screen_create(int fd, struct renderonly *ro, const struct pipe_screen_config *config) @@ -2186,6 +2614,13 @@ agx_screen_create(int fd, struct renderonly *ro, struct agx_screen *agx_screen; struct pipe_screen *screen; + /* Refuse to probe. There is no stable UAPI yet. Upstream Mesa cannot be used + * yet with Asahi. Do not try. Do not patch out this check. Do not teach + * others about patching this check. Do not distribute upstream Mesa with + * this check patched out. 
+ */ + return NULL; + agx_screen = rzalloc(NULL, struct agx_screen); if (!agx_screen) return NULL; @@ -2202,6 +2637,7 @@ agx_screen_create(int fd, struct renderonly *ro, agx_screen->dev.fd = fd; agx_screen->dev.ro = ro; + u_rwlock_init(&agx_screen->destroy_lock); /* Try to open an AGX device */ if (!agx_open_device(agx_screen, &agx_screen->dev)) { @@ -2209,8 +2645,6 @@ agx_screen_create(int fd, struct renderonly *ro, return NULL; } - agx_screen->queue_id = agx_create_command_queue(&agx_screen->dev, 0); - screen->destroy = agx_destroy_screen; screen->get_screen_fd = agx_screen_get_fd; screen->get_name = agx_get_name; @@ -2220,6 +2654,8 @@ agx_screen_create(int fd, struct renderonly *ro, screen->get_shader_param = agx_get_shader_param; screen->get_compute_param = agx_get_compute_param; screen->get_paramf = agx_get_paramf; + screen->get_device_uuid = agx_screen_get_device_uuid; + screen->get_driver_uuid = agx_screen_get_driver_uuid; screen->is_format_supported = agx_is_format_supported; screen->query_dmabuf_modifiers = agx_query_dmabuf_modifiers; screen->query_memory_info = agx_query_memory_info; diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 0d47fa447a2..3d139a3c71b 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -6,6 +6,7 @@ #pragma once +#include #include "asahi/compiler/agx_compile.h" #include "asahi/genxml/agx_pack.h" #include "asahi/layout/layout.h" @@ -18,6 +19,7 @@ #include "asahi/lib/agx_uvs.h" #include "asahi/lib/pool.h" #include "asahi/lib/shaders/geometry.h" +#include "asahi/lib/unstable_asahi_drm.h" #include "compiler/nir/nir_lower_blend.h" #include "compiler/shader_enums.h" #include "gallium/auxiliary/util/u_blitter.h" @@ -28,6 +30,7 @@ #include "util/bitset.h" #include "util/disk_cache.h" #include "util/hash_table.h" +#include "util/rwlock.h" #include "util/u_range.h" #include "agx_bg_eot.h" #include "agx_helpers.h" @@ -357,6 +360,8 @@ struct agx_stage { }; union agx_batch_result { + struct drm_asahi_result_render render; + struct drm_asahi_result_compute compute; }; /* This is a firmware limit. It should be possible to raise to 2048 in the @@ -632,6 +637,9 @@ struct agx_context { uint64_t generation[AGX_MAX_BATCHES]; } batches; + /* Queue handle */ + uint32_t queue_id; + struct agx_batch *batch; struct agx_bo *result_buf; @@ -872,8 +880,9 @@ struct agx_screen { struct pipe_screen pscreen; struct agx_device dev; struct disk_cache *disk_cache; - /* Queue handle */ - uint32_t queue_id; + + /* Lock to protect syncobj usage vs. 
destruction in context destroy */ + struct u_rwlock destroy_lock; }; static inline struct agx_screen * @@ -1053,9 +1062,12 @@ agx_batch_add_bo(struct agx_batch *batch, struct agx_bo *bo) #define AGX_BATCH_FOREACH_BO_HANDLE(batch, handle) \ BITSET_FOREACH_SET(handle, (batch)->bo_list.set, batch->bo_list.bit_count) +struct drm_asahi_cmd_compute; +struct drm_asahi_cmd_render; + void agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, - uint32_t barriers, enum drm_asahi_cmd_type cmd_type, - void *cmdbuf); + struct drm_asahi_cmd_compute *compute, + struct drm_asahi_cmd_render *render); void agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch); void agx_flush_batch_for_reason(struct agx_context *ctx, diff --git a/src/gallium/drivers/asahi/meson.build b/src/gallium/drivers/asahi/meson.build index 03793328cf8..f0f845ac615 100644 --- a/src/gallium/drivers/asahi/meson.build +++ b/src/gallium/drivers/asahi/meson.build @@ -20,7 +20,7 @@ files_asahi = files( libasahi = static_library( 'asahi', [files_asahi], - include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src, inc_asahi], + include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src, inc_asahi, inc_virtio_gpu, inc_virtio_vdrm], c_args : [c_msvc_compat_args], gnu_symbol_visibility : 'hidden', dependencies : [idep_nir, idep_mesautil, idep_agx_pack, dep_libdrm, idep_mesaclc],