From c64a2bbff5a3370bca7f40d6c9f41fb3cd6cf4ba Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 9 Apr 2025 09:31:53 -0400 Subject: [PATCH] asahi: port to stable uAPI Signed-off-by: Alyssa Rosenzweig Acked-by: Eric Engestrom Part-of: --- src/asahi/drm-shim/asahi_noop.c | 20 +- src/asahi/genxml/cmdbuf.xml | 5 + src/asahi/lib/.editorconfig | 4 - src/asahi/lib/agx_bo.c | 5 +- src/asahi/lib/agx_bo.h | 9 +- src/asahi/lib/agx_device.c | 244 +++++-------- src/asahi/lib/agx_device.h | 59 ++-- src/asahi/lib/agx_device_virtio.c | 227 +++--------- src/asahi/lib/agx_va.c | 2 +- src/asahi/lib/asahi_proto.h | 17 +- src/asahi/lib/decode.c | 160 +++++---- src/asahi/lib/decode.h | 14 +- src/asahi/vulkan/hk_buffer.c | 22 +- src/asahi/vulkan/hk_cmd_buffer.h | 1 - src/asahi/vulkan/hk_cmd_draw.c | 2 - src/asahi/vulkan/hk_device.c | 2 +- src/asahi/vulkan/hk_device_memory.c | 4 +- src/asahi/vulkan/hk_physical_device.c | 18 +- src/asahi/vulkan/hk_query_pool.c | 17 +- src/asahi/vulkan/hk_queue.c | 459 +++++++++++-------- src/broadcom/simulator/v3d_simulator.c | 2 +- src/gallium/drivers/asahi/agx_batch.c | 370 +++++++------------- src/gallium/drivers/asahi/agx_pipe.c | 332 +++++++----------- src/gallium/drivers/asahi/agx_query.c | 4 +- src/gallium/drivers/asahi/agx_state.h | 18 +- 25 files changed, 760 insertions(+), 1257 deletions(-) delete mode 100644 src/asahi/lib/.editorconfig diff --git a/src/asahi/drm-shim/asahi_noop.c b/src/asahi/drm-shim/asahi_noop.c index b69c08b8452..50266e1fa49 100644 --- a/src/asahi/drm-shim/asahi_noop.c +++ b/src/asahi/drm-shim/asahi_noop.c @@ -6,21 +6,17 @@ #include -#include "../lib/unstable_asahi_drm.h" #include "drm-shim/drm_shim.h" +#include "drm-uapi/asahi_drm.h" bool drm_shim_driver_prefers_first_render_node = true; static const struct drm_asahi_params_global params = { - .unstable_uabi_version = DRM_ASAHI_UNSTABLE_UABI_VERSION, .gpu_generation = 13, .gpu_variant = 'G', .gpu_revision = 0, - .vm_user_start = 0x1000000, - .vm_user_end = 0x5000000, - .vm_usc_start = 0, - .vm_usc_end = 0, - .vm_page_size = 4096, + .vm_start = 0x1000000, + .vm_end = 0x5000000, }; struct asahi_bo { @@ -48,12 +44,6 @@ asahi_ioctl_noop(int fd, unsigned long request, void *arg) return 0; } -static int -asahi_ioctl_submit(int fd, unsigned long request, void *arg) -{ - return 0; -} - static int asahi_ioctl_gem_create(int fd, unsigned long request, void *arg) { @@ -110,12 +100,12 @@ static ioctl_fn_t driver_ioctls[] = { [DRM_ASAHI_GET_PARAMS] = asahi_ioctl_get_param, [DRM_ASAHI_VM_CREATE] = asahi_ioctl_noop, [DRM_ASAHI_VM_DESTROY] = asahi_ioctl_noop, + [DRM_ASAHI_VM_BIND] = asahi_ioctl_noop, [DRM_ASAHI_GEM_CREATE] = asahi_ioctl_gem_create, [DRM_ASAHI_GEM_MMAP_OFFSET] = asahi_ioctl_gem_mmap_offset, - [DRM_ASAHI_GEM_BIND] = asahi_ioctl_noop, [DRM_ASAHI_QUEUE_CREATE] = asahi_ioctl_noop, [DRM_ASAHI_QUEUE_DESTROY] = asahi_ioctl_noop, - [DRM_ASAHI_SUBMIT] = asahi_ioctl_submit, + [DRM_ASAHI_SUBMIT] = asahi_ioctl_noop, }; void diff --git a/src/asahi/genxml/cmdbuf.xml b/src/asahi/genxml/cmdbuf.xml index efa7a5b2e32..c497b791283 100644 --- a/src/asahi/genxml/cmdbuf.xml +++ b/src/asahi/genxml/cmdbuf.xml @@ -1119,4 +1119,9 @@ + + <struct name="CR ISP ZLS Pixels" size="4"> + <field name="X" start="0" size="15" type="uint" modifier="minus(1)"/> + <field name="Y" start="15" size="15" type="uint" modifier="minus(1)"/> + </struct> diff --git a/src/asahi/lib/.editorconfig b/src/asahi/lib/.editorconfig deleted file mode 100644 index 7d29c2a2be9..00000000000 --- a/src/asahi/lib/.editorconfig +++ /dev/null @@ -1,4 +0,0 @@ -[unstable_asahi_drm.h] -indent_style = tab -indent_size = 8 -max_line_length = 100 diff --git a/src/asahi/lib/agx_bo.c b/src/asahi/lib/agx_bo.c index 534280de2b4..8dea9795901 100644 
--- a/src/asahi/lib/agx_bo.c +++ b/src/asahi/lib/agx_bo.c @@ -11,6 +11,7 @@ #include "util/ralloc.h" #include "agx_device.h" #include "decode.h" +#include "layout.h" /* Helper to calculate the bucket index of a BO */ static unsigned @@ -355,8 +356,8 @@ agx_bo_create(struct agx_device *dev, size_t size, unsigned align, assert(size > 0); /* BOs are allocated in pages */ - size = ALIGN_POT(size, (size_t)dev->params.vm_page_size); - align = MAX2(align, dev->params.vm_page_size); + size = ALIGN_POT(size, AIL_PAGESIZE); + align = MAX2(align, AIL_PAGESIZE); /* See if we have a BO already in the cache */ bo = agx_bo_cache_fetch(dev, size, align, flags, true); diff --git a/src/asahi/lib/agx_bo.h b/src/asahi/lib/agx_bo.h index babc715eb75..4752cd5fb03 100644 --- a/src/asahi/lib/agx_bo.h +++ b/src/asahi/lib/agx_bo.h @@ -88,6 +88,11 @@ struct agx_bo { /* Process-local index */ uint32_t handle; + /* Handle to refer to this BO in uAPI calls. This is either the GEM handle + * on native Linux, or the virtio resource ID with virtgpu. + */ + uint32_t uapi_handle; + /* DMA-BUF fd clone for adding fences to imports/exports */ int prime_fd; @@ -99,10 +104,6 @@ struct agx_bo { /* For debugging */ const char *label; - - /* virtio blob_id */ - uint32_t blob_id; - uint32_t vbo_res_id; }; static inline uint32_t diff --git a/src/asahi/lib/agx_device.c b/src/asahi/lib/agx_device.c index a0285e64efa..08f9d6fde9d 100644 --- a/src/asahi/lib/agx_device.c +++ b/src/asahi/lib/agx_device.c @@ -8,6 +8,7 @@ #include "agx_device.h" #include #include "clc/asahi_clc.h" +#include "drm-uapi/asahi_drm.h" #include "util/macros.h" #include "util/ralloc.h" #include "util/timespec.h" @@ -18,6 +19,7 @@ #include "agx_scratch.h" #include "decode.h" #include "glsl_types.h" +#include "layout.h" #include "libagx_dgc.h" #include "libagx_shaders.h" @@ -34,7 +36,6 @@ #include "util/u_printf.h" #include "git_sha1.h" #include "nir_serialize.h" -#include "unstable_asahi_drm.h" #include "vdrm.h" static inline int @@ -80,7 +81,7 @@ static const struct debug_named_value agx_debug_options[] = { void agx_bo_free(struct agx_device *dev, struct agx_bo *bo) { - const uint64_t handle = bo->handle; + const uint64_t handle = bo->uapi_handle; if (bo->_map) munmap(bo->_map, bo->size); @@ -103,33 +104,58 @@ agx_bo_free(struct agx_device *dev, struct agx_bo *bo) } static int +agx_drm_bo_bind(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops, + uint32_t count) +{ + struct drm_asahi_vm_bind vm_bind = { + .num_binds = count, + .vm_id = dev->vm_id, + .userptr = (uintptr_t)ops, + .stride = sizeof(*ops), + }; + + int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_VM_BIND, &vm_bind); + if (ret) { + fprintf(stderr, "DRM_IOCTL_ASAHI_VM_BIND failed\n"); + } + + return ret; +} + +/* + * Convenience helper to bind a single BO regardless of kernel module. + */ +int agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, - size_t size_B, uint64_t offset_B, uint32_t flags, bool unbind) + size_t size_B, uint64_t offset_B, uint32_t flags) { assert((size_B % 16384) == 0 && "alignment required"); assert((offset_B % 16384) == 0 && "alignment required"); assert((addr % 16384) == 0 && "alignment required"); - struct drm_asahi_gem_bind gem_bind = { - .op = unbind ? ASAHI_BIND_OP_UNBIND : ASAHI_BIND_OP_BIND, + struct drm_asahi_gem_bind_op op = { .flags = flags, - .handle = bo ? bo->handle : 0, - .vm_id = dev->vm_id, + .handle = bo ? 
bo->uapi_handle : 0, .offset = offset_B, .range = size_B, .addr = addr, }; - assert((size_B % 16384) == 0 && "page alignment required"); - assert((offset_B % 16384) == 0 && "page alignment required"); - assert((addr % 16384) == 0 && "page alignment required"); + return dev->ops.bo_bind(dev, &op, 1); +} - int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND, &gem_bind); - if (ret) { - fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_BIND failed: %m (handle=%d)\n", - bo ? bo->handle : 0); - } +int +agx_bind_timestamps(struct agx_device *dev, struct agx_bo *bo, uint32_t *handle) +{ + struct drm_asahi_gem_bind_object bind = { + .op = DRM_ASAHI_BIND_OBJECT_OP_BIND, + .flags = DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS, + .handle = bo->uapi_handle, + .range = bo->size, + }; + int ret = dev->ops.bo_bind_object(dev, &bind); + *handle = bind.object_handle; return ret; } @@ -146,10 +172,10 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, struct drm_asahi_gem_create gem_create = {.size = size}; if (flags & AGX_BO_WRITEBACK) - gem_create.flags |= ASAHI_GEM_WRITEBACK; + gem_create.flags |= DRM_ASAHI_GEM_WRITEBACK; if (!(flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))) { - gem_create.flags |= ASAHI_GEM_VM_PRIVATE; + gem_create.flags |= DRM_ASAHI_GEM_VM_PRIVATE; gem_create.vm_id = dev->vm_id; } @@ -173,7 +199,7 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, bo->size = gem_create.size; bo->align = align; bo->flags = flags; - bo->handle = handle; + bo->handle = bo->uapi_handle = handle; bo->prime_fd = -1; enum agx_va_flags va_flags = flags & AGX_BO_LOW_VA ? AGX_VA_USC : 0; @@ -184,12 +210,12 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align, return NULL; } - uint32_t bind = ASAHI_BIND_READ; + uint32_t bind = DRM_ASAHI_BIND_READ; if (!(flags & AGX_BO_READONLY)) { - bind |= ASAHI_BIND_WRITE; + bind |= DRM_ASAHI_BIND_WRITE; } - ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0, bind, false); + ret = agx_bo_bind(dev, bo, bo->va->addr, bo->size, 0, bind); if (ret) { agx_bo_free(dev, bo); return NULL; @@ -203,7 +229,8 @@ agx_bo_mmap(struct agx_device *dev, struct agx_bo *bo) { assert(bo->_map == NULL && "not double mapped"); - struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle = bo->handle}; + struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle = + bo->uapi_handle}; int ret; ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET, &gem_mmap_offset); @@ -245,7 +272,7 @@ agx_bo_import(struct agx_device *dev, int fd) if (!bo->size) { bo->dev = dev; bo->size = lseek(fd, 0, SEEK_END); - bo->align = dev->params.vm_page_size; + bo->align = AIL_PAGESIZE; /* Sometimes this can fail and return -1. size of -1 is not * a nice thing for mmap to try mmap. 
Be more robust also @@ -255,7 +282,7 @@ agx_bo_import(struct agx_device *dev, int fd) pthread_mutex_unlock(&dev->bo_map_lock); return NULL; } - if (bo->size & (dev->params.vm_page_size - 1)) { + if (bo->size & (AIL_PAGESIZE - 1)) { fprintf( stderr, "import failed: BO is not a multiple of the page size (0x%llx bytes)\n", @@ -281,11 +308,13 @@ agx_bo_import(struct agx_device *dev, int fd) } if (dev->is_virtio) { - bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, bo->handle); + bo->uapi_handle = vdrm_handle_to_res_id(dev->vdrm, bo->handle); + } else { + bo->uapi_handle = bo->handle; } - ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0, - ASAHI_BIND_READ | ASAHI_BIND_WRITE, false); + ret = agx_bo_bind(dev, bo, bo->va->addr, bo->size, 0, + DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE); if (ret) { fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n", (long long)bo->va->addr); @@ -361,38 +390,24 @@ agx_bo_export(struct agx_device *dev, struct agx_bo *bo) } static int -agx_bo_bind_object(struct agx_device *dev, struct agx_bo *bo, - uint32_t *object_handle, size_t size_B, uint64_t offset_B, - uint32_t flags) +agx_bo_bind_object(struct agx_device *dev, + struct drm_asahi_gem_bind_object *bind) { - struct drm_asahi_gem_bind_object gem_bind = { - .op = ASAHI_BIND_OBJECT_OP_BIND, - .flags = flags, - .handle = bo->handle, - .vm_id = 0, - .offset = offset_B, - .range = size_B, - }; - - int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, &gem_bind); + int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, bind); if (ret) { fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (handle=%d)\n", - bo->handle); + bind->handle); } - *object_handle = gem_bind.object_handle; - return ret; } static int -agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle, - uint32_t flags) +agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle) { struct drm_asahi_gem_bind_object gem_bind = { - .op = ASAHI_BIND_OBJECT_OP_UNBIND, - .flags = flags, + .op = DRM_ASAHI_BIND_OBJECT_OP_UNBIND, .object_handle = object_handle, }; @@ -406,23 +421,6 @@ agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle, return ret; } -static void -agx_get_global_ids(struct agx_device *dev) -{ - dev->next_global_id = 0; - dev->last_global_id = 0x1000000; -} - -uint64_t -agx_get_global_id(struct agx_device *dev) -{ - if (unlikely(dev->next_global_id >= dev->last_global_id)) { - agx_get_global_ids(dev); - } - - return dev->next_global_id++; -} - static ssize_t agx_get_params(struct agx_device *dev, void *buf, size_t size) { @@ -452,7 +450,7 @@ agx_submit(struct agx_device *dev, struct drm_asahi_submit *submit, const agx_device_ops_t agx_device_drm_ops = { .bo_alloc = agx_bo_alloc, - .bo_bind = agx_bo_bind, + .bo_bind = agx_drm_bo_bind, .bo_mmap = agx_bo_mmap, .get_params = agx_get_params, .submit = agx_submit, @@ -475,16 +473,12 @@ gcd(uint64_t n, uint64_t m) static void agx_init_timestamps(struct agx_device *dev) { - uint64_t ts_gcd = gcd(dev->params.timer_frequency_hz, NSEC_PER_SEC); - - dev->timestamp_to_ns.num = NSEC_PER_SEC / ts_gcd; - dev->timestamp_to_ns.den = dev->params.timer_frequency_hz / ts_gcd; - - uint64_t user_ts_gcd = gcd(dev->params.timer_frequency_hz, NSEC_PER_SEC); + uint64_t user_ts_gcd = + gcd(dev->params.command_timestamp_frequency_hz, NSEC_PER_SEC); dev->user_timestamp_to_ns.num = NSEC_PER_SEC / user_ts_gcd; dev->user_timestamp_to_ns.den = - dev->params.user_timestamp_frequency_hz / user_ts_gcd; + dev->params.command_timestamp_frequency_hz / user_ts_gcd; 
} bool @@ -533,47 +527,6 @@ agx_open_device(void *memctx, struct agx_device *dev) } assert(params_size >= sizeof(dev->params)); - /* Refuse to probe. */ - if (dev->params.unstable_uabi_version != DRM_ASAHI_UNSTABLE_UABI_VERSION) { - fprintf( - stderr, - "You are attempting to use upstream Mesa with a downstream kernel!\n" - "This WILL NOT work.\n" - "The Asahi UABI is unstable and NOT SUPPORTED in upstream Mesa.\n" - "UABI related code in upstream Mesa is not for use!\n" - "\n" - "Do NOT attempt to patch out checks, you WILL break your system.\n" - "Do NOT report bugs.\n" - "Do NOT ask Mesa developers for support.\n" - "Do NOT write guides about how to patch out these checks.\n" - "Do NOT package patches to Mesa to bypass this.\n" - "\n" - "~~~\n" - "This is not a place of honor.\n" - "No highly esteemed deed is commemorated here.\n" - "Nothing valued is here.\n" - "\n" - "What is here was dangerous and repulsive to us.\n" - "This message is a warning about danger.\n" - "\n" - "The danger is still present, in your time, as it was in ours.\n" - "The danger is unleashed only if you substantially disturb this place physically.\n" - "This place is best shunned and left uninhabited.\n" - "~~~\n" - "\n" - "THIS IS NOT A BUG. THIS IS YOU DOING SOMETHING BROKEN!\n"); - abort(); - } - - uint64_t incompat = - dev->params.feat_incompat & (~AGX_SUPPORTED_INCOMPAT_FEATURES); - if (incompat) { - fprintf(stderr, "Missing GPU incompat features: 0x%" PRIx64 "\n", - incompat); - assert(0); - return false; - } - assert(dev->params.gpu_generation >= 13); const char *variant = " Unknown"; switch (dev->params.gpu_variant) { @@ -611,14 +564,10 @@ agx_open_device(void *memctx, struct agx_device *dev) assert(reservation == LIBAGX_PRINTF_BUFFER_ADDRESS); reservation += LIBAGX_PRINTF_BUFFER_SIZE; - dev->guard_size = dev->params.vm_page_size; - if (dev->params.vm_usc_start) { - dev->shader_base = dev->params.vm_usc_start; - } else { - // Put the USC heap at the bottom of the user address space, 4GiB aligned - dev->shader_base = ALIGN_POT(MAX2(dev->params.vm_user_start, reservation), - 0x100000000ull); - } + dev->guard_size = AIL_PAGESIZE; + // Put the USC heap at the bottom of the user address space, 4GiB aligned + dev->shader_base = + ALIGN_POT(MAX2(dev->params.vm_start, reservation), 0x100000000ull); if (dev->shader_base < reservation) { /* Our robustness implementation requires the bottom unmapped */ @@ -631,8 +580,8 @@ agx_open_device(void *memctx, struct agx_device *dev) // Put the user heap after the USC heap uint64_t user_start = dev->shader_base + shader_size; - assert(dev->shader_base >= dev->params.vm_user_start); - assert(user_start < dev->params.vm_user_end); + assert(dev->shader_base >= dev->params.vm_start); + assert(user_start < dev->params.vm_end); dev->agxdecode = agxdecode_new_context(dev->shader_base); @@ -652,8 +601,8 @@ agx_open_device(void *memctx, struct agx_device *dev) // reasonable use case. 
uint64_t kernel_size = MAX2(dev->params.vm_kernel_min_size, 32ull << 30); struct drm_asahi_vm_create vm_create = { - .kernel_start = dev->params.vm_user_end - kernel_size, - .kernel_end = dev->params.vm_user_end, + .kernel_start = dev->params.vm_end - kernel_size, + .kernel_end = dev->params.vm_end, }; uint64_t user_size = vm_create.kernel_start - user_start; @@ -671,8 +620,6 @@ agx_open_device(void *memctx, struct agx_device *dev) dev->vm_id = vm_create.vm_id; - agx_get_global_ids(dev); - glsl_type_singleton_init_or_ref(); if (agx_gather_device_key(dev).needs_g13x_coherency == U_TRISTATE_YES) { @@ -698,8 +645,8 @@ agx_open_device(void *memctx, struct agx_device *dev) */ { void *bo = agx_bo_create(dev, 16384, 0, 0, "Zero page"); - int ret = dev->ops.bo_bind(dev, bo, AGX_ZERO_PAGE_ADDRESS, 16384, 0, - ASAHI_BIND_READ, false); + int ret = agx_bo_bind(dev, bo, AGX_ZERO_PAGE_ADDRESS, 16384, 0, + DRM_ASAHI_BIND_READ); if (ret) { fprintf(stderr, "Failed to bind zero page"); return false; @@ -709,9 +656,9 @@ agx_open_device(void *memctx, struct agx_device *dev) void *bo = agx_bo_create(dev, LIBAGX_PRINTF_BUFFER_SIZE, 0, AGX_BO_WRITEBACK, "Printf/abort"); - ret = dev->ops.bo_bind(dev, bo, LIBAGX_PRINTF_BUFFER_ADDRESS, - LIBAGX_PRINTF_BUFFER_SIZE, 0, - ASAHI_BIND_READ | ASAHI_BIND_WRITE, false); + ret = agx_bo_bind(dev, bo, LIBAGX_PRINTF_BUFFER_ADDRESS, + LIBAGX_PRINTF_BUFFER_SIZE, 0, + DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE); if (ret) { fprintf(stderr, "Failed to bind printf buffer"); return false; @@ -738,8 +685,8 @@ agx_close_device(struct agx_device *dev) } uint32_t -agx_create_command_queue(struct agx_device *dev, uint32_t caps, - uint32_t priority) +agx_create_command_queue(struct agx_device *dev, + enum drm_asahi_priority priority) { if (dev->debug & AGX_DBG_1QUEUE) { @@ -753,9 +700,8 @@ agx_create_command_queue(struct agx_device *dev, uint32_t caps, struct drm_asahi_queue_create queue_create = { .vm_id = dev->vm_id, - .queue_caps = caps, .priority = priority, - .flags = 0, + .usc_exec_base = dev->shader_base, }; int ret = @@ -873,28 +819,14 @@ agx_debug_fault(struct agx_device *dev, uint64_t addr) uint64_t agx_get_gpu_timestamp(struct agx_device *dev) { - if (dev->params.feat_compat & DRM_ASAHI_FEAT_GETTIME) { - struct drm_asahi_get_time get_time = {.flags = 0, .extensions = 0}; + struct drm_asahi_get_time get_time = {.flags = 0}; - int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_GET_TIME, &get_time); - if (ret) { - fprintf(stderr, "DRM_IOCTL_ASAHI_GET_TIME failed: %m\n"); - } else { - return get_time.gpu_timestamp; - } + int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_GET_TIME, &get_time); + if (ret) { + fprintf(stderr, "DRM_IOCTL_ASAHI_GET_TIME failed: %m\n"); } -#if DETECT_ARCH_AARCH64 - uint64_t ret; - __asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(ret)); - return ret; -#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64 - /* Maps to the above when run under FEX without thunking */ - uint32_t high, low; - __asm__ volatile("rdtsc" : "=a"(low), "=d"(high)); - return (uint64_t)low | ((uint64_t)high << 32); -#else -#error "invalid architecture for asahi" -#endif + + return get_time.gpu_timestamp; } /* (Re)define UUID_SIZE to avoid including vulkan.h (or p_defines.h) here. 
*/ diff --git a/src/asahi/lib/agx_device.h b/src/asahi/lib/agx_device.h index 45a69b4d2d3..266cf17d3e9 100644 --- a/src/asahi/lib/agx_device.h +++ b/src/asahi/lib/agx_device.h @@ -7,6 +7,7 @@ #include #include +#include "drm-uapi/asahi_drm.h" #include "util/ralloc.h" #include "util/simple_mtx.h" #include "util/sparse_array.h" @@ -18,16 +19,11 @@ #include "decode.h" #include "layout.h" #include "libagx_dgc.h" -#include "unstable_asahi_drm.h" #include "vdrm.h" #include "asahi_proto.h" -// TODO: this is a lie right now -static const uint64_t AGX_SUPPORTED_INCOMPAT_FEATURES = - DRM_ASAHI_FEAT_MANDATORY_ZS_COMPRESSION; - enum agx_dbg { AGX_DBG_TRACE = BITFIELD_BIT(0), AGX_DBG_BODUMP = BITFIELD_BIT(1), @@ -69,7 +65,6 @@ struct nir_shader; #define BARRIER_COMPUTE (1 << DRM_ASAHI_SUBQUEUE_COMPUTE) struct agx_submit_virt { - uint32_t vbo_res_id; uint32_t extres_count; struct asahi_ccmd_submit_res *extres; }; @@ -77,21 +72,23 @@ struct agx_submit_virt { typedef struct { struct agx_bo *(*bo_alloc)(struct agx_device *dev, size_t size, size_t align, enum agx_bo_flags flags); - int (*bo_bind)(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, - size_t size_B, uint64_t offset_B, uint32_t flags, - bool unbind); + int (*bo_bind)(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops, + uint32_t count); void (*bo_mmap)(struct agx_device *dev, struct agx_bo *bo); ssize_t (*get_params)(struct agx_device *dev, void *buf, size_t size); int (*submit)(struct agx_device *dev, struct drm_asahi_submit *submit, struct agx_submit_virt *virt); - int (*bo_bind_object)(struct agx_device *dev, struct agx_bo *bo, - uint32_t *object_handle, size_t size_B, - uint64_t offset_B, uint32_t flags); - int (*bo_unbind_object)(struct agx_device *dev, uint32_t object_handle, - uint32_t flags); - + int (*bo_bind_object)(struct agx_device *dev, + struct drm_asahi_gem_bind_object *bind); + int (*bo_unbind_object)(struct agx_device *dev, uint32_t object_handle); } agx_device_ops_t; +int agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, + size_t size_B, uint64_t offset_B, uint32_t flags); + +int agx_bind_timestamps(struct agx_device *dev, struct agx_bo *bo, + uint32_t *handle); + struct agx_device { uint32_t debug; @@ -100,7 +97,6 @@ struct agx_device { char name[64]; struct drm_asahi_params_global params; - uint64_t next_global_id, last_global_id; bool is_virtio; agx_device_ops_t ops; @@ -160,11 +156,6 @@ struct agx_device { /* Simplified device selection */ enum agx_chip chip; - struct { - uint64_t num; - uint64_t den; - } timestamp_to_ns; - struct { uint64_t num; uint64_t den; @@ -185,7 +176,7 @@ agx_bo_map(struct agx_bo *bo) static inline bool agx_has_soft_fault(struct agx_device *dev) { - return (dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS) && + return (dev->params.features & DRM_ASAHI_FEATURE_SOFT_FAULTS) && !(dev->debug & AGX_DBG_NOSOFT); } @@ -208,10 +199,8 @@ agx_lookup_bo(struct agx_device *dev, uint32_t handle) return util_sparse_array_get(&dev->bo_map, handle); } -uint64_t agx_get_global_id(struct agx_device *dev); - -uint32_t agx_create_command_queue(struct agx_device *dev, uint32_t caps, - uint32_t priority); +uint32_t agx_create_command_queue(struct agx_device *dev, + enum drm_asahi_priority priority); int agx_destroy_command_queue(struct agx_device *dev, uint32_t queue_id); int agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd); @@ -221,12 +210,6 @@ void agx_debug_fault(struct agx_device *dev, uint64_t addr); uint64_t agx_get_gpu_timestamp(struct agx_device *dev); 
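/*
 * A minimal usage sketch of the vectored bind interface declared above
 * (editorial example; "a" and "b" are hypothetical, already-allocated BOs
 * with VAs assigned). Every op in the array is applied by a single
 * DRM_ASAHI_VM_BIND ioctl on native kernels, or one ASAHI_CCMD_VM_BIND
 * message on virtgpu; agx_bo_bind() is the count == 1 special case.
 */
static inline int
example_bind_two_bos(struct agx_device *dev, struct agx_bo *a,
                     struct agx_bo *b)
{
   struct drm_asahi_gem_bind_op ops[2] = {
      {
         .flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE,
         .handle = a->uapi_handle,
         .offset = 0,
         .range = a->size,
         .addr = a->va->addr,
      },
      {
         /* Read-only mapping for the second BO */
         .flags = DRM_ASAHI_BIND_READ,
         .handle = b->uapi_handle,
         .offset = 0,
         .range = b->size,
         .addr = b->va->addr,
      },
   };

   return dev->ops.bo_bind(dev, ops, 2);
}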
-static inline uint64_t -agx_gpu_time_to_ns(struct agx_device *dev, uint64_t gpu_time) -{ - return (gpu_time * dev->timestamp_to_ns.num) / dev->timestamp_to_ns.den; -} - static inline uint64_t agx_gpu_timestamp_to_ns(struct agx_device *dev, uint64_t gpu_timestamp) { @@ -245,8 +228,14 @@ struct agx_va *agx_va_alloc(struct agx_device *dev, uint64_t size_B, uint64_t fixed_va); void agx_va_free(struct agx_device *dev, struct agx_va *va, bool unbind); -static inline bool -agx_supports_timestamps(const struct agx_device *dev) +static inline struct drm_asahi_cmd_header +agx_cmd_header(bool compute, uint16_t barrier_vdm, uint16_t barrier_cdm) { - return (dev->params.feat_compat & DRM_ASAHI_FEAT_USER_TIMESTAMPS); + return (struct drm_asahi_cmd_header){ + .cmd_type = compute ? DRM_ASAHI_CMD_COMPUTE : DRM_ASAHI_CMD_RENDER, + .size = compute ? sizeof(struct drm_asahi_cmd_compute) + : sizeof(struct drm_asahi_cmd_render), + .vdm_barrier = barrier_vdm, + .cdm_barrier = barrier_cdm, + }; } diff --git a/src/asahi/lib/agx_device_virtio.c b/src/asahi/lib/agx_device_virtio.c index 4fe7f641178..347faa4e26c 100644 --- a/src/asahi/lib/agx_device_virtio.c +++ b/src/asahi/lib/agx_device_virtio.c @@ -9,7 +9,6 @@ #include #include "drm-uapi/virtgpu_drm.h" -#include "unstable_asahi_drm.h" #include "vdrm.h" @@ -67,14 +66,14 @@ agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align, }; if (flags & AGX_BO_WRITEBACK) - req.flags |= ASAHI_GEM_WRITEBACK; + req.flags |= DRM_ASAHI_GEM_WRITEBACK; uint32_t blob_flags = VIRTGPU_BLOB_FLAG_USE_MAPPABLE | VIRTGPU_BLOB_FLAG_USE_SHAREABLE; - req.bind_flags = ASAHI_BIND_READ; + req.bind_flags = DRM_ASAHI_BIND_READ; if (!(flags & AGX_BO_READONLY)) { - req.bind_flags |= ASAHI_BIND_WRITE; + req.bind_flags |= DRM_ASAHI_BIND_WRITE; } uint32_t blob_id = p_atomic_inc_return(&dev->next_blob_id); @@ -86,7 +85,6 @@ agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align, return NULL; } - /* Note: optional, can zero out for not mapping for sparse */ req.addr = va->addr; req.blob_id = blob_id; req.vm_id = dev->vm_id; @@ -111,55 +109,46 @@ agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align, bo->flags = flags; bo->handle = handle; bo->prime_fd = -1; - bo->blob_id = blob_id; bo->va = va; - bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, handle); + bo->uapi_handle = vdrm_handle_to_res_id(dev->vdrm, handle); return bo; } static int -agx_virtio_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, - size_t size_B, uint64_t offset_B, uint32_t flags, - bool unbind) +agx_virtio_bo_bind(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops, + uint32_t count) { - struct asahi_ccmd_gem_bind_req req = { - .hdr.cmd = ASAHI_CCMD_GEM_BIND, - .hdr.len = sizeof(struct asahi_ccmd_gem_bind_req), - .bind = { - .op = unbind ? ASAHI_BIND_OP_UNBIND : ASAHI_BIND_OP_BIND, - .flags = flags, - .vm_id = dev->vm_id, - .handle = bo ? 
bo->vbo_res_id : 0, - .offset = offset_B, - .range = size_B, - .addr = addr, - }}; + size_t payload_size = sizeof(*ops) * count; + size_t req_len = sizeof(struct asahi_ccmd_vm_bind_req) + payload_size; + struct asahi_ccmd_vm_bind_req *req = calloc(1, req_len); - int ret = vdrm_send_req(dev->vdrm, &req.hdr, false); + *req = (struct asahi_ccmd_vm_bind_req){ + .hdr.cmd = ASAHI_CCMD_VM_BIND, + .hdr.len = req_len, + .vm_id = dev->vm_id, + .stride = sizeof(*ops), + .count = count, + }; + + memcpy(req->payload, ops, payload_size); + + int ret = vdrm_send_req(dev->vdrm, &req->hdr, false); if (ret) { - fprintf(stderr, "ASAHI_CCMD_GEM_BIND failed: %d (handle=%d)\n", ret, - bo ? bo->handle : 0); + fprintf(stderr, "ASAHI_CCMD_VM_BIND failed: %d\n", ret); } + + free(req); return ret; } static int -agx_virtio_bo_bind_object(struct agx_device *dev, struct agx_bo *bo, - uint32_t *object_handle, size_t size_B, - uint64_t offset_B, uint32_t flags) +agx_virtio_bo_bind_object(struct agx_device *dev, + struct drm_asahi_gem_bind_object *bind) { struct asahi_ccmd_gem_bind_object_req req = { .hdr.cmd = ASAHI_CCMD_GEM_BIND_OBJECT, .hdr.len = sizeof(struct asahi_ccmd_gem_bind_object_req), - .bind = { - .op = ASAHI_BIND_OBJECT_OP_BIND, - .flags = flags, - .vm_id = 0, - .handle = bo->vbo_res_id, - .offset = offset_B, - .range = size_B, - }}; + .bind = *bind, + }; struct asahi_ccmd_gem_bind_object_rsp *rsp; @@ -170,25 +159,23 @@ agx_virtio_bo_bind_object(struct agx_device *dev, struct agx_bo *bo, if (ret || rsp->ret) { fprintf(stderr, "ASAHI_CCMD_GEM_BIND_OBJECT bind failed: %d:%d (handle=%d)\n", - ret, rsp->ret, bo->handle); + ret, rsp->ret, bind->handle); } if (!rsp->ret) - *object_handle = rsp->object_handle; + bind->object_handle = rsp->object_handle; return rsp->ret; } static int -agx_virtio_bo_unbind_object(struct agx_device *dev, uint32_t object_handle, - uint32_t flags) +agx_virtio_bo_unbind_object(struct agx_device *dev, uint32_t object_handle) { struct asahi_ccmd_gem_bind_object_req req = { .hdr.cmd = ASAHI_CCMD_GEM_BIND_OBJECT, .hdr.len = sizeof(struct asahi_ccmd_gem_bind_object_req), .bind = { - .op = ASAHI_BIND_OBJECT_OP_UNBIND, - .flags = flags, + .op = DRM_ASAHI_BIND_OBJECT_OP_UNBIND, .object_handle = object_handle, }}; @@ -228,89 +215,24 @@ agx_virtio_get_params(struct agx_device *dev, void *buf, size_t size) sizeof(struct asahi_ccmd_get_params_rsp) + size); int ret = vdrm_send_req(vdrm, &req.hdr, true); - if (ret) - goto out; - - if (rsp->virt_uabi_version != ASAHI_PROTO_UNSTABLE_UABI_VERSION) { - fprintf(stderr, "Virt UABI mismatch: Host %d, Mesa %d\n", - rsp->virt_uabi_version, ASAHI_PROTO_UNSTABLE_UABI_VERSION); - return -1; - } + if (ret) + return ret; ret = rsp->ret; - if (!ret) { - memcpy(buf, &rsp->payload, size); - return size; - } + if (ret) + return ret; -out: - return ret; -} - -static void -agx_virtio_serialize_attachments(char **ptr, uint64_t attachments, - uint32_t count) -{ - if (!count) - return; - - size_t attachments_size = sizeof(struct drm_asahi_attachment) * count; - memcpy(*ptr, (char *)(uintptr_t)attachments, attachments_size); - *ptr += attachments_size; + memcpy(buf, &rsp->payload, size); + return size; } static int agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit, struct agx_submit_virt *virt) { - struct drm_asahi_command *commands = - (struct drm_asahi_command *)(uintptr_t)submit->commands; - struct drm_asahi_sync *in_syncs = - (struct drm_asahi_sync *)(uintptr_t)submit->in_syncs; - struct drm_asahi_sync *out_syncs = - (struct 
drm_asahi_sync *)(uintptr_t)submit->out_syncs; - size_t req_len = sizeof(struct asahi_ccmd_submit_req); - - for (int i = 0; i < submit->command_count; i++) { - switch (commands[i].cmd_type) { - case DRM_ASAHI_CMD_COMPUTE: { - struct drm_asahi_cmd_compute *compute = - (struct drm_asahi_cmd_compute *)(uintptr_t)commands[i].cmd_buffer; - req_len += sizeof(struct drm_asahi_command) + - sizeof(struct drm_asahi_cmd_compute); - req_len += - compute->attachment_count * sizeof(struct drm_asahi_attachment); - - if (compute->extensions) { - assert(*(uint32_t *)(uintptr_t)compute->extensions == - ASAHI_COMPUTE_EXT_TIMESTAMPS); - req_len += sizeof(struct drm_asahi_cmd_compute_user_timestamps); - } - break; - } - - case DRM_ASAHI_CMD_RENDER: { - struct drm_asahi_cmd_render *render = - (struct drm_asahi_cmd_render *)(uintptr_t)commands[i].cmd_buffer; - req_len += sizeof(struct drm_asahi_command) + - sizeof(struct drm_asahi_cmd_render); - req_len += render->fragment_attachment_count * - sizeof(struct drm_asahi_attachment); - req_len += render->vertex_attachment_count * - sizeof(struct drm_asahi_attachment); - - if (render->extensions) { - assert(*(uint32_t *)(uintptr_t)render->extensions == - ASAHI_RENDER_EXT_TIMESTAMPS); - req_len += sizeof(struct drm_asahi_cmd_render_user_timestamps); - } - break; - } - - default: - return EINVAL; - } - } + struct drm_asahi_sync *syncs = + (struct drm_asahi_sync *)(uintptr_t)submit->syncs; + size_t req_len = sizeof(struct asahi_ccmd_submit_req) + submit->cmdbuf_size; size_t extres_size = sizeof(struct asahi_ccmd_submit_res) * virt->extres_count; @@ -320,55 +242,13 @@ agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit, (struct asahi_ccmd_submit_req *)calloc(1, req_len); req->queue_id = submit->queue_id; - req->result_res_id = virt->vbo_res_id; - req->command_count = submit->command_count; req->extres_count = virt->extres_count; + req->cmdbuf_size = submit->cmdbuf_size; char *ptr = (char *)&req->payload; - for (int i = 0; i < submit->command_count; i++) { - memcpy(ptr, &commands[i], sizeof(struct drm_asahi_command)); - ptr += sizeof(struct drm_asahi_command); - - memcpy(ptr, (char *)(uintptr_t)commands[i].cmd_buffer, - commands[i].cmd_buffer_size); - ptr += commands[i].cmd_buffer_size; - - switch (commands[i].cmd_type) { - case DRM_ASAHI_CMD_RENDER: { - struct drm_asahi_cmd_render *render = - (struct drm_asahi_cmd_render *)(uintptr_t)commands[i].cmd_buffer; - agx_virtio_serialize_attachments(&ptr, render->vertex_attachments, - render->vertex_attachment_count); - agx_virtio_serialize_attachments(&ptr, render->fragment_attachments, - render->fragment_attachment_count); - if (render->extensions) { - struct drm_asahi_cmd_render_user_timestamps *ext = - (struct drm_asahi_cmd_render_user_timestamps *)(uintptr_t) - render->extensions; - assert(!ext->next); - memcpy(ptr, (void *)ext, sizeof(*ext)); - ptr += sizeof(*ext); - } - break; - } - case DRM_ASAHI_CMD_COMPUTE: { - struct drm_asahi_cmd_compute *compute = - (struct drm_asahi_cmd_compute *)(uintptr_t)commands[i].cmd_buffer; - agx_virtio_serialize_attachments(&ptr, compute->attachments, - compute->attachment_count); - if (compute->extensions) { - struct drm_asahi_cmd_compute_user_timestamps *ext = - (struct drm_asahi_cmd_compute_user_timestamps *)(uintptr_t) - compute->extensions; - assert(!ext->next); - memcpy(ptr, (void *)ext, sizeof(*ext)); - ptr += sizeof(*ext); - } - break; - } - } - } + memcpy(ptr, (void *)(uintptr_t)submit->cmdbuf, req->cmdbuf_size); + ptr += req->cmdbuf_size; memcpy(ptr, 
virt->extres, extres_size); ptr += extres_size; @@ -376,18 +256,12 @@ agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit, req->hdr.cmd = ASAHI_CCMD_SUBMIT; req->hdr.len = req_len; - struct drm_virtgpu_execbuffer_syncobj *vdrm_in_syncs = calloc( - submit->in_sync_count, sizeof(struct drm_virtgpu_execbuffer_syncobj)); - for (int i = 0; i < submit->in_sync_count; i++) { - vdrm_in_syncs[i].handle = in_syncs[i].handle; - vdrm_in_syncs[i].point = in_syncs[i].timeline_value; - } - - struct drm_virtgpu_execbuffer_syncobj *vdrm_out_syncs = calloc( - submit->out_sync_count, sizeof(struct drm_virtgpu_execbuffer_syncobj)); - for (int i = 0; i < submit->out_sync_count; i++) { - vdrm_out_syncs[i].handle = out_syncs[i].handle; - vdrm_out_syncs[i].point = out_syncs[i].timeline_value; + uint32_t total_syncs = submit->in_sync_count + submit->out_sync_count; + struct drm_virtgpu_execbuffer_syncobj *vdrm_syncs = + calloc(total_syncs, sizeof(struct drm_virtgpu_execbuffer_syncobj)); + for (int i = 0; i < total_syncs; i++) { + vdrm_syncs[i].handle = syncs[i].handle; + vdrm_syncs[i].point = syncs[i].timeline_value; } struct vdrm_execbuf_params p = { @@ -395,15 +269,14 @@ agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit, .ring_idx = 1, .req = &req->hdr, .num_in_syncobjs = submit->in_sync_count, - .in_syncobjs = vdrm_in_syncs, + .in_syncobjs = vdrm_syncs, .num_out_syncobjs = submit->out_sync_count, - .out_syncobjs = vdrm_out_syncs, + .out_syncobjs = vdrm_syncs + submit->in_sync_count, }; int ret = vdrm_execbuf(dev->vdrm, &p); - free(vdrm_out_syncs); - free(vdrm_in_syncs); + free(vdrm_syncs); free(req); return ret; } diff --git a/src/asahi/lib/agx_va.c b/src/asahi/lib/agx_va.c index 9908a1b5774..a2b56fc8146 100644 --- a/src/asahi/lib/agx_va.c +++ b/src/asahi/lib/agx_va.c @@ -56,7 +56,7 @@ agx_va_free(struct agx_device *dev, struct agx_va *va, bool unbind) return; if (unbind) { - dev->ops.bo_bind(dev, NULL, va->addr, va->size_B, 0, 0, true); + agx_bo_bind(dev, NULL, va->addr, va->size_B, 0, DRM_ASAHI_BIND_UNBIND); } struct util_vma_heap *heap = agx_vma_heap(dev, va->flags); diff --git a/src/asahi/lib/asahi_proto.h b/src/asahi/lib/asahi_proto.h index c0f317c2737..f098d20f9a6 100644 --- a/src/asahi/lib/asahi_proto.h +++ b/src/asahi/lib/asahi_proto.h @@ -7,7 +7,7 @@ #ifndef ASAHI_PROTO_H_ #define ASAHI_PROTO_H_ -#define ASAHI_PROTO_UNSTABLE_UABI_VERSION 1 +#include "drm-uapi/asahi_drm.h" /** * Defines the layout of shmem buffer used for host->guest communication. 
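/*
 * Layout note with a sketch: the submit payload carried by
 * asahi_ccmd_submit_req below (and consumed natively by
 * DRM_IOCTL_ASAHI_SUBMIT) is a flat byte stream in which each
 * struct drm_asahi_cmd_header is followed by header.size bytes of payload.
 * The helper below is illustrative only; it assumes a caller-owned
 * util_dynarray and a filled-in compute command, and uses agx_cmd_header()
 * from agx_device.h. DRM_ASAHI_BARRIER_NONE marks a subqueue with nothing
 * to wait on.
 */
#include "util/u_dynarray.h"

static inline void
example_push_compute(struct util_dynarray *cmdbuf,
                     const struct drm_asahi_cmd_compute *cmd)
{
   struct drm_asahi_cmd_header hdr =
      agx_cmd_header(true, DRM_ASAHI_BARRIER_NONE, DRM_ASAHI_BARRIER_NONE);

   util_dynarray_append(cmdbuf, struct drm_asahi_cmd_header, hdr);
   util_dynarray_append(cmdbuf, struct drm_asahi_cmd_compute, *cmd);

   /* agxdecode_drm_cmdbuf() in decode.c walks exactly this layout. */
}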
@@ -37,7 +37,7 @@ enum asahi_ccmd { ASAHI_CCMD_IOCTL_SIMPLE, ASAHI_CCMD_GET_PARAMS, ASAHI_CCMD_GEM_NEW, - ASAHI_CCMD_GEM_BIND, + ASAHI_CCMD_VM_BIND, ASAHI_CCMD_SUBMIT, ASAHI_CCMD_GEM_BIND_OBJECT, }; @@ -93,7 +93,6 @@ DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_get_params_req) struct asahi_ccmd_get_params_rsp { struct vdrm_ccmd_rsp hdr; int32_t ret; - uint32_t virt_uabi_version; uint8_t payload[]; }; @@ -108,11 +107,14 @@ struct asahi_ccmd_gem_new_req { }; DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_gem_new_req) -struct asahi_ccmd_gem_bind_req { +struct asahi_ccmd_vm_bind_req { struct vdrm_ccmd_req hdr; - struct drm_asahi_gem_bind bind; + uint32_t vm_id; + uint32_t stride; + uint32_t count; + uint8_t payload[]; }; -DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_gem_bind_req) +DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_vm_bind_req) struct asahi_ccmd_gem_bind_object_req { struct vdrm_ccmd_req hdr; @@ -138,8 +140,7 @@ struct asahi_ccmd_submit_req { struct vdrm_ccmd_req hdr; uint32_t flags; uint32_t queue_id; - uint32_t result_res_id; - uint32_t command_count; + uint32_t cmdbuf_size; uint32_t extres_count; uint8_t payload[]; diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index a8c2a42b531..5fee99c27ef 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -18,7 +18,6 @@ #include "util/u_hexdump.h" #include "decode.h" -#include "unstable_asahi_drm.h" struct libagxdecode_config lib_config; @@ -801,106 +800,119 @@ agxdecode_helper(struct agxdecode_ctx *ctx, const char *prefix, uint64_t helper) } } -void +static void agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx, struct drm_asahi_params_global *params, struct drm_asahi_cmd_render *c, bool verbose) { - agxdecode_dump_file_open(); - - DUMP_FIELD(c, "%llx", flags); - DUMP_FIELD(c, "0x%llx", encoder_ptr); - agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_vdm, verbose, - params, NULL); - DUMP_FIELD(c, "0x%x", encoder_id); - DUMP_FIELD(c, "0x%x", cmd_ta_id); - DUMP_FIELD(c, "0x%x", cmd_3d_id); + DUMP_FIELD(c, "%x", flags); + DUMP_FIELD(c, "0x%llx", vdm_ctrl_stream_base); + agxdecode_stateful(ctx, c->vdm_ctrl_stream_base, "Encoder", agxdecode_vdm, + verbose, params, NULL); DUMP_FIELD(c, "0x%x", ppp_ctrl); DUMP_FIELD(c, "0x%llx", ppp_multisamplectl); DUMP_CL(ZLS_CONTROL, &c->zls_ctrl, "ZLS Control"); - DUMP_FIELD(c, "0x%llx", depth_buffer_load); - DUMP_FIELD(c, "0x%llx", depth_buffer_store); - DUMP_FIELD(c, "0x%llx", depth_buffer_partial); - DUMP_FIELD(c, "0x%llx", stencil_buffer_load); - DUMP_FIELD(c, "0x%llx", stencil_buffer_store); - DUMP_FIELD(c, "0x%llx", stencil_buffer_partial); - DUMP_FIELD(c, "0x%llx", scissor_array); - DUMP_FIELD(c, "0x%llx", depth_bias_array); - DUMP_FIELD(c, "%d", fb_width); - DUMP_FIELD(c, "%d", fb_height); + DUMP_FIELD(c, "0x%llx", depth.base); + DUMP_FIELD(c, "0x%llx", depth.comp_base); + DUMP_FIELD(c, "%u", depth.stride); + DUMP_FIELD(c, "%u", depth.comp_stride); + DUMP_FIELD(c, "0x%llx", stencil.base); + DUMP_FIELD(c, "0x%llx", stencil.comp_base); + DUMP_FIELD(c, "%u", stencil.stride); + DUMP_FIELD(c, "%u", stencil.comp_stride); + DUMP_FIELD(c, "0x%llx", isp_scissor_base); + DUMP_FIELD(c, "0x%llx", isp_dbias_base); + DUMP_FIELD(c, "%d", width_px); + DUMP_FIELD(c, "%d", height_px); DUMP_FIELD(c, "%d", layers); DUMP_FIELD(c, "%d", samples); - DUMP_FIELD(c, "%d", sample_size); - DUMP_FIELD(c, "%d", tib_blocks); - DUMP_FIELD(c, "%d", utile_width); - DUMP_FIELD(c, "%d", utile_height); - DUMP_FIELD(c, "0x%x", load_pipeline); - DUMP_FIELD(c, "0x%x", load_pipeline_bind); - agxdecode_stateful(ctx, decode_usc(ctx, 
c->load_pipeline & ~0x7), - "Load pipeline", agxdecode_usc, verbose, params, NULL); - DUMP_FIELD(c, "0x%x", store_pipeline); - DUMP_FIELD(c, "0x%x", store_pipeline_bind); - agxdecode_stateful(ctx, decode_usc(ctx, c->store_pipeline & ~0x7), - "Store pipeline", agxdecode_usc, verbose, params, NULL); - DUMP_FIELD(c, "0x%x", partial_reload_pipeline); - DUMP_FIELD(c, "0x%x", partial_reload_pipeline_bind); - agxdecode_stateful(ctx, decode_usc(ctx, c->partial_reload_pipeline & ~0x7), + DUMP_FIELD(c, "%d", sample_size_B); + DUMP_FIELD(c, "%d", utile_width_px); + DUMP_FIELD(c, "%d", utile_height_px); + DUMP_FIELD(c, "0x%x", bg.usc); + DUMP_FIELD(c, "0x%x", bg.rsrc_spec); + agxdecode_stateful(ctx, decode_usc(ctx, c->bg.usc & ~0x7), "Load pipeline", + agxdecode_usc, verbose, params, NULL); + DUMP_FIELD(c, "0x%x", eot.usc); + DUMP_FIELD(c, "0x%x", eot.rsrc_spec); + agxdecode_stateful(ctx, decode_usc(ctx, c->eot.usc & ~0x7), "Store pipeline", + agxdecode_usc, verbose, params, NULL); + DUMP_FIELD(c, "0x%x", partial_bg.usc); + DUMP_FIELD(c, "0x%x", partial_bg.rsrc_spec); + agxdecode_stateful(ctx, decode_usc(ctx, c->partial_bg.usc & ~0x7), "Partial reload pipeline", agxdecode_usc, verbose, params, NULL); - DUMP_FIELD(c, "0x%x", partial_store_pipeline); - DUMP_FIELD(c, "0x%x", partial_store_pipeline_bind); - agxdecode_stateful(ctx, decode_usc(ctx, c->partial_store_pipeline & ~0x7), + DUMP_FIELD(c, "0x%x", partial_eot.usc); + DUMP_FIELD(c, "0x%x", partial_eot.rsrc_spec); + agxdecode_stateful(ctx, decode_usc(ctx, c->partial_eot.usc & ~0x7), "Partial store pipeline", agxdecode_usc, verbose, params, NULL); - DUMP_FIELD(c, "0x%x", depth_dimensions); + DUMP_FIELD(c, "0x%x", isp_zls_pixels); DUMP_FIELD(c, "0x%x", isp_bgobjdepth); DUMP_FIELD(c, "0x%x", isp_bgobjvals); - agxdecode_sampler_heap(ctx, c->vertex_sampler_array, - c->vertex_sampler_count); + agxdecode_sampler_heap(ctx, c->sampler_heap, c->sampler_count); - /* Linux driver doesn't use this, at least for now */ - assert(c->fragment_sampler_array == c->vertex_sampler_array); - assert(c->fragment_sampler_count == c->vertex_sampler_count); - - DUMP_FIELD(c, "%d", vertex_attachment_count); - struct drm_asahi_attachment *vertex_attachments = - (void *)(uintptr_t)c->vertex_attachments; - for (unsigned i = 0; i < c->vertex_attachment_count; i++) { - DUMP_FIELD((&vertex_attachments[i]), "0x%x", order); - DUMP_FIELD((&vertex_attachments[i]), "0x%llx", size); - DUMP_FIELD((&vertex_attachments[i]), "0x%llx", pointer); - } - DUMP_FIELD(c, "%d", fragment_attachment_count); - struct drm_asahi_attachment *fragment_attachments = - (void *)(uintptr_t)c->fragment_attachments; - for (unsigned i = 0; i < c->fragment_attachment_count; i++) { - DUMP_FIELD((&fragment_attachments[i]), "0x%x", order); - DUMP_FIELD((&fragment_attachments[i]), "0x%llx", size); - DUMP_FIELD((&fragment_attachments[i]), "0x%llx", pointer); - } - - agxdecode_helper(ctx, "Vertex", c->vertex_helper_program); - agxdecode_helper(ctx, "Fragment", c->fragment_helper_program); + agxdecode_helper(ctx, "Vertex", c->vertex_helper.binary); + agxdecode_helper(ctx, "Fragment", c->fragment_helper.binary); } -void +static void agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx, struct drm_asahi_params_global *params, struct drm_asahi_cmd_compute *c, bool verbose) +{ + DUMP_FIELD(c, "%x", flags); + DUMP_FIELD(c, "0x%llx", cdm_ctrl_stream_base); + agxdecode_stateful(ctx, c->cdm_ctrl_stream_base, "Encoder", agxdecode_cdm, + verbose, params, NULL); + + agxdecode_sampler_heap(ctx, c->sampler_heap, c->sampler_count); + 
agxdecode_helper(ctx, "Compute", c->helper.binary); +} + +static void +agxdecode_drm_attachments(const char *name, struct drm_asahi_attachment *atts, + size_t size) +{ + fprintf(agxdecode_dump_stream, "%s attachments:\n", name); + unsigned count = size / sizeof(struct drm_asahi_attachment); + for (unsigned i = 0; i < count; i++) { + DUMP_FIELD((&atts[i]), "0x%llx", size); + DUMP_FIELD((&atts[i]), "0x%llx", pointer); + } +} + +void +agxdecode_drm_cmdbuf(struct agxdecode_ctx *ctx, + struct drm_asahi_params_global *params, + struct util_dynarray *cmdbuf, bool verbose) { agxdecode_dump_file_open(); - DUMP_FIELD(c, "%llx", flags); - DUMP_FIELD(c, "0x%llx", encoder_ptr); - agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_cdm, verbose, - params, NULL); - DUMP_FIELD(c, "0x%x", encoder_id); - DUMP_FIELD(c, "0x%x", cmd_id); + for (unsigned offs = 0; offs < cmdbuf->size;) { + struct drm_asahi_cmd_header *header = + (void *)((uint8_t *)cmdbuf->data) + offs; + offs += sizeof(*header); + void *data = (void *)((uint8_t *)cmdbuf->data) + offs; - agxdecode_sampler_heap(ctx, c->sampler_array, c->sampler_count); - agxdecode_helper(ctx, "Compute", c->helper_program); + if (header->cmd_type == DRM_ASAHI_CMD_RENDER) { + agxdecode_drm_cmd_render(ctx, params, data, verbose); + } else if (header->cmd_type == DRM_ASAHI_CMD_COMPUTE) { + agxdecode_drm_cmd_compute(ctx, params, data, verbose); + } else if (header->cmd_type == DRM_ASAHI_SET_VERTEX_ATTACHMENTS) { + agxdecode_drm_attachments("Vertex", data, header->size); + } else if (header->cmd_type == DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS) { + agxdecode_drm_attachments("Fragment", data, header->size); + } else if (header->cmd_type == DRM_ASAHI_SET_COMPUTE_ATTACHMENTS) { + agxdecode_drm_attachments("Compute", data, header->size); + } else { + unreachable("Invalid command type"); + } + + offs += header->size; + } } static void diff --git a/src/asahi/lib/decode.h b/src/asahi/lib/decode.h index 46aa6669efa..e2e68e0fc64 100644 --- a/src/asahi/lib/decode.h +++ b/src/asahi/lib/decode.h @@ -10,7 +10,7 @@ #include #include "agx_bo.h" -#include "unstable_asahi_drm.h" +#include "drm-uapi/asahi_drm.h" struct agxdecode_ctx; @@ -28,15 +28,11 @@ void agxdecode_cmdstream(struct agxdecode_ctx *ctx, unsigned cmdbuf_index, void agxdecode_image_heap(struct agxdecode_ctx *ctx, uint64_t heap, unsigned nr_entries); -void agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx, - struct drm_asahi_params_global *params, - struct drm_asahi_cmd_render *cmdbuf, - bool verbose); +struct util_dynarray; -void agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx, - struct drm_asahi_params_global *params, - struct drm_asahi_cmd_compute *cmdbuf, - bool verbose); +void agxdecode_drm_cmdbuf(struct agxdecode_ctx *ctx, + struct drm_asahi_params_global *params, + struct util_dynarray *cmdbuf, bool verbose); void agxdecode_dump_file_open(void); diff --git a/src/asahi/vulkan/hk_buffer.c b/src/asahi/vulkan/hk_buffer.c index c7f62c5f9b2..88a055eb151 100644 --- a/src/asahi/vulkan/hk_buffer.c +++ b/src/asahi/vulkan/hk_buffer.c @@ -81,17 +81,9 @@ VkResult hk_bind_scratch(struct hk_device *dev, struct agx_va *va, unsigned offset_B, size_t size_B) { - VkResult result = VK_SUCCESS; - - for (unsigned i = 0; i < size_B; i += AIL_PAGESIZE) { - result = dev->dev.ops.bo_bind(&dev->dev, dev->sparse.write, - va->addr + offset_B + i, AIL_PAGESIZE, 0, - ASAHI_BIND_READ | ASAHI_BIND_WRITE, false); - if (result != VK_SUCCESS) - return result; - } - - return result; + return agx_bo_bind( + &dev->dev, dev->sparse.write, 
va->addr + offset_B, size_B, 0, + DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE | DRM_ASAHI_BIND_SINGLE_PAGE); } VKAPI_ATTR VkResult VKAPI_CALL @@ -253,11 +245,9 @@ hk_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount, if (buffer->va) { VK_FROM_HANDLE(hk_device, dev, device); size_t size = MIN2(mem->bo->size, buffer->va->size_B); - int ret = - dev->dev.ops.bo_bind(&dev->dev, mem->bo, buffer->vk.device_address, - size, pBindInfos[i].memoryOffset, - ASAHI_BIND_READ | ASAHI_BIND_WRITE, false); - + int ret = agx_bo_bind(&dev->dev, mem->bo, buffer->vk.device_address, + size, pBindInfos[i].memoryOffset, + DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE); if (ret) return VK_ERROR_UNKNOWN; } else { diff --git a/src/asahi/vulkan/hk_cmd_buffer.h b/src/asahi/vulkan/hk_cmd_buffer.h index 132f7fc2368..c9c3bf3b178 100644 --- a/src/asahi/vulkan/hk_cmd_buffer.h +++ b/src/asahi/vulkan/hk_cmd_buffer.h @@ -151,7 +151,6 @@ struct hk_render_registers { uint32_t isp_bgobjdepth; uint32_t isp_bgobjvals; struct agx_zls_control_packed zls_control, zls_control_partial; - uint32_t iogpu_unk_214; uint32_t depth_dimensions; bool process_empty_tiles; enum u_tristate dbias_is_int; diff --git a/src/asahi/vulkan/hk_cmd_draw.c b/src/asahi/vulkan/hk_cmd_draw.c index bdb813091b3..5a6fcc87046 100644 --- a/src/asahi/vulkan/hk_cmd_draw.c +++ b/src/asahi/vulkan/hk_cmd_draw.c @@ -680,8 +680,6 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingAttachmentInfo *attach_s = pRenderingInfo->pStencilAttachment; - render->cr.iogpu_unk_214 = 0xc000; - struct ail_layout *z_layout = NULL, *s_layout = NULL; if (attach_z != NULL && attach_z != VK_NULL_HANDLE && attach_z->imageView) { diff --git a/src/asahi/vulkan/hk_device.c b/src/asahi/vulkan/hk_device.c index bae1f8f19e2..cf2cef41522 100644 --- a/src/asahi/vulkan/hk_device.c +++ b/src/asahi/vulkan/hk_device.c @@ -285,7 +285,7 @@ static VkResult hk_get_timestamp(struct vk_device *device, uint64_t *timestamp) { struct hk_device *dev = container_of(device, struct hk_device, vk); - *timestamp = agx_gpu_time_to_ns(&dev->dev, agx_get_gpu_timestamp(&dev->dev)); + *timestamp = agx_get_gpu_timestamp(&dev->dev); return VK_SUCCESS; } diff --git a/src/asahi/vulkan/hk_device_memory.c b/src/asahi/vulkan/hk_device_memory.c index b0b1052ae99..194080edb9d 100644 --- a/src/asahi/vulkan/hk_device_memory.c +++ b/src/asahi/vulkan/hk_device_memory.c @@ -52,7 +52,7 @@ hk_memory_type_flags(const VkMemoryType *type, static void hk_add_ext_bo_locked(struct hk_device *dev, struct agx_bo *bo) { - uint32_t id = bo->vbo_res_id; + uint32_t id = bo->uapi_handle; unsigned count = util_dynarray_num_elements(&dev->external_bos.list, struct asahi_ccmd_submit_res); @@ -89,7 +89,7 @@ hk_add_ext_bo(struct hk_device *dev, struct agx_bo *bo) static void hk_remove_ext_bo_locked(struct hk_device *dev, struct agx_bo *bo) { - uint32_t id = bo->vbo_res_id; + uint32_t id = bo->uapi_handle; unsigned count = util_dynarray_num_elements(&dev->external_bos.list, struct asahi_ccmd_submit_res); diff --git a/src/asahi/vulkan/hk_physical_device.c b/src/asahi/vulkan/hk_physical_device.c index 63ae85eaf6f..79be2e425db 100644 --- a/src/asahi/vulkan/hk_physical_device.c +++ b/src/asahi/vulkan/hk_physical_device.c @@ -23,7 +23,6 @@ #include "util/simple_mtx.h" #include "vulkan/vulkan_core.h" #include "vulkan/wsi/wsi_common.h" -#include "unstable_asahi_drm.h" #include "vk_drm_syncobj.h" #include "vk_shader_module.h" @@ -274,13 +273,10 @@ hk_get_device_features( .sparseResidencyAliased = true, .sparseResidencyImage2D = true, - /* We 
depend on soft fault to implement sparse residency on buffers with - * the appropriate semantics. Lifting this requirement would be possible - * but challenging, given the requirements imposed by - * sparseResidencyNonResidentStrict. + /* TODO: We need to implement sparse buffer without soft fault to avoid + * tying our hands later. */ - .sparseResidencyBuffer = - (dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS), + .sparseResidencyBuffer = false, /* This needs investigation. */ .sparseResidencyImage3D = false, @@ -743,7 +739,7 @@ hk_get_device_properties(const struct agx_device *dev, .sampledImageStencilSampleCounts = sample_counts, .storageImageSampleCounts = sample_counts, .maxSampleMaskWords = 1, - .timestampComputeAndGraphics = agx_supports_timestamps(dev), + .timestampComputeAndGraphics = true, /* FIXME: Is timestamp period actually 1? */ .timestampPeriod = 1.0f, .maxClipDistances = 8, @@ -1141,9 +1137,6 @@ hk_create_drm_physical_device(struct vk_instance *_instance, struct hk_instance *instance = (struct hk_instance *)_instance; VkResult result; - /* Blanket refusal to probe due to unstable UAPI. */ - return VK_ERROR_INCOMPATIBLE_DRIVER; - if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) || drm_device->bustype != DRM_BUS_PLATFORM) return VK_ERROR_INCOMPATIBLE_DRIVER; @@ -1433,8 +1426,7 @@ hk_GetPhysicalDeviceQueueFamilyProperties2( { p->queueFamilyProperties.queueFlags = queue_family->queue_flags; p->queueFamilyProperties.queueCount = queue_family->queue_count; - p->queueFamilyProperties.timestampValidBits = - agx_supports_timestamps(&pdev->dev) ? 64 : 0; + p->queueFamilyProperties.timestampValidBits = 64; p->queueFamilyProperties.minImageTransferGranularity = (VkExtent3D){1, 1, 1}; diff --git a/src/asahi/vulkan/hk_query_pool.c b/src/asahi/vulkan/hk_query_pool.c index 336a931ab03..9e8a98e19cf 100644 --- a/src/asahi/vulkan/hk_query_pool.c +++ b/src/asahi/vulkan/hk_query_pool.c @@ -24,6 +24,7 @@ #include "compiler/nir/nir.h" #include "compiler/nir/nir_builder.h" +#include "drm-uapi/asahi_drm.h" #include "util/os_time.h" #include "util/u_dynarray.h" #include "vulkan/vulkan_core.h" @@ -88,10 +89,6 @@ hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo, bool timestamp = pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP; unsigned occlusion_queries = occlusion ? pCreateInfo->queryCount : 0; - /* Workaround for DXVK on old kernels */ - if (!agx_supports_timestamps(&dev->dev)) - timestamp = false; - pool = vk_query_pool_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*pool)); if (!pool) @@ -131,10 +128,7 @@ hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo, * them. 
*/ if (timestamp) { - int ret = dev->dev.ops.bo_bind_object( - &dev->dev, pool->bo, &pool->handle, pool->bo->size, 0, - ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS); - + int ret = agx_bind_timestamps(&dev->dev, pool->bo, &pool->handle); if (ret) { hk_DestroyQueryPool(device, hk_query_pool_to_handle(pool), pAllocator); @@ -186,7 +180,7 @@ hk_DestroyQueryPool(VkDevice device, VkQueryPool queryPool, } if (pool->handle) - dev->dev.ops.bo_unbind_object(&dev->dev, pool->handle, 0); + dev->dev.ops.bo_unbind_object(&dev->dev, pool->handle); agx_bo_unreference(&dev->dev, pool->bo); vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk); @@ -390,12 +384,7 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer, VK_FROM_HANDLE(hk_query_pool, pool, queryPool); struct hk_device *dev = hk_cmd_buffer_device(cmd); - /* Workaround for DXVK on old kernels */ - if (!agx_supports_timestamps(&dev->dev)) - return; - uint64_t report_addr = hk_query_report_addr(dev, pool, query); - bool after_gfx = cmd->current_cs.gfx != NULL; /* When writing timestamps for compute, we split the control stream at each diff --git a/src/asahi/vulkan/hk_queue.c b/src/asahi/vulkan/hk_queue.c index 4b0ba037c03..98d8d6a2742 100644 --- a/src/asahi/vulkan/hk_queue.c +++ b/src/asahi/vulkan/hk_queue.c @@ -26,9 +26,9 @@ #include "hk_physical_device.h" #include -#include "asahi/lib/unstable_asahi_drm.h" #include "util/list.h" #include "util/macros.h" +#include "util/u_dynarray.h" #include "vulkan/vulkan_core.h" #include "hk_private.h" @@ -78,67 +78,39 @@ queue_submit_empty(struct hk_device *dev, struct hk_queue *queue, static void asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs, - struct drm_asahi_cmd_compute *cmd, - struct drm_asahi_cmd_compute_user_timestamps *timestamps) + struct drm_asahi_cmd_compute *cmd) { size_t len = cs->stream_linked ? 65536 /* XXX */ : (cs->current - cs->start); *cmd = (struct drm_asahi_cmd_compute){ - .encoder_ptr = cs->addr, - .encoder_end = cs->addr + len, + .cdm_ctrl_stream_base = cs->addr, + .cdm_ctrl_stream_end = cs->addr + len, - .sampler_array = dev->samplers.table.bo->va->addr, + .sampler_heap = dev->samplers.table.bo->va->addr, .sampler_count = dev->samplers.table.alloc, - .sampler_max = dev->samplers.table.alloc + 1, - .usc_base = dev->dev.shader_base, - - .encoder_id = agx_get_global_id(&dev->dev), - .cmd_id = agx_get_global_id(&dev->dev), - .unk_mask = 0xffffffff, + .ts.end.handle = cs->timestamp.end.handle, + .ts.end.offset = cs->timestamp.end.offset_B, }; - if (cs->timestamp.end.handle) { - assert(agx_supports_timestamps(&dev->dev)); - - *timestamps = (struct drm_asahi_cmd_compute_user_timestamps){ - .type = ASAHI_COMPUTE_EXT_TIMESTAMPS, - .end_handle = cs->timestamp.end.handle, - .end_offset = cs->timestamp.end.offset_B, - }; - - cmd->extensions = (uint64_t)(uintptr_t)timestamps; - } - if (cs->scratch.cs.main || cs->scratch.cs.preamble) { - cmd->helper_arg = dev->scratch.cs.buf->va->addr; - cmd->helper_cfg = cs->scratch.cs.preamble ? (1 << 16) : 0; - cmd->helper_program = agx_helper_program(&dev->bg_eot); + cmd->helper.data = dev->scratch.cs.buf->va->addr; + cmd->helper.cfg = cs->scratch.cs.preamble ? 
(1 << 16) : 0; + cmd->helper.binary = agx_helper_program(&dev->bg_eot); } } static void asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs, - struct drm_asahi_cmd_render *c, - struct drm_asahi_cmd_render_user_timestamps *timestamps) + struct drm_asahi_cmd_render *c) { - unsigned cmd_ta_id = agx_get_global_id(&dev->dev); - unsigned cmd_3d_id = agx_get_global_id(&dev->dev); - unsigned encoder_id = agx_get_global_id(&dev->dev); - memset(c, 0, sizeof(*c)); - c->encoder_ptr = cs->addr; - c->encoder_id = encoder_id; - c->cmd_3d_id = cmd_3d_id; - c->cmd_ta_id = cmd_ta_id; + c->vdm_ctrl_stream_base = cs->addr; c->ppp_ctrl = 0x202; - c->fragment_usc_base = dev->dev.shader_base; - c->vertex_usc_base = c->fragment_usc_base; - - c->fb_width = cs->cr.width; - c->fb_height = cs->cr.height; + c->width_px = cs->cr.width; + c->height_px = cs->cr.height; c->isp_bgobjdepth = cs->cr.isp_bgobjdepth; c->isp_bgobjvals = cs->cr.isp_bgobjvals; @@ -146,65 +118,30 @@ asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs, static_assert(sizeof(c->zls_ctrl) == sizeof(cs->cr.zls_control)); memcpy(&c->zls_ctrl, &cs->cr.zls_control, sizeof(cs->cr.zls_control)); - c->depth_dimensions = - (cs->cr.zls_width - 1) | ((cs->cr.zls_height - 1) << 15); + agx_pack(&c->isp_zls_pixels, CR_ISP_ZLS_PIXELS, cfg) { + cfg.x = cs->cr.zls_width; + cfg.y = cs->cr.zls_height; + } - c->depth_buffer_load = cs->cr.depth.buffer; - c->depth_buffer_store = cs->cr.depth.buffer; - c->depth_buffer_partial = cs->cr.depth.buffer; - - c->depth_buffer_load_stride = cs->cr.depth.stride; - c->depth_buffer_store_stride = cs->cr.depth.stride; - c->depth_buffer_partial_stride = cs->cr.depth.stride; - - c->depth_meta_buffer_load = cs->cr.depth.meta; - c->depth_meta_buffer_store = cs->cr.depth.meta; - c->depth_meta_buffer_partial = cs->cr.depth.meta; - - c->depth_meta_buffer_load_stride = cs->cr.depth.stride; - c->depth_meta_buffer_store_stride = cs->cr.depth.meta_stride; - c->depth_meta_buffer_partial_stride = cs->cr.depth.meta_stride; - - c->stencil_buffer_load = cs->cr.stencil.buffer; - c->stencil_buffer_store = cs->cr.stencil.buffer; - c->stencil_buffer_partial = cs->cr.stencil.buffer; - - c->stencil_buffer_load_stride = cs->cr.stencil.stride; - c->stencil_buffer_store_stride = cs->cr.stencil.stride; - c->stencil_buffer_partial_stride = cs->cr.stencil.stride; - - c->stencil_meta_buffer_load = cs->cr.stencil.meta; - c->stencil_meta_buffer_store = cs->cr.stencil.meta; - c->stencil_meta_buffer_partial = cs->cr.stencil.meta; - - c->stencil_meta_buffer_load_stride = cs->cr.stencil.stride; - c->stencil_meta_buffer_store_stride = cs->cr.stencil.meta_stride; - c->stencil_meta_buffer_partial_stride = cs->cr.stencil.meta_stride; - - c->iogpu_unk_214 = cs->cr.iogpu_unk_214; + c->depth.base = cs->cr.depth.buffer; + c->depth.stride = cs->cr.depth.stride; + c->depth.comp_base = cs->cr.depth.meta; + c->depth.comp_stride = cs->cr.depth.meta_stride; + c->stencil.base = cs->cr.stencil.buffer; + c->stencil.stride = cs->cr.stencil.stride; + c->stencil.comp_base = cs->cr.stencil.meta; + c->stencil.comp_stride = cs->cr.stencil.meta_stride; if (cs->cr.dbias_is_int == U_TRISTATE_YES) { - c->iogpu_unk_214 |= 0x40000; + c->flags |= DRM_ASAHI_RENDER_DBIAS_IS_INT; } if (dev->dev.debug & AGX_DBG_NOCLUSTER) { - c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING; - } else { - /* XXX: We don't know what this does exactly, and the name is - * surely wrong. But it fixes dEQP-VK.memory.pipeline_barrier.* tests on - * G14C when clustering is enabled... 
- */ - c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES; + c->flags |= DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING; } -#if 0 - /* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */ - if (tib->nr_samples > 1 && framebuffer->zsbuf) - c->flags |= ASAHI_RENDER_MSAA_ZS; -#endif - - c->utile_width = cs->tib.tile_size.width; - c->utile_height = cs->tib.tile_size.height; + c->utile_width_px = cs->tib.tile_size.width; + c->utile_height_px = cs->tib.tile_size.height; /* Can be 0 for attachmentless rendering with no draws */ c->samples = MAX2(cs->tib.nr_samples, 1); @@ -217,75 +154,62 @@ asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs, * * XXX: Hack for vkd3d-proton. */ - if (c->layers == 2048 && c->fb_width == 16384 && c->fb_height == 16384) { + if (c->layers == 2048 && c->width_px == 16384 && c->height_px == 16384) { mesa_log(MESA_LOG_WARN, MESA_LOG_TAG, "Clamping massive framebuffer"); c->layers = 32; } c->ppp_multisamplectl = cs->ppp_multisamplectl; - c->sample_size = cs->tib.sample_size_B; - c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(&cs->tib), 2048) / 2048; + c->sample_size_B = cs->tib.sample_size_B; float tan_60 = 1.732051f; - c->merge_upper_x = fui(tan_60 / cs->cr.width); - c->merge_upper_y = fui(tan_60 / cs->cr.height); + c->isp_merge_upper_x = fui(tan_60 / cs->cr.width); + c->isp_merge_upper_y = fui(tan_60 / cs->cr.height); - c->load_pipeline = cs->cr.bg.main.usc | 4; - c->store_pipeline = cs->cr.eot.main.usc | 4; - c->partial_reload_pipeline = cs->cr.bg.partial.usc | 4; - c->partial_store_pipeline = cs->cr.eot.partial.usc | 4; + c->bg.usc = cs->cr.bg.main.usc | 4; + c->eot.usc = cs->cr.eot.main.usc | 4; + c->partial_bg.usc = cs->cr.bg.partial.usc | 4; + c->partial_eot.usc = cs->cr.eot.partial.usc | 4; - memcpy(&c->load_pipeline_bind, &cs->cr.bg.main.counts, + memcpy(&c->bg.rsrc_spec, &cs->cr.bg.main.counts, sizeof(struct agx_counts_packed)); - memcpy(&c->store_pipeline_bind, &cs->cr.eot.main.counts, + memcpy(&c->eot.rsrc_spec, &cs->cr.eot.main.counts, sizeof(struct agx_counts_packed)); - memcpy(&c->partial_reload_pipeline_bind, &cs->cr.bg.partial.counts, + memcpy(&c->partial_bg.rsrc_spec, &cs->cr.bg.partial.counts, sizeof(struct agx_counts_packed)); - memcpy(&c->partial_store_pipeline_bind, &cs->cr.eot.partial.counts, + memcpy(&c->partial_eot.rsrc_spec, &cs->cr.eot.partial.counts, sizeof(struct agx_counts_packed)); - c->scissor_array = cs->uploaded_scissor; - c->depth_bias_array = cs->uploaded_zbias; + c->isp_scissor_base = cs->uploaded_scissor; + c->isp_dbias_base = cs->uploaded_zbias; - c->vertex_sampler_array = dev->samplers.table.bo->va->addr; - c->vertex_sampler_count = dev->samplers.table.alloc; - c->vertex_sampler_max = dev->samplers.table.alloc + 1; + c->sampler_heap = dev->samplers.table.bo->va->addr; + c->sampler_count = dev->samplers.table.alloc; - c->fragment_sampler_array = c->vertex_sampler_array; - c->fragment_sampler_count = c->vertex_sampler_count; - c->fragment_sampler_max = c->vertex_sampler_max; - - c->visibility_result_buffer = dev->occlusion_queries.bo->va->addr; + c->isp_oclqry_base = dev->occlusion_queries.bo->va->addr; if (cs->cr.process_empty_tiles) - c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES; + c->flags |= DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES; if (cs->scratch.vs.main || cs->scratch.vs.preamble) { - c->flags |= ASAHI_RENDER_VERTEX_SPILLS; - c->vertex_helper_arg = dev->scratch.vs.buf->va->addr; - c->vertex_helper_cfg = cs->scratch.vs.preamble ? 
(1 << 16) : 0; - c->vertex_helper_program = agx_helper_program(&dev->bg_eot); + c->flags |= DRM_ASAHI_RENDER_VERTEX_SCRATCH; + c->vertex_helper.data = dev->scratch.vs.buf->va->addr; + c->vertex_helper.cfg = cs->scratch.vs.preamble ? (1 << 16) : 0; + c->vertex_helper.binary = agx_helper_program(&dev->bg_eot); } if (cs->scratch.fs.main || cs->scratch.fs.preamble) { - c->fragment_helper_arg = dev->scratch.fs.buf->va->addr; - c->fragment_helper_cfg = cs->scratch.fs.preamble ? (1 << 16) : 0; - c->fragment_helper_program = agx_helper_program(&dev->bg_eot); + c->fragment_helper.data = dev->scratch.fs.buf->va->addr; + c->fragment_helper.cfg = cs->scratch.fs.preamble ? (1 << 16) : 0; + c->fragment_helper.binary = agx_helper_program(&dev->bg_eot); } if (cs->timestamp.end.handle) { - assert(agx_supports_timestamps(&dev->dev)); - - c->extensions = (uint64_t)(uintptr_t)timestamps; - - *timestamps = (struct drm_asahi_cmd_render_user_timestamps){ - .type = ASAHI_RENDER_EXT_TIMESTAMPS, - .frg_end_handle = cs->timestamp.end.handle, - .frg_end_offset = cs->timestamp.end.offset_B, - }; + c->ts_frag.end.handle = cs->timestamp.end.handle; + c->ts_frag.end.offset = cs->timestamp.end.offset_B; } } @@ -314,11 +238,6 @@ union drm_asahi_cmd { struct drm_asahi_cmd_render render; }; -union drm_asahi_user_timestamps { - struct drm_asahi_cmd_compute_user_timestamps compute; - struct drm_asahi_cmd_render_user_timestamps render; -}; - /* XXX: Batching multiple commands per submission is causing rare (7ppm) flakes * on the CTS once lossless compression is enabled. This needs to be * investigated before we can reenable this mechanism. We are likely missing a @@ -333,11 +252,7 @@ max_commands_per_submit(struct hk_device *dev) static VkResult queue_submit_single(struct hk_device *dev, struct drm_asahi_submit *submit) { - /* Currently we don't use the result buffer or implicit sync */ - struct agx_submit_virt virt = { - .vbo_res_id = 0, - .extres_count = 0, - }; + struct agx_submit_virt virt = {0}; if (dev->dev.is_virtio) { u_rwlock_rdlock(&dev->external_bos.lock); @@ -367,14 +282,19 @@ queue_submit_single(struct hk_device *dev, struct drm_asahi_submit *submit) * bounds. 
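 *
 * For example (illustrative numbers only, not a limit taken from the
 * kernel): with max_commands_per_submit(dev) == 64 and 150 commands
 * queued, we issue three ioctls covering commands [0, 64), [64, 128) and
 * [128, 150). Only the first ioctl carries the in-syncs and only the last
 * carries the out-syncs. Barrier indices are rebased per ioctl below
 * because the kernel reads them relative to each submission: if 64 compute
 * commands already went out in earlier ioctls, a cdm_barrier of 70 becomes
 * 70 - 64 = 6.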
*/ static VkResult -queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit) +queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit, + unsigned command_count) { - struct drm_asahi_command *cmds = (void *)(uintptr_t)submit->commands; - unsigned commands_remaining = submit->command_count; - unsigned submitted[DRM_ASAHI_SUBQUEUE_COUNT] = {0}; + uint8_t *cmdbuf = (uint8_t *)(uintptr_t)submit->cmdbuf; + uint32_t offs = 0; + unsigned submitted_vdm = 0, submitted_cdm = 0; + unsigned commands_remaining = command_count; + + uint64_t out_syncs = + submit->syncs + sizeof(struct drm_asahi_sync) * submit->in_sync_count; while (commands_remaining) { - bool first = commands_remaining == submit->command_count; + bool first = commands_remaining == command_count; bool last = commands_remaining <= max_commands_per_submit(dev); unsigned count = MIN2(commands_remaining, max_commands_per_submit(dev)); @@ -383,13 +303,27 @@ queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit) assert(!last || commands_remaining == 0); assert(count > 0); + unsigned base_offs = offs; + unsigned cdm_count = 0, vdm_count = 0; + /* We need to fix up the barriers since barriers are ioctl-relative */ for (unsigned i = 0; i < count; ++i) { - for (unsigned q = 0; q < DRM_ASAHI_SUBQUEUE_COUNT; ++q) { - if (cmds[i].barriers[q] != DRM_ASAHI_BARRIER_NONE) { - assert(cmds[i].barriers[q] >= submitted[q]); - cmds[i].barriers[q] -= submitted[q]; - } + struct drm_asahi_cmd_header *cmd = (void *)(cmdbuf + offs); + offs += sizeof(*cmd) + cmd->size; + + if (cmd->cmd_type == DRM_ASAHI_CMD_RENDER) + vdm_count++; + else if (cmd->cmd_type == DRM_ASAHI_CMD_COMPUTE) + cdm_count++; + + if (cmd->vdm_barrier != DRM_ASAHI_BARRIER_NONE) { + assert(cmd->vdm_barrier >= submitted_vdm); + cmd->vdm_barrier -= submitted_vdm; + } + + if (cmd->cdm_barrier != DRM_ASAHI_BARRIER_NONE) { + assert(cmd->cdm_barrier >= submitted_cdm); + cmd->cdm_barrier -= submitted_cdm; } } @@ -399,38 +333,35 @@ queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit) * TODO: there might be a more performant way to do this. */ if (last && !first) { - for (unsigned q = 0; q < DRM_ASAHI_SUBQUEUE_COUNT; ++q) { - if (cmds[0].barriers[q] == DRM_ASAHI_BARRIER_NONE) - cmds[0].barriers[q] = 0; - } + struct drm_asahi_cmd_header *cmd = (void *)(cmdbuf + base_offs); + + if (cmd->vdm_barrier == DRM_ASAHI_BARRIER_NONE) + cmd->vdm_barrier = 0; + + if (cmd->cdm_barrier == DRM_ASAHI_BARRIER_NONE) + cmd->cdm_barrier = 0; } + bool has_in_syncs = first; + bool has_out_syncs = last; + struct drm_asahi_submit submit_ioctl = { .flags = submit->flags, .queue_id = submit->queue_id, - .result_handle = submit->result_handle, - .commands = (uint64_t)(uintptr_t)(cmds), - .command_count = count, - .in_syncs = first ? submit->in_syncs : 0, - .in_sync_count = first ? submit->in_sync_count : 0, - .out_syncs = last ? submit->out_syncs : 0, - .out_sync_count = last ? submit->out_sync_count : 0, + .cmdbuf = submit->cmdbuf + base_offs, + .cmdbuf_size = offs - base_offs, + + .syncs = has_in_syncs ? submit->syncs : out_syncs, + .in_sync_count = has_in_syncs ? submit->in_sync_count : 0, + .out_sync_count = has_out_syncs ? 
submit->out_sync_count : 0,
      };

      VkResult result = queue_submit_single(dev, &submit_ioctl);
      if (result != VK_SUCCESS)
         return result;

-      for (unsigned i = 0; i < count; ++i) {
-         if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE)
-            submitted[DRM_ASAHI_SUBQUEUE_COMPUTE]++;
-         else if (cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER)
-            submitted[DRM_ASAHI_SUBQUEUE_RENDER]++;
-         else
-            unreachable("unknown subqueue");
-      }
-
-      cmds += count;
+      submitted_cdm += cdm_count;
+      submitted_vdm += vdm_count;
   }
 
   return VK_SUCCESS;
@@ -449,18 +380,24 @@ struct hk_bind_builder {
    VkDeviceSize size;
    VkDeviceSize memoryOffset;
    VkResult result;
+
+   /* Array of drm_asahi_gem_bind_op's */
+   struct util_dynarray binds;
 };
 
 static inline struct hk_bind_builder
 hk_bind_builder(struct hk_device *dev, struct vk_object_base *obj_base,
                 struct agx_va *va, struct hk_image *image)
 {
-   return (struct hk_bind_builder){
+   struct hk_bind_builder b = {
       .dev = dev,
       .obj_base = obj_base,
       .va = va,
       .image = image,
    };
+
+   util_dynarray_init(&b.binds, NULL);
+   return b;
 }
 
 static VkResult
@@ -523,13 +460,43 @@ hk_flush_bind(struct hk_bind_builder *b)
    /* When the app wants to unbind, replace the bound pages with scratch pages
     * so we don't leave a gap.
     */
+   struct drm_asahi_gem_bind_op op;
    if (!b->mem) {
-      return hk_bind_scratch(b->dev, b->va, b->resourceOffset, b->size);
+      op = (struct drm_asahi_gem_bind_op){
+         .handle = b->dev->sparse.write->uapi_handle,
+         .flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE |
+                  DRM_ASAHI_BIND_SINGLE_PAGE,
+         .addr = b->va->addr + b->resourceOffset,
+         .range = b->size,
+      };
    } else {
-      return b->dev->dev.ops.bo_bind(&b->dev->dev, b->mem->bo, va_addr, b->size,
-                                     b->memoryOffset,
-                                     ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
+      op = (struct drm_asahi_gem_bind_op){
+         .handle = b->mem->bo->uapi_handle,
+         .flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE,
+         .addr = va_addr,
+         .offset = b->memoryOffset,
+         .range = b->size,
+      };
    }
+
+   util_dynarray_append(&b->binds, struct drm_asahi_gem_bind_op, op);
+   return VK_SUCCESS;
+}
+
+static VkResult
+hk_bind_builder_finish(struct hk_bind_builder *b)
+{
+   hk_flush_bind(b);
+
+   /* Submit everything to the kernel at once */
+   if (b->binds.size > 0) {
+      b->dev->dev.ops.bo_bind(
+         &b->dev->dev, b->binds.data,
+         util_dynarray_num_elements(&b->binds, struct drm_asahi_gem_bind_op));
+   }
+
+   util_dynarray_fini(&b->binds);
+   return b->result;
 }
 
 static void
@@ -577,7 +544,7 @@ hk_sparse_buffer_bind_memory(struct hk_device *device,
                              bind->pBinds[i].size, bind->pBinds[i].memoryOffset);
    }
 
-   return hk_flush_bind(&b);
+   return hk_bind_builder_finish(&b);
 }
 
 static VkResult
@@ -623,7 +590,7 @@ hk_sparse_image_opaque_bind_memory(
       }
    }
 
-   return hk_flush_bind(&b);
+   return hk_bind_builder_finish(&b);
 }
 
 static void
@@ -714,7 +681,7 @@ hk_sparse_image_bind_memory(struct hk_device *device,
       }
    }
 
-   return hk_flush_bind(&b);
+   return hk_bind_builder_finish(&b);
 }
 
 static VkResult
@@ -778,11 +745,9 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
      return queue_submit_empty(dev, queue, submit);
 
   unsigned wait_count = 0;
-   struct drm_asahi_sync *waits =
-      alloca(submit->wait_count * sizeof(struct drm_asahi_sync));
-
-   struct drm_asahi_sync *signals =
-      alloca((submit->signal_count + 1) * sizeof(struct drm_asahi_sync));
+   struct drm_asahi_sync *syncs =
+      alloca((submit->wait_count + submit->signal_count + 1) *
+             sizeof(struct drm_asahi_sync));
 
   for (unsigned i = 0; i < submit->wait_count; ++i) {
      /* The kernel rejects the submission if we try to wait on the same
@@ -808,36 +773,31 @@ queue_submit(struct 
hk_device *dev, struct hk_queue *queue,
         continue;
      }
 
-      asahi_fill_sync(&waits[wait_count++], submit->waits[i].sync,
+      asahi_fill_sync(&syncs[wait_count++], submit->waits[i].sync,
                       submit->waits[i].wait_value);
   }
 
   for (unsigned i = 0; i < submit->signal_count; ++i) {
-      asahi_fill_sync(&signals[i], submit->signals[i].sync,
+      asahi_fill_sync(&syncs[wait_count + i], submit->signals[i].sync,
                       submit->signals[i].signal_value);
   }
 
   /* Signal progress on the queue itself */
-   signals[submit->signal_count] = (struct drm_asahi_sync){
+   syncs[wait_count + submit->signal_count] = (struct drm_asahi_sync){
      .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
      .handle = queue->drm.syncobj,
      .timeline_value = ++queue->drm.timeline_value,
   };
 
   /* Now setup the command structs */
-   struct drm_asahi_command *cmds = malloc(sizeof(*cmds) * command_count);
-   union drm_asahi_cmd *cmds_inner =
-      malloc(sizeof(*cmds_inner) * command_count);
-   union drm_asahi_user_timestamps *ts_inner =
-      malloc(sizeof(*ts_inner) * command_count);
-   if (cmds == NULL || cmds_inner == NULL || ts_inner == NULL) {
-      free(ts_inner);
-      free(cmds_inner);
-      free(cmds);
-      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
-   }
+   struct util_dynarray payload;
+   util_dynarray_init(&payload, NULL);
 
-   unsigned cmd_it = 0;
   unsigned nr_vdm = 0, nr_cdm = 0;
 
   for (unsigned i = 0; i < submit->command_buffer_count; ++i) {
@@ -845,15 +805,11 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
         (struct hk_cmd_buffer *)submit->command_buffers[i];
 
      list_for_each_entry(struct hk_cs, cs, &cmdbuf->control_streams, node) {
-         assert(cmd_it < command_count);
+         /* Barrier on previous command */
+         struct drm_asahi_cmd_header header =
+            agx_cmd_header(cs->type == HK_CS_CDM, nr_vdm, nr_cdm);
 
-         struct drm_asahi_command cmd = {
-            .cmd_buffer = (uint64_t)(uintptr_t)&cmds_inner[cmd_it],
-            .result_offset = 0 /* TODO */,
-            .result_size = 0 /* TODO */,
-            /* Barrier on previous command */
-            .barriers = {nr_vdm, nr_cdm},
-         };
+         util_dynarray_append(&payload, struct drm_asahi_cmd_header, header);
 
         if (cs->type == HK_CS_CDM) {
            perf_debug(
@@ -864,17 +820,10 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
            assert(cs->stats.cmds > 0 || cs->stats.flushes > 0 ||
                   cs->timestamp.end.handle);
 
-            cmd.cmd_type = DRM_ASAHI_CMD_COMPUTE;
-            cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute);
+            struct drm_asahi_cmd_compute cmd;
+            asahi_fill_cdm_command(dev, cs, &cmd);
+            util_dynarray_append(&payload, struct drm_asahi_cmd_compute, cmd);
            nr_cdm++;
-
-            asahi_fill_cdm_command(dev, cs, &cmds_inner[cmd_it].compute,
-                                   &ts_inner[cmd_it].compute);
-
-            /* Work around for shipping 6.11.8 kernels, remove when we bump uapi
-             */
-            if (!agx_supports_timestamps(&dev->dev))
-               cmd.cmd_buffer_size -= 8;
        } else {
           assert(cs->type == HK_CS_VDM);
           perf_debug(cmdbuf, "%u: Submitting VDM with %u API draws, %u draws",
@@ -882,31 +831,17 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
           assert(cs->stats.cmds > 0 || cs->cr.process_empty_tiles ||
                  cs->timestamp.end.handle);
 
-            cmd.cmd_type = DRM_ASAHI_CMD_RENDER;
-            cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_render);
+            struct drm_asahi_cmd_render cmd;
+            asahi_fill_vdm_command(dev, cs, &cmd);
+            util_dynarray_append(&payload, struct drm_asahi_cmd_render, cmd);
           nr_vdm++;
-
-            asahi_fill_vdm_command(dev, cs, &cmds_inner[cmd_it].render,
-                                   &ts_inner[cmd_it].render);
        }
-
-         cmds[cmd_it++] = cmd;
     }
  }
 
-   assert(cmd_it == command_count);
-
   if (dev->dev.debug & 
AGX_DBG_TRACE) { - for (unsigned i = 0; i < command_count; ++i) { - if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE) { - agxdecode_drm_cmd_compute(dev->dev.agxdecode, &dev->dev.params, - &cmds_inner[i].compute, true); - } else { - assert(cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER); - agxdecode_drm_cmd_render(dev->dev.agxdecode, &dev->dev.params, - &cmds_inner[i].render, true); - } - } + agxdecode_drm_cmdbuf(dev->dev.agxdecode, &dev->dev.params, &payload, + true); agxdecode_image_heap(dev->dev.agxdecode, dev->images.bo->va->addr, dev->images.alloc); @@ -917,25 +852,20 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue, struct drm_asahi_submit submit_ioctl = { .flags = 0, .queue_id = queue->drm.id, - .result_handle = 0 /* TODO */, .in_sync_count = wait_count, .out_sync_count = submit->signal_count + 1, - .command_count = command_count, - .in_syncs = (uint64_t)(uintptr_t)(waits), - .out_syncs = (uint64_t)(uintptr_t)(signals), - .commands = (uint64_t)(uintptr_t)(cmds), + .cmdbuf_size = payload.size, + .syncs = (uint64_t)(uintptr_t)(syncs), + .cmdbuf = (uint64_t)(uintptr_t)(payload.data), }; VkResult result; if (command_count <= max_commands_per_submit(dev)) result = queue_submit_single(dev, &submit_ioctl); else - result = queue_submit_looped(dev, &submit_ioctl); - - free(ts_inner); - free(cmds_inner); - free(cmds); + result = queue_submit_looped(dev, &submit_ioctl, command_count); + util_dynarray_fini(&payload); return result; } @@ -970,18 +900,25 @@ hk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit) return result; } -static uint32_t +static enum drm_asahi_priority translate_priority(VkQueueGlobalPriorityKHR prio) { - /* clang-format off */ switch (prio) { - case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR: return 0; - case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR: return 1; - case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR: return 2; - case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR: return 3; - default: unreachable("Invalid VkQueueGlobalPriorityKHR"); + case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR: + return DRM_ASAHI_PRIORITY_REALTIME; + + case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR: + return DRM_ASAHI_PRIORITY_HIGH; + + case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR: + return DRM_ASAHI_PRIORITY_MEDIUM; + + case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR: + return DRM_ASAHI_PRIORITY_LOW; + + default: + unreachable("Invalid VkQueueGlobalPriorityKHR"); } - /* clang-format on */ } VkResult @@ -1001,17 +938,21 @@ hk_queue_init(struct hk_device *dev, struct hk_queue *queue, priority_info ? priority_info->globalPriority : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR; + /* TODO: Lift when kernel side is ready and we can handle failures in + * create_command_queue. 
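+    * Until then, anything translate_priority() maps at or above
+    * DRM_ASAHI_PRIORITY_HIGH is rejected up front: elevated priorities are
+    * assumed to require kernel-side privilege, and returning
+    * VK_ERROR_NOT_PERMITTED_EXT here is cleaner than surfacing an ioctl
+    * failure from queue creation.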
+ */ + enum drm_asahi_priority drm_priority = translate_priority(priority); + if (drm_priority >= DRM_ASAHI_PRIORITY_HIGH) { + return VK_ERROR_NOT_PERMITTED_EXT; + } + result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family); if (result != VK_SUCCESS) return result; queue->vk.driver_submit = hk_queue_submit; - queue->drm.id = agx_create_command_queue(&dev->dev, - DRM_ASAHI_QUEUE_CAP_RENDER | - DRM_ASAHI_QUEUE_CAP_BLIT | - DRM_ASAHI_QUEUE_CAP_COMPUTE, - translate_priority(priority)); + queue->drm.id = agx_create_command_queue(&dev->dev, drm_priority); if (drmSyncobjCreate(dev->dev.fd, 0, &queue->drm.syncobj)) { mesa_loge("drmSyncobjCreate() failed %d\n", errno); diff --git a/src/broadcom/simulator/v3d_simulator.c b/src/broadcom/simulator/v3d_simulator.c index 079c3b7fdf6..5be3917161b 100644 --- a/src/broadcom/simulator/v3d_simulator.c +++ b/src/broadcom/simulator/v3d_simulator.c @@ -59,7 +59,7 @@ #include "util/u_math.h" #include -#include "asahi/lib/unstable_asahi_drm.h" +#include "drm-uapi/asahi_drm.h" #include "drm-uapi/amdgpu_drm.h" #include "drm-uapi/i915_drm.h" #include "drm-uapi/v3d_drm.h" diff --git a/src/gallium/drivers/asahi/agx_batch.c b/src/gallium/drivers/asahi/agx_batch.c index 44cc458124d..6d61c85470d 100644 --- a/src/gallium/drivers/asahi/agx_batch.c +++ b/src/gallium/drivers/asahi/agx_batch.c @@ -10,6 +10,7 @@ #include "util/bitset.h" #include "util/u_dynarray.h" #include "util/u_range.h" +#include "agx_device.h" #include "agx_state.h" #include "vdrm.h" @@ -159,137 +160,14 @@ agx_batch_init(struct agx_context *ctx, assert(!ret && batch->syncobj); } - batch->result_off = - (2 * sizeof(union agx_batch_result)) * agx_batch_idx(batch); - batch->result = - (void *)(((uint8_t *)agx_bo_map(ctx->result_buf)) + batch->result_off); - memset(batch->result, 0, sizeof(union agx_batch_result) * 2); - agx_batch_mark_active(batch); } -const char *status_str[] = { - [DRM_ASAHI_STATUS_PENDING] = "(pending)", - [DRM_ASAHI_STATUS_COMPLETE] = "Complete", - [DRM_ASAHI_STATUS_UNKNOWN_ERROR] = "UNKNOWN ERROR", - [DRM_ASAHI_STATUS_TIMEOUT] = "TIMEOUT", - [DRM_ASAHI_STATUS_FAULT] = "FAULT", - [DRM_ASAHI_STATUS_KILLED] = "KILLED", - [DRM_ASAHI_STATUS_NO_DEVICE] = "NO DEVICE", -}; - -const char *fault_type_str[] = { - [DRM_ASAHI_FAULT_NONE] = "(none)", - [DRM_ASAHI_FAULT_UNKNOWN] = "Unknown", - [DRM_ASAHI_FAULT_UNMAPPED] = "Unmapped", - [DRM_ASAHI_FAULT_AF_FAULT] = "AF Fault", - [DRM_ASAHI_FAULT_WRITE_ONLY] = "Write Only", - [DRM_ASAHI_FAULT_READ_ONLY] = "Read Only", - [DRM_ASAHI_FAULT_NO_ACCESS] = "No Access", -}; - -const char *low_unit_str[16] = { - "DCMP", "UL1C", "CMP", "GSL1", "IAP", "VCE", "TE", "RAS", - "VDM", "PPP", "IPF", "IPF_CPF", "VF", "VF_CPF", "ZLS", "UNK", -}; - -const char *mid_unit_str[16] = { - "UNK", "dPM", "dCDM_KS0", "dCDM_KS1", "dCDM_KS2", "dIPP", - "dIPP_CS", "dVDM_CSD", "dVDM_SSD", "dVDM_ILF", "dVDM_ILD", "dRDE0", - "dRDE1", "FC", "GSL2", "UNK", -}; - -const char *high_unit_str[16] = { - "gPM_SP", "gVDM_CSD_SP", "gVDM_SSD_SP", "gVDM_ILF_SP", - "gVDM_TFP_SP", "gVDM_MMB_SP", "gCDM_CS_KS0_SP", "gCDM_CS_KS1_SP", - "gCDM_CS_KS2_SP", "gCDM_KS0_SP", "gCDM_KS1_SP", "gCDM_KS2_SP", - "gIPP_SP", "gIPP_CS_SP", "gRDE0_SP", "gRDE1_SP", -}; - -static void -agx_print_result(struct agx_device *dev, struct agx_context *ctx, - struct drm_asahi_result_info *info, unsigned batch_idx, - bool is_compute) +static struct agx_timestamps * +agx_batch_timestamps(struct agx_batch *batch) { - if (unlikely(info->status != DRM_ASAHI_STATUS_COMPLETE)) { - ctx->any_faults = true; - } - - if 
(likely(info->status == DRM_ASAHI_STATUS_COMPLETE && - !((dev)->debug & AGX_DBG_STATS))) - return; - - if (is_compute) { - struct drm_asahi_result_compute *r = (void *)info; - float time = (r->ts_end - r->ts_start) / dev->params.timer_frequency_hz; - - mesa_logw( - "[Batch %d] Compute %s: %.06f\n", batch_idx, - info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?", - time); - } else { - struct drm_asahi_result_render *r = (void *)info; - float time_vtx = (r->vertex_ts_end - r->vertex_ts_start) / - (float)dev->params.timer_frequency_hz; - float time_frag = (r->fragment_ts_end - r->fragment_ts_start) / - (float)dev->params.timer_frequency_hz; - mesa_logw( - "[Batch %d] Render %s: TVB %9ld/%9ld bytes (%d ovf) %c%c%c | vtx %.06f frag %.06f\n", - batch_idx, - info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?", - (long)r->tvb_usage_bytes, (long)r->tvb_size_bytes, - (int)r->num_tvb_overflows, - r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF ? 'G' : ' ', - r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN ? 'M' : ' ', - r->flags & DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED ? 'O' : ' ', - time_vtx, time_frag); - } - - if (info->fault_type != DRM_ASAHI_FAULT_NONE) { - const char *unit_name; - int unit_index; - - switch (info->unit) { - case 0x00 ... 0x9f: - unit_name = low_unit_str[info->unit & 0xf]; - unit_index = info->unit >> 4; - break; - case 0xa0 ... 0xaf: - unit_name = mid_unit_str[info->unit & 0xf]; - unit_index = 0; - break; - case 0xb0 ... 0xb7: - unit_name = "GL2CC_META"; - unit_index = info->unit & 0x7; - break; - case 0xb8: - unit_name = "GL2CC_MB"; - unit_index = 0; - break; - case 0xe0 ... 0xff: - unit_name = high_unit_str[info->unit & 0xf]; - unit_index = (info->unit >> 4) & 1; - break; - default: - unit_name = "UNK"; - unit_index = 0; - break; - } - - mesa_logw( - "[Batch %d] Fault: %s : Addr 0x%llx %c Unit %02x (%s/%d) SB 0x%02x L%d Extra 0x%x\n", - batch_idx, - info->fault_type < ARRAY_SIZE(fault_type_str) - ? fault_type_str[info->fault_type] - : "?", - (long long)info->address, info->is_read ? 
'r' : 'W', info->unit,
-         unit_name, unit_index, info->sideband, info->level, info->extra);
-
-      agx_debug_fault(dev, info->address);
-   }
-
-   assert(info->status == DRM_ASAHI_STATUS_COMPLETE ||
-          info->status == DRM_ASAHI_STATUS_KILLED);
+   struct agx_timestamps *ts = agx_bo_map(batch->ctx->timestamps);
+   return ts + agx_batch_idx(batch);
 }
 
 static void
@@ -302,17 +180,25 @@ agx_batch_print_stats(struct agx_device *dev, struct agx_batch *batch)
      abort();
   }
 
-   if (!batch->result)
+   if (likely(!(dev->debug & AGX_DBG_STATS)))
      return;
 
+   struct agx_timestamps *ts = agx_batch_timestamps(batch);
+
   if (batch->cdm.bo) {
-      agx_print_result(dev, batch->ctx, &batch->result[0].compute.info,
-                       batch_idx, true);
+      float time = (ts->comp_end - ts->comp_start) /
+                   (float)dev->params.command_timestamp_frequency_hz;
+
+      mesa_logw("[Batch %d] Compute: %.06f\n", batch_idx, time);
   }
 
   if (batch->vdm.bo) {
-      agx_print_result(dev, batch->ctx, &batch->result[1].render.info,
-                       batch_idx, false);
+      float time_vtx = (ts->vtx_end - ts->vtx_start) /
+                       (float)dev->params.command_timestamp_frequency_hz;
+      float time_frag = (ts->frag_end - ts->frag_start) /
+                        (float)dev->params.command_timestamp_frequency_hz;
+      mesa_logw("[Batch %d] vtx %.06f frag %.06f\n", batch_idx, time_vtx,
+                time_frag);
   }
 }
 
@@ -328,15 +214,17 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
   assert(ctx->batch != batch);
 
   uint64_t begin_ts = ~0, end_ts = 0;
-   if (batch->result) {
+   if (batch->timestamps.size) {
+      struct agx_timestamps *ts = agx_batch_timestamps(batch);
+
      if (batch->cdm.bo) {
-         begin_ts = MIN2(begin_ts, batch->result[0].compute.ts_start);
-         end_ts = MAX2(end_ts, batch->result[0].compute.ts_end);
+         begin_ts = MIN2(begin_ts, ts->comp_start);
+         end_ts = MAX2(end_ts, ts->comp_end);
      }
 
      if (batch->vdm.bo) {
-         begin_ts = MIN2(begin_ts, batch->result[1].render.vertex_ts_start);
-         end_ts = MAX2(end_ts, batch->result[1].render.fragment_ts_end);
+         begin_ts = MIN2(begin_ts, ts->vtx_start);
+         end_ts = MAX2(end_ts, ts->frag_end);
      }
   }
 
@@ -733,6 +621,24 @@ agx_add_sync(struct drm_asahi_sync *syncs, unsigned *count, uint32_t handle)
   };
 }
 
+#define MAX_ATTACHMENTS 16
+
+struct attachments {
+   struct drm_asahi_attachment list[MAX_ATTACHMENTS];
+   size_t count;
+};
+
+static void
+asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc)
+{
+   assert(att->count < MAX_ATTACHMENTS);
+
+   att->list[att->count++] = (struct drm_asahi_attachment){
+      .size = rsrc->layout.size_B,
+      .pointer = rsrc->bo->va->addr,
+   };
+}
+
 void
 agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
                 struct drm_asahi_cmd_compute *compute,
@@ -741,34 +647,19 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
   struct agx_device *dev = agx_device(ctx->base.screen);
   struct agx_screen *screen = agx_screen(ctx->base.screen);
 
-   bool feedback = dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_STATS);
-
-#ifndef NDEBUG
-   /* Debug builds always get feedback (for fault checks) */
-   feedback = true;
-#endif
-
-   /* Timer queries use the feedback timestamping */
-   feedback |= (batch->timestamps.size > 0);
-
-   if (!feedback)
-      batch->result = NULL;
-
   /* We allocate the worst-case sync array size since this won't be excessive
    * for most workloads */
   unsigned max_syncs = batch->bo_list.bit_count + 2;
   unsigned in_sync_count = 0;
   unsigned shared_bo_count = 0;
-   struct drm_asahi_sync *in_syncs =
-      malloc(max_syncs * sizeof(struct drm_asahi_sync));
+   struct drm_asahi_sync *syncs =
+      malloc((max_syncs + 2) * sizeof(struct drm_asahi_sync));
   struct agx_bo **shared_bos = malloc(max_syncs * sizeof(struct agx_bo *));
 
   uint64_t wait_seqid = p_atomic_read(&screen->flush_wait_seqid);
 
-   struct agx_submit_virt virt = {
-      .vbo_res_id = ctx->result_buf->vbo_res_id,
-   };
+   struct agx_submit_virt virt = {0};
 
   /* Elide syncing against our own queue */
   if (wait_seqid && wait_seqid == ctx->flush_my_seqid) {
@@ -784,37 +675,6 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
 
   batch_debug(batch, "Sync point is %" PRIu64, seqid);
 
-   /* Subtle concurrency note: Since we assign seqids atomically and do
-    * not lock submission across contexts, it is possible for two threads
-    * to submit timeline syncobj updates out of order. As far as I can
-    * tell, this case is handled in the kernel conservatively: it triggers
-    * a fence context bump and effectively "splits" the timeline at the
-    * larger point, causing future lookups for earlier points to return a
-    * later point, waiting more. The signaling code still makes sure all
-    * prior fences have to be signaled before considering a given point
-    * signaled, regardless of order. That's good enough for us.
-    *
-    * (Note: this case breaks drm_syncobj_query_ioctl and for this reason
-    * triggers a DRM_DEBUG message on submission, but we don't use that
-    * so we don't care.)
-    *
-    * This case can be tested by setting seqid = 1 unconditionally here,
-    * causing every single syncobj update to reuse the same timeline point.
-    * Everything still works (but over-synchronizes because this effectively
-    * serializes all submissions once any context flushes once).
-    */
-   struct drm_asahi_sync out_syncs[2] = {
-      {
-         .sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
-         .handle = batch->syncobj,
-      },
-      {
-         .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
-         .handle = screen->flush_syncobj,
-         .timeline_value = seqid,
-      },
-   };
-
   /* This lock protects against a subtle race scenario:
    * - Context 1 submits and registers itself as writer for a BO
    * - Context 2 runs the below loop, and finds the writer syncobj
@@ -861,7 +721,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
         close(in_sync_fd);
 
         /* Add it to our wait list */
-         agx_add_sync(in_syncs, &in_sync_count, sync_handle);
+         agx_add_sync(syncs, &in_sync_count, sync_handle);
 
         /* And keep track of the BO for cloning the out_sync */
         shared_bos[shared_bo_count++] = bo;
@@ -879,8 +739,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
           batch,
           "Waits on inter-context BO @ 0x%" PRIx64 " from queue %u",
           bo->va->addr, queue_id);
 
-         agx_add_sync(in_syncs, &in_sync_count,
-                      agx_bo_writer_syncobj(writer));
+         agx_add_sync(syncs, &in_sync_count, agx_bo_writer_syncobj(writer));
        shared_bos[shared_bo_count++] = NULL;
     }
  }
@@ -894,7 +753,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
   for (unsigned i = 0; i < virt.extres_count; i++) {
      while (!*p)
        p++; // Skip inter-context slots which are not recorded here
-      virt.extres[i].res_id = (*p)->vbo_res_id;
+      virt.extres[i].res_id = (*p)->uapi_handle;
      virt.extres[i].flags = ASAHI_EXTRES_READ | ASAHI_EXTRES_WRITE;
      p++;
   }
@@ -910,63 +769,109 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
   }
 
   /* Add an explicit fence from gallium, if any */
-   agx_add_sync(in_syncs, &in_sync_count, agx_get_in_sync(ctx));
+   agx_add_sync(syncs, &in_sync_count, agx_get_in_sync(ctx));
 
   /* Add an implicit cross-context flush sync point, if any */
   if (wait_seqid) {
      batch_debug(batch, "Waits on inter-context sync point %" PRIu64,
                  wait_seqid);
-      in_syncs[in_sync_count++] = (struct 
drm_asahi_sync){ + syncs[in_sync_count++] = (struct drm_asahi_sync){ .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ, .handle = screen->flush_syncobj, .timeline_value = wait_seqid, }; } + /* Subtle concurrency note: Since we assign seqids atomically and do + * not lock submission across contexts, it is possible for two threads + * to submit timeline syncobj updates out of order. As far as I can + * tell, this case is handled in the kernel conservatively: it triggers + * a fence context bump and effectively "splits" the timeline at the + * larger point, causing future lookups for earlier points to return a + * later point, waiting more. The signaling code still makes sure all + * prior fences have to be signaled before considering a given point + * signaled, regardless of order. That's good enough for us. + * + * (Note: this case breaks drm_syncobj_query_ioctl and for this reason + * triggers a DRM_DEBUG message on submission, but we don't use that + * so we don't care.) + * + * This case can be tested by setting seqid = 1 unconditionally here, + * causing every single syncobj update to reuse the same timeline point. + * Everything still works (but over-synchronizes because this effectively + * serializes all submissions once any context flushes once). + */ + struct drm_asahi_sync *out_syncs = syncs + in_sync_count; + + out_syncs[0] = (struct drm_asahi_sync){ + .sync_type = DRM_ASAHI_SYNC_SYNCOBJ, + .handle = batch->syncobj, + }; + + out_syncs[1] = (struct drm_asahi_sync){ + .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ, + .handle = screen->flush_syncobj, + .timeline_value = seqid, + }; + /* Submit! */ - struct drm_asahi_command commands[2]; - unsigned command_count = 0; + struct util_dynarray cmdbuf; + util_dynarray_init(&cmdbuf, NULL); if (compute) { - commands[command_count++] = (struct drm_asahi_command){ - .cmd_type = DRM_ASAHI_CMD_COMPUTE, - .flags = 0, - .cmd_buffer = (uint64_t)(uintptr_t)compute, + /* Barrier on previous submission */ + struct drm_asahi_cmd_header header = agx_cmd_header(true, 0, 0); - /* Work around for shipping 6.11.8 kernels, remove when we bump uapi - */ - .cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute) - 8, - .result_offset = feedback ? batch->result_off : 0, - .result_size = feedback ? sizeof(union agx_batch_result) : 0, - /* Barrier on previous submission */ - .barriers = {0, 0}, - }; + util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_header, header); + util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_compute, *compute); } if (render) { - commands[command_count++] = (struct drm_asahi_command){ - .cmd_type = DRM_ASAHI_CMD_RENDER, - .flags = 0, - .cmd_buffer = (uint64_t)(uintptr_t)render, - .cmd_buffer_size = sizeof(struct drm_asahi_cmd_render), - .result_offset = - feedback ? (batch->result_off + sizeof(union agx_batch_result)) : 0, - .result_size = feedback ? sizeof(union agx_batch_result) : 0, - /* Barrier on previous submission */ - .barriers = {compute ? DRM_ASAHI_BARRIER_NONE : 0, compute ? 
1 : 0}, - }; + struct attachments att = {.count = 0}; + struct pipe_framebuffer_state *fb = &batch->key; + + for (unsigned i = 0; i < fb->nr_cbufs; ++i) { + if (fb->cbufs[i]) + asahi_add_attachment(&att, agx_resource(fb->cbufs[i]->texture)); + } + + if (fb->zsbuf) { + struct agx_resource *rsrc = agx_resource(fb->zsbuf->texture); + asahi_add_attachment(&att, rsrc); + + if (rsrc->separate_stencil) + asahi_add_attachment(&att, rsrc->separate_stencil); + } + + if (att.count) { + struct drm_asahi_cmd_header header = { + .cmd_type = DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS, + .size = sizeof(att.list[0]) * att.count, + .cdm_barrier = DRM_ASAHI_BARRIER_NONE, + .vdm_barrier = DRM_ASAHI_BARRIER_NONE, + }; + + util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_header, header); + util_dynarray_append_array(&cmdbuf, struct drm_asahi_attachment, + att.list, att.count); + } + + /* Barrier on previous submission */ + struct drm_asahi_cmd_header header = agx_cmd_header( + false, compute ? DRM_ASAHI_BARRIER_NONE : 0, compute ? 1 : 0); + + util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_header, header); + util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_render, *render); } struct drm_asahi_submit submit = { .flags = 0, .queue_id = ctx->queue_id, - .result_handle = feedback ? ctx->result_buf->handle : 0, .in_sync_count = in_sync_count, .out_sync_count = 2, - .command_count = command_count, - .in_syncs = (uint64_t)(uintptr_t)(in_syncs), - .out_syncs = (uint64_t)(uintptr_t)(out_syncs), - .commands = (uint64_t)(uintptr_t)(&commands[0]), + .syncs = (uint64_t)(uintptr_t)(syncs), + .cmdbuf = (uint64_t)(uintptr_t)(cmdbuf.data), + .cmdbuf_size = cmdbuf.size, }; int ret = dev->ops.submit(dev, &submit, &virt); @@ -983,7 +888,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, fprintf( stderr, "DRM_IOCTL_ASAHI_SUBMIT render failed: %m (%dx%d tile %dx%d layers %d samples %d)\n", - c->fb_width, c->fb_height, c->utile_width, c->utile_height, + c->width_px, c->height_px, c->utile_width_px, c->utile_height_px, c->layers, c->samples); } @@ -1009,7 +914,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, shared_bos[i]->va->addr); /* Free the in_sync handle we just acquired */ - ret = drmSyncobjDestroy(dev->fd, in_syncs[i].handle); + ret = drmSyncobjDestroy(dev->fd, syncs[i].handle); assert(ret >= 0); /* And then import the out_sync sync file into it */ ret = agx_import_sync_file(dev, shared_bos[i], out_sync_fd); @@ -1039,21 +944,12 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, batch_debug(batch, "Writes to BO @ 0x%" PRIx64, bo->va->addr); } - free(in_syncs); + free(syncs); free(shared_bos); if (dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_SCRATCH)) { if (dev->debug & AGX_DBG_TRACE) { - if (compute) { - agxdecode_drm_cmd_compute(dev->agxdecode, &dev->params, compute, - true); - } - - if (render) { - agxdecode_drm_cmd_render(dev->agxdecode, &dev->params, render, - true); - } - + agxdecode_drm_cmdbuf(dev->agxdecode, &dev->params, &cmdbuf, true); agxdecode_next_frame(); } @@ -1077,6 +973,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, } } + util_dynarray_fini(&cmdbuf); agx_batch_mark_submitted(batch); if (virt.extres) @@ -1155,9 +1052,6 @@ agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch) if (ctx->batch == batch) ctx->batch = NULL; - /* Elide printing stats */ - batch->result = NULL; - agx_batch_cleanup(ctx, batch, true); } diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index 
8d5b647f0f8..82573e6444c 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -11,7 +11,7 @@ #include "asahi/compiler/agx_compile.h" #include "asahi/layout/layout.h" #include "asahi/lib/decode.h" -#include "asahi/lib/unstable_asahi_drm.h" +#include "drm-uapi/asahi_drm.h" #include "drm-uapi/drm_fourcc.h" #include "frontend/winsys_handle.h" #include "gallium/auxiliary/renderonly/renderonly.h" @@ -1199,26 +1199,6 @@ agx_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres) } } -#define MAX_ATTACHMENTS 16 - -struct attachments { - struct drm_asahi_attachment list[MAX_ATTACHMENTS]; - size_t count; -}; - -static void -asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc, - struct pipe_surface *surf) -{ - assert(att->count < MAX_ATTACHMENTS); - int idx = att->count++; - - att->list[idx].size = rsrc->layout.size_B; - att->list[idx].pointer = rsrc->bo->va->addr; - att->list[idx].order = 1; // TODO: What does this do? - att->list[idx].flags = 0; -} - static bool is_aligned(unsigned x, unsigned pot_alignment) { @@ -1226,12 +1206,20 @@ is_aligned(unsigned x, unsigned pot_alignment) return (x & (pot_alignment - 1)) == 0; } +static unsigned +build_timestamp_offset(struct agx_batch *batch, unsigned offset) +{ + return (agx_batch_idx(batch) * sizeof(struct agx_timestamps)) + offset; +} + +#define timestamp_offset(batch, offs) \ + build_timestamp_offset(batch, offsetof(struct agx_timestamps, offs)) + static void agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, - struct attachments *att, struct agx_pool *pool, - struct agx_batch *batch, struct pipe_framebuffer_state *framebuffer, - uint64_t encoder_ptr, uint64_t encoder_id, uint64_t cmd_ta_id, - uint64_t cmd_3d_id, uint64_t scissor_ptr, uint64_t depth_bias_ptr, + struct agx_pool *pool, struct agx_batch *batch, + struct pipe_framebuffer_state *framebuffer, uint64_t encoder_ptr, + uint64_t scissor_ptr, uint64_t depth_bias_ptr, uint64_t visibility_result_ptr, struct asahi_bg_eot pipeline_clear, struct asahi_bg_eot pipeline_load, struct asahi_bg_eot pipeline_store, bool clear_pipeline_textures, @@ -1240,28 +1228,27 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, { memset(c, 0, sizeof(*c)); - c->encoder_ptr = encoder_ptr; - c->encoder_id = encoder_id; - c->cmd_3d_id = cmd_3d_id; - c->cmd_ta_id = cmd_ta_id; - - c->fragment_usc_base = dev->shader_base; - c->vertex_usc_base = dev->shader_base; + c->vdm_ctrl_stream_base = encoder_ptr; /* bit 0 specifies OpenGL clip behaviour. Since ARB_clip_control is * advertised, we don't set it and lower in the vertex shader. 
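    * (OpenGL-style clip behaviour here presumably means the [-1, 1] z clip
    * volume; with bit 0 clear the hardware expects [0, 1], and any
    * ARB_clip_control state is instead folded into the position the vertex
    * shader writes.)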
*/ c->ppp_ctrl = 0x202; - c->fb_width = framebuffer->width; - c->fb_height = framebuffer->height; - - c->iogpu_unk_214 = 0xc000; + c->width_px = framebuffer->width; + c->height_px = framebuffer->height; c->isp_bgobjvals = 0x300; struct agx_resource *zres = NULL, *sres = NULL; + if (framebuffer->zsbuf) { + agx_pack(&c->isp_zls_pixels, CR_ISP_ZLS_PIXELS, cfg) { + cfg.x = c->width_px; + cfg.y = c->height_px; + } + } + agx_pack(&c->zls_ctrl, ZLS_CONTROL, zls_control) { if (framebuffer->zsbuf) { @@ -1279,9 +1266,6 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || desc->format == PIPE_FORMAT_S8_UINT); - c->depth_dimensions = - (framebuffer->width - 1) | ((framebuffer->height - 1) << 15); - if (util_format_has_depth(desc)) zres = zsres; else @@ -1297,11 +1281,8 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, zls_control.z_store_enable = (batch->resolve & PIPE_CLEAR_DEPTH); zls_control.z_load_enable = !clear && load; - c->depth_buffer_load = agx_map_texture_gpu(zres, first_layer) + - ail_get_level_offset_B(&zres->layout, level); - - c->depth_buffer_store = c->depth_buffer_load; - c->depth_buffer_partial = c->depth_buffer_load; + c->depth.base = agx_map_texture_gpu(zres, first_layer) + + ail_get_level_offset_B(&zres->layout, level); /* Main stride in pages */ assert((zres->layout.depth_px == 1 || @@ -1309,14 +1290,12 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, "Page aligned Z layers"); unsigned stride_pages = zres->layout.layer_stride_B / AIL_PAGESIZE; - c->depth_buffer_load_stride = ((stride_pages - 1) << 14) | 1; - c->depth_buffer_store_stride = c->depth_buffer_load_stride; - c->depth_buffer_partial_stride = c->depth_buffer_load_stride; + c->depth.stride = ((stride_pages - 1) << 14) | 1; assert(zres->layout.tiling != AIL_TILING_LINEAR && "must tile"); if (zres->layout.compressed) { - c->depth_meta_buffer_load = + c->depth.comp_base = agx_map_texture_gpu(zres, 0) + zres->layout.metadata_offset_B + (first_layer * zres->layout.compression_layer_stride_B) + @@ -1328,14 +1307,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, "Cacheline aligned Z meta layers"); unsigned stride_lines = zres->layout.compression_layer_stride_B / AIL_CACHELINE; - c->depth_meta_buffer_load_stride = (stride_lines - 1) << 14; - - c->depth_meta_buffer_store = c->depth_meta_buffer_load; - c->depth_meta_buffer_store_stride = - c->depth_meta_buffer_load_stride; - c->depth_meta_buffer_partial = c->depth_meta_buffer_load; - c->depth_meta_buffer_partial_stride = - c->depth_meta_buffer_load_stride; + c->depth.comp_stride = (stride_lines - 1) << 14; zls_control.z_compress_1 = true; zls_control.z_compress_2 = true; @@ -1346,7 +1318,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, c->isp_bgobjdepth = (uint16_t)(SATURATE(clear_depth) * scale + 0.5f); zls_control.z_format = AGX_ZLS_FORMAT_16; - c->iogpu_unk_214 |= 0x40000; + c->flags |= DRM_ASAHI_RENDER_DBIAS_IS_INT; } else { c->isp_bgobjdepth = fui(clear_depth); zls_control.z_format = AGX_ZLS_FORMAT_32F; @@ -1360,24 +1332,18 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, zls_control.s_store_enable = (batch->resolve & PIPE_CLEAR_STENCIL); zls_control.s_load_enable = !clear && load; - c->stencil_buffer_load = - agx_map_texture_gpu(sres, first_layer) + - ail_get_level_offset_B(&sres->layout, level); - - c->stencil_buffer_store = c->stencil_buffer_load; - c->stencil_buffer_partial = c->stencil_buffer_load; 
+ c->stencil.base = agx_map_texture_gpu(sres, first_layer) + + ail_get_level_offset_B(&sres->layout, level); /* Main stride in pages */ assert((sres->layout.depth_px == 1 || is_aligned(sres->layout.layer_stride_B, AIL_PAGESIZE)) && "Page aligned S layers"); unsigned stride_pages = sres->layout.layer_stride_B / AIL_PAGESIZE; - c->stencil_buffer_load_stride = ((stride_pages - 1) << 14) | 1; - c->stencil_buffer_store_stride = c->stencil_buffer_load_stride; - c->stencil_buffer_partial_stride = c->stencil_buffer_load_stride; + c->stencil.stride = ((stride_pages - 1) << 14) | 1; if (sres->layout.compressed) { - c->stencil_meta_buffer_load = + c->stencil.comp_base = agx_map_texture_gpu(sres, 0) + sres->layout.metadata_offset_B + (first_layer * sres->layout.compression_layer_stride_B) + @@ -1389,14 +1355,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, "Cacheline aligned S meta layers"); unsigned stride_lines = sres->layout.compression_layer_stride_B / AIL_CACHELINE; - c->stencil_meta_buffer_load_stride = (stride_lines - 1) << 14; - - c->stencil_meta_buffer_store = c->stencil_meta_buffer_load; - c->stencil_meta_buffer_store_stride = - c->stencil_meta_buffer_load_stride; - c->stencil_meta_buffer_partial = c->stencil_meta_buffer_load; - c->stencil_meta_buffer_partial_stride = - c->stencil_meta_buffer_load_stride; + c->stencil.comp_stride = (stride_lines - 1) << 14; zls_control.s_compress_1 = true; zls_control.s_compress_2 = true; @@ -1407,71 +1366,48 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, } } - if (clear_pipeline_textures) - c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S; - else - c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES; - - if (zres && !(batch->clear & PIPE_CLEAR_DEPTH)) - c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S; - - if (sres && !(batch->clear & PIPE_CLEAR_STENCIL)) - c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S; - if (dev->debug & AGX_DBG_NOCLUSTER) - c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING; + c->flags |= DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING; - /* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */ - if (tib->nr_samples > 1 && framebuffer->zsbuf) - c->flags |= ASAHI_RENDER_MSAA_ZS; - - memcpy(&c->load_pipeline_bind, &pipeline_clear.counts, + memcpy(&c->bg.rsrc_spec, &pipeline_clear.counts, sizeof(struct agx_counts_packed)); - memcpy(&c->store_pipeline_bind, &pipeline_store.counts, + memcpy(&c->eot.rsrc_spec, &pipeline_store.counts, sizeof(struct agx_counts_packed)); - memcpy(&c->partial_reload_pipeline_bind, &pipeline_load.counts, + memcpy(&c->partial_bg.rsrc_spec, &pipeline_load.counts, sizeof(struct agx_counts_packed)); - memcpy(&c->partial_store_pipeline_bind, &pipeline_store.counts, + memcpy(&c->partial_eot.rsrc_spec, &pipeline_store.counts, sizeof(struct agx_counts_packed)); /* XXX is this correct? */ - c->load_pipeline = pipeline_clear.usc | (framebuffer->nr_cbufs >= 4 ? 8 : 4); - c->store_pipeline = pipeline_store.usc | 4; - c->partial_reload_pipeline = pipeline_load.usc | 4; - c->partial_store_pipeline = pipeline_store.usc | 4; + c->bg.usc = pipeline_clear.usc | (framebuffer->nr_cbufs >= 4 ? 
8 : 4); + c->eot.usc = pipeline_store.usc | 4; + c->partial_bg.usc = pipeline_load.usc | 4; + c->partial_eot.usc = pipeline_store.usc | 4; - c->utile_width = tib->tile_size.width; - c->utile_height = tib->tile_size.height; + c->utile_width_px = tib->tile_size.width; + c->utile_height_px = tib->tile_size.height; c->samples = tib->nr_samples; c->layers = MAX2(util_framebuffer_get_num_layers(framebuffer), 1); c->ppp_multisamplectl = batch->uniforms.ppp_multisamplectl; - c->sample_size = tib->sample_size_B; - - /* XXX OR 0x80 with eMRT? */ - c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(tib), 2048) / 2048; + c->sample_size_B = tib->sample_size_B; float tan_60 = 1.732051f; - c->merge_upper_x = fui(tan_60 / framebuffer->width); - c->merge_upper_y = fui(tan_60 / framebuffer->height); + c->isp_merge_upper_x = fui(tan_60 / framebuffer->width); + c->isp_merge_upper_y = fui(tan_60 / framebuffer->height); - c->scissor_array = scissor_ptr; - c->depth_bias_array = depth_bias_ptr; - c->visibility_result_buffer = visibility_result_ptr; + c->isp_scissor_base = scissor_ptr; + c->isp_dbias_base = depth_bias_ptr; + c->isp_oclqry_base = visibility_result_ptr; - c->vertex_sampler_array = - batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0; - c->vertex_sampler_count = batch->sampler_heap.count; - c->vertex_sampler_max = batch->sampler_heap.count + 1; - - /* In the future we could split the heaps if useful */ - c->fragment_sampler_array = c->vertex_sampler_array; - c->fragment_sampler_count = c->vertex_sampler_count; - c->fragment_sampler_max = c->vertex_sampler_max; + if (batch->sampler_heap.bo) { + c->sampler_heap = batch->sampler_heap.bo->va->addr; + c->sampler_count = batch->sampler_heap.count; + } /* If a tile is empty, we do not want to process it, as the redundant * roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of @@ -1483,39 +1419,25 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, * This case matters a LOT for performance in workloads that split batches. 
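    *
    * For example, a full-framebuffer clear that must also be stored out has
    * overlapping clear and resolve bits even though no draw touched any
    * tile; without the flag set below, those empty tiles would be skipped
    * and never written.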
 */
   if (batch->clear & batch->resolve)
-      c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES;
-
-   for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) {
-      if (!framebuffer->cbufs[i])
-         continue;
-
-      asahi_add_attachment(att, agx_resource(framebuffer->cbufs[i]->texture),
-                           framebuffer->cbufs[i]);
-   }
-
-   if (framebuffer->zsbuf) {
-      struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);
-
-      asahi_add_attachment(att, rsrc, framebuffer->zsbuf);
-
-      if (rsrc->separate_stencil) {
-         asahi_add_attachment(att, rsrc->separate_stencil, framebuffer->zsbuf);
-      }
-   }
-
-   c->fragment_attachments = (uint64_t)(uintptr_t)&att->list[0];
-   c->fragment_attachment_count = att->count;
+      c->flags |= DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES;
 
   if (batch->vs_scratch) {
-      c->flags |= ASAHI_RENDER_VERTEX_SPILLS;
-      c->vertex_helper_arg = batch->ctx->scratch_vs.buf->va->addr;
-      c->vertex_helper_cfg = batch->vs_preamble_scratch << 16;
-      c->vertex_helper_program = agx_helper_program(&batch->ctx->bg_eot);
+      c->flags |= DRM_ASAHI_RENDER_VERTEX_SCRATCH;
+      c->vertex_helper.data = batch->ctx->scratch_vs.buf->va->addr;
+      c->vertex_helper.cfg = batch->vs_preamble_scratch << 16;
+      c->vertex_helper.binary = agx_helper_program(&batch->ctx->bg_eot);
   }
   if (batch->fs_scratch) {
-      c->fragment_helper_arg = batch->ctx->scratch_fs.buf->va->addr;
-      c->fragment_helper_cfg = batch->fs_preamble_scratch << 16;
-      c->fragment_helper_program = agx_helper_program(&batch->ctx->bg_eot);
+      c->fragment_helper.data = batch->ctx->scratch_fs.buf->va->addr;
+      c->fragment_helper.cfg = batch->fs_preamble_scratch << 16;
+      c->fragment_helper.binary = agx_helper_program(&batch->ctx->bg_eot);
+   }
+
+   if (batch->timestamps.size > 0) {
+      c->ts_vtx.start.handle = batch->ctx->timestamp_handle;
+      c->ts_vtx.start.offset = timestamp_offset(batch, vtx_start);
+      c->ts_vtx.end.handle = batch->ctx->timestamp_handle;
+      c->ts_vtx.end.offset = timestamp_offset(batch, vtx_end);
+      c->ts_frag.start.handle = batch->ctx->timestamp_handle;
+      c->ts_frag.start.offset = timestamp_offset(batch, frag_start);
+      c->ts_frag.end.handle = batch->ctx->timestamp_handle;
+      c->ts_frag.end.offset = timestamp_offset(batch, frag_end);
   }
 }
 
@@ -1595,8 +1517,6 @@ static void
 agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
                  struct drm_asahi_cmd_compute *cmdbuf)
 {
-   struct agx_device *dev = agx_device(ctx->base.screen);
-
   /* Finalize the encoder */
   agx_pack(batch->cdm.current, CDM_STREAM_TERMINATE, _)
      ;
 
@@ -1606,27 +1526,14 @@ agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
   if (batch->cs_scratch)
      agx_batch_add_bo(batch, ctx->scratch_cs.buf);
 
-   unsigned cmdbuf_id = agx_get_global_id(dev);
-   unsigned encoder_id = agx_get_global_id(dev);
-
   *cmdbuf = (struct drm_asahi_cmd_compute){
-      .flags = 0,
-      .encoder_ptr = batch->cdm.bo->va->addr,
-      .encoder_end =
+      .cdm_ctrl_stream_base = batch->cdm.bo->va->addr,
+      .cdm_ctrl_stream_end =
         batch->cdm.bo->va->addr +
         (batch->cdm.current - (uint8_t *)agx_bo_map(batch->cdm.bo)),
-      .usc_base = dev->shader_base,
-      .helper_arg = 0,
-      .helper_cfg = 0,
-      .helper_program = 0,
-      .iogpu_unk_40 = 0,
-      .sampler_array =
+      .sampler_heap =
         batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0,
      .sampler_count = batch->sampler_heap.count,
-      .sampler_max = batch->sampler_heap.count + 1,
-      .encoder_id = encoder_id,
-      .cmd_id = cmdbuf_id,
-      .unk_mask = 0xffffffff,
   };
 
   if (batch->cs_scratch) {
@@ -1635,16 +1542,23 @@
      // helper. Disable them for now. 
// cmdbuf->iogpu_unk_40 = 0x1c;
 
-      cmdbuf->helper_arg = ctx->scratch_cs.buf->va->addr;
-      cmdbuf->helper_cfg = batch->cs_preamble_scratch << 16;
-      // cmdbuf->helper_cfg |= 0x40;
-      cmdbuf->helper_program = agx_helper_program(&batch->ctx->bg_eot);
+      cmdbuf->helper.data = ctx->scratch_cs.buf->va->addr;
+      cmdbuf->helper.cfg = batch->cs_preamble_scratch << 16;
+      // cmdbuf->helper.cfg |= 0x40;
+      cmdbuf->helper.binary = agx_helper_program(&batch->ctx->bg_eot);
+   }
+
+   if (batch->timestamps.size > 0) {
+      cmdbuf->ts.start.handle = ctx->timestamp_handle;
+      cmdbuf->ts.end.handle = ctx->timestamp_handle;
+      cmdbuf->ts.start.offset = timestamp_offset(batch, comp_start);
+      cmdbuf->ts.end.offset = timestamp_offset(batch, comp_end);
   }
 }
 
 static void
 agx_flush_render(struct agx_context *ctx, struct agx_batch *batch,
-                 struct drm_asahi_cmd_render *cmdbuf, struct attachments *att)
+                 struct drm_asahi_cmd_render *cmdbuf)
 {
   struct agx_device *dev = agx_device(ctx->base.screen);
 
@@ -1694,16 +1608,11 @@ agx_flush_render(struct agx_context *ctx, struct agx_batch *batch,
    */
   agx_batch_add_bo(batch, batch->vdm.bo);
 
-   unsigned cmd_ta_id = agx_get_global_id(dev);
-   unsigned cmd_3d_id = agx_get_global_id(dev);
-   unsigned encoder_id = agx_get_global_id(dev);
-
-   agx_cmdbuf(dev, cmdbuf, att, &batch->pool, batch, &batch->key,
-              batch->vdm.bo->va->addr, encoder_id, cmd_ta_id, cmd_3d_id,
-              scissor, zbias, agx_get_occlusion_heap(batch),
-              pipeline_background, pipeline_background_partial, pipeline_store,
-              clear_pipeline_textures, batch->clear_depth, batch->clear_stencil,
-              &batch->tilebuffer_layout);
+   agx_cmdbuf(
+      dev, cmdbuf, &batch->pool, batch, &batch->key, batch->vdm.bo->va->addr,
+      scissor, zbias, agx_get_occlusion_heap(batch), pipeline_background,
+      pipeline_background_partial, pipeline_store, clear_pipeline_textures,
+      batch->clear_depth, batch->clear_stencil, &batch->tilebuffer_layout);
 }
 
 void
@@ -1712,7 +1621,6 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
   assert(agx_batch_is_active(batch));
   assert(!agx_batch_is_submitted(batch));
 
-   struct attachments att = {.count = 0};
   struct drm_asahi_cmd_render render;
   struct drm_asahi_cmd_compute compute;
   bool has_vdm = false, has_cdm = false;
@@ -1723,7 +1631,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
   }
 
   if (batch->vdm.bo && (batch->clear || batch->initialized)) {
-      agx_flush_render(ctx, batch, &render, &att);
+      agx_flush_render(ctx, batch, &render);
      has_vdm = true;
   }
 
@@ -1761,8 +1669,6 @@ agx_destroy_context(struct pipe_context *pctx)
   agx_bg_eot_cleanup(&ctx->bg_eot);
   agx_destroy_meta_shaders(ctx);
 
-   agx_bo_unreference(dev, ctx->result_buf);
-
   /* Lock around the syncobj destruction, to avoid racing
    * command submission in another context. 
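    * (The submission paths are presumed to take this rwlock for reading,
    * so acquiring the write side here drains any in-flight submit that
    * could still reference the syncobjs we are about to destroy.)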
   **/
@@ -1778,6 +1684,9 @@
      drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj);
   }
 
+   dev->ops.bo_unbind_object(dev, ctx->timestamp_handle);
+   agx_bo_unreference(dev, ctx->timestamps);
+
   u_rwlock_wrunlock(&screen->destroy_lock);
 
   pipe_resource_reference(&ctx->heap, NULL);
@@ -1841,21 +1750,18 @@ agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
   }
   pctx->const_uploader = pctx->stream_uploader;
 
-   uint32_t priority = 2;
-   if (flags & PIPE_CONTEXT_PRIORITY_LOW)
-      priority = 3;
-   else if (flags & PIPE_CONTEXT_PRIORITY_MEDIUM)
-      priority = 2;
-   else if (flags & PIPE_CONTEXT_PRIORITY_HIGH)
-      priority = 1;
-   else if (flags & PIPE_CONTEXT_PRIORITY_REALTIME)
-      priority = 0;
+   enum drm_asahi_priority priority = DRM_ASAHI_PRIORITY_MEDIUM;
 
-   ctx->queue_id = agx_create_command_queue(agx_device(screen),
-                                            DRM_ASAHI_QUEUE_CAP_RENDER |
-                                               DRM_ASAHI_QUEUE_CAP_BLIT |
-                                               DRM_ASAHI_QUEUE_CAP_COMPUTE,
-                                            priority);
+   if (flags & PIPE_CONTEXT_PRIORITY_LOW)
+      priority = DRM_ASAHI_PRIORITY_LOW;
+   else if (flags & PIPE_CONTEXT_PRIORITY_MEDIUM)
+      priority = DRM_ASAHI_PRIORITY_MEDIUM;
+
+   /* TODO: High/realtime need us to handle errors since we might not have
+    * permission. Sort this out later.
+    */
+
+   ctx->queue_id = agx_create_command_queue(agx_device(screen), priority);
 
   pctx->destroy = agx_destroy_context;
   pctx->flush = agx_flush;
@@ -1893,11 +1799,17 @@ agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
   ctx->blitter = util_blitter_create(pctx);
   ctx->compute_blitter.blit_cs = asahi_blit_key_table_create(ctx);
 
-   ctx->result_buf =
-      agx_bo_create(agx_device(screen),
-                    (2 * sizeof(union agx_batch_result)) * AGX_MAX_BATCHES, 0,
-                    AGX_BO_WRITEBACK, "Batch result buffer");
-   assert(ctx->result_buf);
+   struct agx_device *dev = agx_device(screen);
+   size_t timestamps_size = sizeof(struct agx_timestamps) * AGX_MAX_BATCHES;
+
+   /* The kernel requires that timestamp buffers are SHARED */
+   ctx->timestamps =
+      agx_bo_create(dev, timestamps_size, 0, AGX_BO_WRITEBACK | AGX_BO_SHARED,
+                    "Timestamp buffer");
+   assert(ctx->timestamps);
+
+   ret = agx_bind_timestamps(dev, ctx->timestamps, &ctx->timestamp_handle);
+   assert(!ret);
 
   /* Sync object/FD used for NATIVE_FENCE_FD. */
   ctx->in_sync_fd = -1;
@@ -2101,7 +2013,7 @@ agx_init_screen_caps(struct pipe_screen *pscreen)
   caps->texture_barrier = true;
 
   /* Timer resolution is the length of a single tick in nanos */
-   caps->timer_resolution = agx_gpu_time_to_ns(agx_device(pscreen), 1);
+   caps->timer_resolution = agx_gpu_timestamp_to_ns(agx_device(pscreen), 1);
 
   caps->sampler_view_target = true;
   caps->texture_swizzle = true;
@@ -2452,8 +2364,7 @@ agx_screen_get_fd(struct pipe_screen *pscreen)
 static uint64_t
 agx_get_timestamp(struct pipe_screen *pscreen)
 {
   struct agx_device *dev = agx_device(pscreen);
-   return agx_gpu_time_to_ns(dev, agx_get_gpu_timestamp(dev));
+   return agx_gpu_timestamp_to_ns(dev, agx_get_gpu_timestamp(dev));
 }
 
 static void
@@ -2488,13 +2399,6 @@ agx_screen_create(int fd, struct renderonly *ro,
   struct agx_screen *agx_screen;
   struct pipe_screen *screen;
 
-   /* Refuse to probe. There is no stable UAPI yet. Upstream Mesa cannot be used
-    * yet with Asahi. Do not try. Do not patch out this check. Do not teach
-    * others about patching this check. Do not distribute upstream Mesa with
-    * this check patched out. 
- */ - return NULL; - agx_screen = rzalloc(NULL, struct agx_screen); if (!agx_screen) return NULL; diff --git a/src/gallium/drivers/asahi/agx_query.c b/src/gallium/drivers/asahi/agx_query.c index 2a8ff5c1c6b..1326f4747f5 100644 --- a/src/gallium/drivers/asahi/agx_query.c +++ b/src/gallium/drivers/asahi/agx_query.c @@ -408,12 +408,12 @@ agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery, return true; case QUERY_COPY_TIMESTAMP: - vresult->u64 = agx_gpu_time_to_ns(dev, value); + vresult->u64 = agx_gpu_timestamp_to_ns(dev, value); return true; case QUERY_COPY_TIME_ELAPSED: /* end - begin */ - vresult->u64 = agx_gpu_time_to_ns(dev, ptr[0] - ptr[1]); + vresult->u64 = agx_gpu_timestamp_to_ns(dev, ptr[0] - ptr[1]); return true; default: diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 294a941f754..e6dd33aaf27 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -18,7 +18,6 @@ #include "asahi/lib/agx_tilebuffer.h" #include "asahi/lib/agx_uvs.h" #include "asahi/lib/pool.h" -#include "asahi/lib/unstable_asahi_drm.h" #include "asahi/libagx/geometry.h" #include "compiler/shader_enums.h" #include "gallium/auxiliary/util/u_blitter.h" @@ -355,9 +354,13 @@ struct agx_stage { uint32_t valid_samplers; }; -union agx_batch_result { - struct drm_asahi_result_render render; - struct drm_asahi_result_compute compute; +struct agx_timestamps { + uint64_t vtx_start; + uint64_t vtx_end; + uint64_t frag_start; + uint64_t frag_end; + uint64_t comp_start; + uint64_t comp_end; }; /* This is a firmware limit. It should be possible to raise to 2048 in the @@ -454,10 +457,6 @@ struct agx_batch { /* Arrays of GPU pointers that should be written with the batch timestamps */ struct util_dynarray timestamps; - /* Result buffer where the kernel places command execution information */ - union agx_batch_result *result; - size_t result_off; - /* Actual pointer in a uniform */ struct agx_bo *geom_params_bo, *geom_index_bo; uint64_t geom_index; @@ -646,7 +645,8 @@ struct agx_context { uint32_t queue_id; struct agx_batch *batch; - struct agx_bo *result_buf; + struct agx_bo *timestamps; + uint32_t timestamp_handle; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; uint32_t vb_mask;