asahi: port to stable uAPI

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Eric Engestrom <eric@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33984>
Authored by Alyssa Rosenzweig on 2025-04-09 09:31:53 -04:00; committed by Marge Bot
parent 3e110005a6
commit c64a2bbff5
25 changed files with 760 additions and 1257 deletions


@ -6,21 +6,17 @@
#include <string.h> #include <string.h>
#include "../lib/unstable_asahi_drm.h"
#include "drm-shim/drm_shim.h" #include "drm-shim/drm_shim.h"
#include "drm-uapi/asahi_drm.h"
bool drm_shim_driver_prefers_first_render_node = true; bool drm_shim_driver_prefers_first_render_node = true;
static const struct drm_asahi_params_global params = { static const struct drm_asahi_params_global params = {
.unstable_uabi_version = DRM_ASAHI_UNSTABLE_UABI_VERSION,
.gpu_generation = 13, .gpu_generation = 13,
.gpu_variant = 'G', .gpu_variant = 'G',
.gpu_revision = 0, .gpu_revision = 0,
.vm_user_start = 0x1000000, .vm_start = 0x1000000,
.vm_user_end = 0x5000000, .vm_end = 0x5000000,
.vm_usc_start = 0,
.vm_usc_end = 0,
.vm_page_size = 4096,
}; };
struct asahi_bo { struct asahi_bo {
@ -48,12 +44,6 @@ asahi_ioctl_noop(int fd, unsigned long request, void *arg)
return 0; return 0;
} }
static int
asahi_ioctl_submit(int fd, unsigned long request, void *arg)
{
return 0;
}
static int static int
asahi_ioctl_gem_create(int fd, unsigned long request, void *arg) asahi_ioctl_gem_create(int fd, unsigned long request, void *arg)
{ {
@ -110,12 +100,12 @@ static ioctl_fn_t driver_ioctls[] = {
[DRM_ASAHI_GET_PARAMS] = asahi_ioctl_get_param, [DRM_ASAHI_GET_PARAMS] = asahi_ioctl_get_param,
[DRM_ASAHI_VM_CREATE] = asahi_ioctl_noop, [DRM_ASAHI_VM_CREATE] = asahi_ioctl_noop,
[DRM_ASAHI_VM_DESTROY] = asahi_ioctl_noop, [DRM_ASAHI_VM_DESTROY] = asahi_ioctl_noop,
[DRM_ASAHI_VM_BIND] = asahi_ioctl_noop,
[DRM_ASAHI_GEM_CREATE] = asahi_ioctl_gem_create, [DRM_ASAHI_GEM_CREATE] = asahi_ioctl_gem_create,
[DRM_ASAHI_GEM_MMAP_OFFSET] = asahi_ioctl_gem_mmap_offset, [DRM_ASAHI_GEM_MMAP_OFFSET] = asahi_ioctl_gem_mmap_offset,
[DRM_ASAHI_GEM_BIND] = asahi_ioctl_noop,
[DRM_ASAHI_QUEUE_CREATE] = asahi_ioctl_noop, [DRM_ASAHI_QUEUE_CREATE] = asahi_ioctl_noop,
[DRM_ASAHI_QUEUE_DESTROY] = asahi_ioctl_noop, [DRM_ASAHI_QUEUE_DESTROY] = asahi_ioctl_noop,
[DRM_ASAHI_SUBMIT] = asahi_ioctl_submit, [DRM_ASAHI_SUBMIT] = asahi_ioctl_noop,
}; };
void void


@ -1119,4 +1119,9 @@
<field name="S Resolve" start="58" size="1" type="bool"/> <field name="S Resolve" start="58" size="1" type="bool"/>
</struct> </struct>
<struct name="CR ISP ZLS Pixels" size="4">
<field name="X" start="0" size="15" type="uint" modifier="minus(1)"/>
<field name="Y" start="15" size="15" type="uint" modifier="minus(1)"/>
</struct>
</genxml> </genxml>
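The new CR ISP ZLS Pixels struct backs the isp_zls_pixels field of the stable render command seen later in this diff. As a hedged sketch of the minus(1) modifier, assuming the usual genxml-generated packer and the agx_*_packed naming used elsewhere in this commit (the exact generated names are an assumption):

/* Sketch only: minus(1) means the packer stores value - 1, so a 1920x1080
 * framebuffer lands as X = 1919, Y = 1079 in the two 15-bit fields.
 */
struct agx_cr_isp_zls_pixels_packed zls_pixels;
agx_pack(&zls_pixels, CR_ISP_ZLS_PIXELS, cfg) {
   cfg.x = 1920;
   cfg.y = 1080;
}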


@ -1,4 +0,0 @@
[unstable_asahi_drm.h]
indent_style = tab
indent_size = 8
max_line_length = 100


@ -11,6 +11,7 @@
#include "util/ralloc.h" #include "util/ralloc.h"
#include "agx_device.h" #include "agx_device.h"
#include "decode.h" #include "decode.h"
#include "layout.h"
/* Helper to calculate the bucket index of a BO */ /* Helper to calculate the bucket index of a BO */
static unsigned static unsigned
@ -355,8 +356,8 @@ agx_bo_create(struct agx_device *dev, size_t size, unsigned align,
assert(size > 0); assert(size > 0);
/* BOs are allocated in pages */ /* BOs are allocated in pages */
size = ALIGN_POT(size, (size_t)dev->params.vm_page_size); size = ALIGN_POT(size, AIL_PAGESIZE);
align = MAX2(align, dev->params.vm_page_size); align = MAX2(align, AIL_PAGESIZE);
/* See if we have a BO already in the cache */ /* See if we have a BO already in the cache */
bo = agx_bo_cache_fetch(dev, size, align, flags, true); bo = agx_bo_cache_fetch(dev, size, align, flags, true);


@ -88,6 +88,11 @@ struct agx_bo {
/* Process-local index */ /* Process-local index */
uint32_t handle; uint32_t handle;
/* Handle to refer to this BO in uAPI calls. This is either the GEM handle
* on native Linux, or the virtio resource ID with virtgpu.
*/
uint32_t uapi_handle;
/* DMA-BUF fd clone for adding fences to imports/exports */ /* DMA-BUF fd clone for adding fences to imports/exports */
int prime_fd; int prime_fd;
@ -99,10 +104,6 @@ struct agx_bo {
/* For debugging */ /* For debugging */
const char *label; const char *label;
/* virtio blob_id */
uint32_t blob_id;
uint32_t vbo_res_id;
}; };
static inline uint32_t static inline uint32_t


@ -8,6 +8,7 @@
#include "agx_device.h" #include "agx_device.h"
#include <inttypes.h> #include <inttypes.h>
#include "clc/asahi_clc.h" #include "clc/asahi_clc.h"
#include "drm-uapi/asahi_drm.h"
#include "util/macros.h" #include "util/macros.h"
#include "util/ralloc.h" #include "util/ralloc.h"
#include "util/timespec.h" #include "util/timespec.h"
@ -18,6 +19,7 @@
#include "agx_scratch.h" #include "agx_scratch.h"
#include "decode.h" #include "decode.h"
#include "glsl_types.h" #include "glsl_types.h"
#include "layout.h"
#include "libagx_dgc.h" #include "libagx_dgc.h"
#include "libagx_shaders.h" #include "libagx_shaders.h"
@ -34,7 +36,6 @@
#include "util/u_printf.h" #include "util/u_printf.h"
#include "git_sha1.h" #include "git_sha1.h"
#include "nir_serialize.h" #include "nir_serialize.h"
#include "unstable_asahi_drm.h"
#include "vdrm.h" #include "vdrm.h"
static inline int static inline int
@ -80,7 +81,7 @@ static const struct debug_named_value agx_debug_options[] = {
void void
agx_bo_free(struct agx_device *dev, struct agx_bo *bo) agx_bo_free(struct agx_device *dev, struct agx_bo *bo)
{ {
const uint64_t handle = bo->handle; const uint64_t handle = bo->uapi_handle;
if (bo->_map) if (bo->_map)
munmap(bo->_map, bo->size); munmap(bo->_map, bo->size);
@ -103,33 +104,58 @@ agx_bo_free(struct agx_device *dev, struct agx_bo *bo)
} }
static int static int
agx_drm_bo_bind(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops,
uint32_t count)
{
struct drm_asahi_vm_bind vm_bind = {
.num_binds = count,
.vm_id = dev->vm_id,
.userptr = (uintptr_t)ops,
.stride = sizeof(*ops),
};
int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_VM_BIND, &vm_bind);
if (ret) {
fprintf(stderr, "DRM_IOCTL_ASAHI_VM_BIND failed\n");
}
return ret;
}
/*
* Convenience helper to bind a single BO regardless of kernel module.
*/
int
agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
size_t size_B, uint64_t offset_B, uint32_t flags, bool unbind) size_t size_B, uint64_t offset_B, uint32_t flags)
{ {
assert((size_B % 16384) == 0 && "alignment required"); assert((size_B % 16384) == 0 && "alignment required");
assert((offset_B % 16384) == 0 && "alignment required"); assert((offset_B % 16384) == 0 && "alignment required");
assert((addr % 16384) == 0 && "alignment required"); assert((addr % 16384) == 0 && "alignment required");
struct drm_asahi_gem_bind gem_bind = { struct drm_asahi_gem_bind_op op = {
.op = unbind ? ASAHI_BIND_OP_UNBIND : ASAHI_BIND_OP_BIND,
.flags = flags, .flags = flags,
.handle = bo ? bo->handle : 0, .handle = bo ? bo->uapi_handle : 0,
.vm_id = dev->vm_id,
.offset = offset_B, .offset = offset_B,
.range = size_B, .range = size_B,
.addr = addr, .addr = addr,
}; };
assert((size_B % 16384) == 0 && "page alignment required"); return dev->ops.bo_bind(dev, &op, 1);
assert((offset_B % 16384) == 0 && "page alignment required"); }
assert((addr % 16384) == 0 && "page alignment required");
int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND, &gem_bind); int
if (ret) { agx_bind_timestamps(struct agx_device *dev, struct agx_bo *bo, uint32_t *handle)
fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_BIND failed: %m (handle=%d)\n", {
bo ? bo->handle : 0); struct drm_asahi_gem_bind_object bind = {
} .op = DRM_ASAHI_BIND_OBJECT_OP_BIND,
.flags = DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS,
.handle = bo->uapi_handle,
.range = bo->size,
};
int ret = dev->ops.bo_bind_object(dev, &bind);
*handle = bind.object_handle;
return ret; return ret;
} }
@ -146,10 +172,10 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
struct drm_asahi_gem_create gem_create = {.size = size}; struct drm_asahi_gem_create gem_create = {.size = size};
if (flags & AGX_BO_WRITEBACK) if (flags & AGX_BO_WRITEBACK)
gem_create.flags |= ASAHI_GEM_WRITEBACK; gem_create.flags |= DRM_ASAHI_GEM_WRITEBACK;
if (!(flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))) { if (!(flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))) {
gem_create.flags |= ASAHI_GEM_VM_PRIVATE; gem_create.flags |= DRM_ASAHI_GEM_VM_PRIVATE;
gem_create.vm_id = dev->vm_id; gem_create.vm_id = dev->vm_id;
} }
@ -173,7 +199,7 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
bo->size = gem_create.size; bo->size = gem_create.size;
bo->align = align; bo->align = align;
bo->flags = flags; bo->flags = flags;
bo->handle = handle; bo->handle = bo->uapi_handle = handle;
bo->prime_fd = -1; bo->prime_fd = -1;
enum agx_va_flags va_flags = flags & AGX_BO_LOW_VA ? AGX_VA_USC : 0; enum agx_va_flags va_flags = flags & AGX_BO_LOW_VA ? AGX_VA_USC : 0;
@ -184,12 +210,12 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
return NULL; return NULL;
} }
uint32_t bind = ASAHI_BIND_READ; uint32_t bind = DRM_ASAHI_BIND_READ;
if (!(flags & AGX_BO_READONLY)) { if (!(flags & AGX_BO_READONLY)) {
bind |= ASAHI_BIND_WRITE; bind |= DRM_ASAHI_BIND_WRITE;
} }
ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0, bind, false); ret = agx_bo_bind(dev, bo, bo->va->addr, bo->size, 0, bind);
if (ret) { if (ret) {
agx_bo_free(dev, bo); agx_bo_free(dev, bo);
return NULL; return NULL;
@ -203,7 +229,8 @@ agx_bo_mmap(struct agx_device *dev, struct agx_bo *bo)
{ {
assert(bo->_map == NULL && "not double mapped"); assert(bo->_map == NULL && "not double mapped");
struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle = bo->handle}; struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle =
bo->uapi_handle};
int ret; int ret;
ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET, &gem_mmap_offset); ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET, &gem_mmap_offset);
@ -245,7 +272,7 @@ agx_bo_import(struct agx_device *dev, int fd)
if (!bo->size) { if (!bo->size) {
bo->dev = dev; bo->dev = dev;
bo->size = lseek(fd, 0, SEEK_END); bo->size = lseek(fd, 0, SEEK_END);
bo->align = dev->params.vm_page_size; bo->align = AIL_PAGESIZE;
/* Sometimes this can fail and return -1. size of -1 is not /* Sometimes this can fail and return -1. size of -1 is not
* a nice thing for mmap to try mmap. Be more robust also * a nice thing for mmap to try mmap. Be more robust also
@ -255,7 +282,7 @@ agx_bo_import(struct agx_device *dev, int fd)
pthread_mutex_unlock(&dev->bo_map_lock); pthread_mutex_unlock(&dev->bo_map_lock);
return NULL; return NULL;
} }
if (bo->size & (dev->params.vm_page_size - 1)) { if (bo->size & (AIL_PAGESIZE - 1)) {
fprintf( fprintf(
stderr, stderr,
"import failed: BO is not a multiple of the page size (0x%llx bytes)\n", "import failed: BO is not a multiple of the page size (0x%llx bytes)\n",
@ -281,11 +308,13 @@ agx_bo_import(struct agx_device *dev, int fd)
} }
if (dev->is_virtio) { if (dev->is_virtio) {
bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, bo->handle); bo->uapi_handle = vdrm_handle_to_res_id(dev->vdrm, bo->handle);
} else {
bo->uapi_handle = bo->handle;
} }
ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0, ret = agx_bo_bind(dev, bo, bo->va->addr, bo->size, 0,
ASAHI_BIND_READ | ASAHI_BIND_WRITE, false); DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE);
if (ret) { if (ret) {
fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n", fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n",
(long long)bo->va->addr); (long long)bo->va->addr);
@ -361,38 +390,24 @@ agx_bo_export(struct agx_device *dev, struct agx_bo *bo)
} }
static int static int
agx_bo_bind_object(struct agx_device *dev, struct agx_bo *bo, agx_bo_bind_object(struct agx_device *dev,
uint32_t *object_handle, size_t size_B, uint64_t offset_B, struct drm_asahi_gem_bind_object *bind)
uint32_t flags)
{ {
struct drm_asahi_gem_bind_object gem_bind = { int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, bind);
.op = ASAHI_BIND_OBJECT_OP_BIND,
.flags = flags,
.handle = bo->handle,
.vm_id = 0,
.offset = offset_B,
.range = size_B,
};
int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, &gem_bind);
if (ret) { if (ret) {
fprintf(stderr, fprintf(stderr,
"DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (handle=%d)\n", "DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (handle=%d)\n",
bo->handle); bind->handle);
} }
*object_handle = gem_bind.object_handle;
return ret; return ret;
} }
static int static int
agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle, agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle)
uint32_t flags)
{ {
struct drm_asahi_gem_bind_object gem_bind = { struct drm_asahi_gem_bind_object gem_bind = {
.op = ASAHI_BIND_OBJECT_OP_UNBIND, .op = DRM_ASAHI_BIND_OBJECT_OP_UNBIND,
.flags = flags,
.object_handle = object_handle, .object_handle = object_handle,
}; };
@ -406,23 +421,6 @@ agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle,
return ret; return ret;
} }
static void
agx_get_global_ids(struct agx_device *dev)
{
dev->next_global_id = 0;
dev->last_global_id = 0x1000000;
}
uint64_t
agx_get_global_id(struct agx_device *dev)
{
if (unlikely(dev->next_global_id >= dev->last_global_id)) {
agx_get_global_ids(dev);
}
return dev->next_global_id++;
}
static ssize_t static ssize_t
agx_get_params(struct agx_device *dev, void *buf, size_t size) agx_get_params(struct agx_device *dev, void *buf, size_t size)
{ {
@ -452,7 +450,7 @@ agx_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
const agx_device_ops_t agx_device_drm_ops = { const agx_device_ops_t agx_device_drm_ops = {
.bo_alloc = agx_bo_alloc, .bo_alloc = agx_bo_alloc,
.bo_bind = agx_bo_bind, .bo_bind = agx_drm_bo_bind,
.bo_mmap = agx_bo_mmap, .bo_mmap = agx_bo_mmap,
.get_params = agx_get_params, .get_params = agx_get_params,
.submit = agx_submit, .submit = agx_submit,
@ -475,16 +473,12 @@ gcd(uint64_t n, uint64_t m)
static void static void
agx_init_timestamps(struct agx_device *dev) agx_init_timestamps(struct agx_device *dev)
{ {
uint64_t ts_gcd = gcd(dev->params.timer_frequency_hz, NSEC_PER_SEC); uint64_t user_ts_gcd =
gcd(dev->params.command_timestamp_frequency_hz, NSEC_PER_SEC);
dev->timestamp_to_ns.num = NSEC_PER_SEC / ts_gcd;
dev->timestamp_to_ns.den = dev->params.timer_frequency_hz / ts_gcd;
uint64_t user_ts_gcd = gcd(dev->params.timer_frequency_hz, NSEC_PER_SEC);
dev->user_timestamp_to_ns.num = NSEC_PER_SEC / user_ts_gcd; dev->user_timestamp_to_ns.num = NSEC_PER_SEC / user_ts_gcd;
dev->user_timestamp_to_ns.den = dev->user_timestamp_to_ns.den =
dev->params.user_timestamp_frequency_hz / user_ts_gcd; dev->params.command_timestamp_frequency_hz / user_ts_gcd;
} }
bool bool
@ -533,47 +527,6 @@ agx_open_device(void *memctx, struct agx_device *dev)
} }
assert(params_size >= sizeof(dev->params)); assert(params_size >= sizeof(dev->params));
/* Refuse to probe. */
if (dev->params.unstable_uabi_version != DRM_ASAHI_UNSTABLE_UABI_VERSION) {
fprintf(
stderr,
"You are attempting to use upstream Mesa with a downstream kernel!\n"
"This WILL NOT work.\n"
"The Asahi UABI is unstable and NOT SUPPORTED in upstream Mesa.\n"
"UABI related code in upstream Mesa is not for use!\n"
"\n"
"Do NOT attempt to patch out checks, you WILL break your system.\n"
"Do NOT report bugs.\n"
"Do NOT ask Mesa developers for support.\n"
"Do NOT write guides about how to patch out these checks.\n"
"Do NOT package patches to Mesa to bypass this.\n"
"\n"
"~~~\n"
"This is not a place of honor.\n"
"No highly esteemed deed is commemorated here.\n"
"Nothing valued is here.\n"
"\n"
"What is here was dangerous and repulsive to us.\n"
"This message is a warning about danger.\n"
"\n"
"The danger is still present, in your time, as it was in ours.\n"
"The danger is unleashed only if you substantially disturb this place physically.\n"
"This place is best shunned and left uninhabited.\n"
"~~~\n"
"\n"
"THIS IS NOT A BUG. THIS IS YOU DOING SOMETHING BROKEN!\n");
abort();
}
uint64_t incompat =
dev->params.feat_incompat & (~AGX_SUPPORTED_INCOMPAT_FEATURES);
if (incompat) {
fprintf(stderr, "Missing GPU incompat features: 0x%" PRIx64 "\n",
incompat);
assert(0);
return false;
}
assert(dev->params.gpu_generation >= 13); assert(dev->params.gpu_generation >= 13);
const char *variant = " Unknown"; const char *variant = " Unknown";
switch (dev->params.gpu_variant) { switch (dev->params.gpu_variant) {
@ -611,14 +564,10 @@ agx_open_device(void *memctx, struct agx_device *dev)
assert(reservation == LIBAGX_PRINTF_BUFFER_ADDRESS); assert(reservation == LIBAGX_PRINTF_BUFFER_ADDRESS);
reservation += LIBAGX_PRINTF_BUFFER_SIZE; reservation += LIBAGX_PRINTF_BUFFER_SIZE;
dev->guard_size = dev->params.vm_page_size; dev->guard_size = AIL_PAGESIZE;
if (dev->params.vm_usc_start) { // Put the USC heap at the bottom of the user address space, 4GiB aligned
dev->shader_base = dev->params.vm_usc_start; dev->shader_base =
} else { ALIGN_POT(MAX2(dev->params.vm_start, reservation), 0x100000000ull);
// Put the USC heap at the bottom of the user address space, 4GiB aligned
dev->shader_base = ALIGN_POT(MAX2(dev->params.vm_user_start, reservation),
0x100000000ull);
}
if (dev->shader_base < reservation) { if (dev->shader_base < reservation) {
/* Our robustness implementation requires the bottom unmapped */ /* Our robustness implementation requires the bottom unmapped */
@ -631,8 +580,8 @@ agx_open_device(void *memctx, struct agx_device *dev)
// Put the user heap after the USC heap // Put the user heap after the USC heap
uint64_t user_start = dev->shader_base + shader_size; uint64_t user_start = dev->shader_base + shader_size;
assert(dev->shader_base >= dev->params.vm_user_start); assert(dev->shader_base >= dev->params.vm_start);
assert(user_start < dev->params.vm_user_end); assert(user_start < dev->params.vm_end);
dev->agxdecode = agxdecode_new_context(dev->shader_base); dev->agxdecode = agxdecode_new_context(dev->shader_base);
@ -652,8 +601,8 @@ agx_open_device(void *memctx, struct agx_device *dev)
// reasonable use case. // reasonable use case.
uint64_t kernel_size = MAX2(dev->params.vm_kernel_min_size, 32ull << 30); uint64_t kernel_size = MAX2(dev->params.vm_kernel_min_size, 32ull << 30);
struct drm_asahi_vm_create vm_create = { struct drm_asahi_vm_create vm_create = {
.kernel_start = dev->params.vm_user_end - kernel_size, .kernel_start = dev->params.vm_end - kernel_size,
.kernel_end = dev->params.vm_user_end, .kernel_end = dev->params.vm_end,
}; };
uint64_t user_size = vm_create.kernel_start - user_start; uint64_t user_size = vm_create.kernel_start - user_start;
@ -671,8 +620,6 @@ agx_open_device(void *memctx, struct agx_device *dev)
dev->vm_id = vm_create.vm_id; dev->vm_id = vm_create.vm_id;
agx_get_global_ids(dev);
glsl_type_singleton_init_or_ref(); glsl_type_singleton_init_or_ref();
if (agx_gather_device_key(dev).needs_g13x_coherency == U_TRISTATE_YES) { if (agx_gather_device_key(dev).needs_g13x_coherency == U_TRISTATE_YES) {
@ -698,8 +645,8 @@ agx_open_device(void *memctx, struct agx_device *dev)
*/ */
{ {
void *bo = agx_bo_create(dev, 16384, 0, 0, "Zero page"); void *bo = agx_bo_create(dev, 16384, 0, 0, "Zero page");
int ret = dev->ops.bo_bind(dev, bo, AGX_ZERO_PAGE_ADDRESS, 16384, 0, int ret = agx_bo_bind(dev, bo, AGX_ZERO_PAGE_ADDRESS, 16384, 0,
ASAHI_BIND_READ, false); DRM_ASAHI_BIND_READ);
if (ret) { if (ret) {
fprintf(stderr, "Failed to bind zero page"); fprintf(stderr, "Failed to bind zero page");
return false; return false;
@ -709,9 +656,9 @@ agx_open_device(void *memctx, struct agx_device *dev)
void *bo = agx_bo_create(dev, LIBAGX_PRINTF_BUFFER_SIZE, 0, AGX_BO_WRITEBACK, void *bo = agx_bo_create(dev, LIBAGX_PRINTF_BUFFER_SIZE, 0, AGX_BO_WRITEBACK,
"Printf/abort"); "Printf/abort");
ret = dev->ops.bo_bind(dev, bo, LIBAGX_PRINTF_BUFFER_ADDRESS, ret = agx_bo_bind(dev, bo, LIBAGX_PRINTF_BUFFER_ADDRESS,
LIBAGX_PRINTF_BUFFER_SIZE, 0, LIBAGX_PRINTF_BUFFER_SIZE, 0,
ASAHI_BIND_READ | ASAHI_BIND_WRITE, false); DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE);
if (ret) { if (ret) {
fprintf(stderr, "Failed to bind printf buffer"); fprintf(stderr, "Failed to bind printf buffer");
return false; return false;
@ -738,8 +685,8 @@ agx_close_device(struct agx_device *dev)
} }
uint32_t uint32_t
agx_create_command_queue(struct agx_device *dev, uint32_t caps, agx_create_command_queue(struct agx_device *dev,
uint32_t priority) enum drm_asahi_priority priority)
{ {
if (dev->debug & AGX_DBG_1QUEUE) { if (dev->debug & AGX_DBG_1QUEUE) {
@ -753,9 +700,8 @@ agx_create_command_queue(struct agx_device *dev, uint32_t caps,
struct drm_asahi_queue_create queue_create = { struct drm_asahi_queue_create queue_create = {
.vm_id = dev->vm_id, .vm_id = dev->vm_id,
.queue_caps = caps,
.priority = priority, .priority = priority,
.flags = 0, .usc_exec_base = dev->shader_base,
}; };
int ret = int ret =
@ -873,28 +819,14 @@ agx_debug_fault(struct agx_device *dev, uint64_t addr)
uint64_t uint64_t
agx_get_gpu_timestamp(struct agx_device *dev) agx_get_gpu_timestamp(struct agx_device *dev)
{ {
if (dev->params.feat_compat & DRM_ASAHI_FEAT_GETTIME) { struct drm_asahi_get_time get_time = {.flags = 0};
struct drm_asahi_get_time get_time = {.flags = 0, .extensions = 0};
int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_GET_TIME, &get_time); int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_GET_TIME, &get_time);
if (ret) { if (ret) {
fprintf(stderr, "DRM_IOCTL_ASAHI_GET_TIME failed: %m\n"); fprintf(stderr, "DRM_IOCTL_ASAHI_GET_TIME failed: %m\n");
} else {
return get_time.gpu_timestamp;
}
} }
#if DETECT_ARCH_AARCH64
uint64_t ret; return get_time.gpu_timestamp;
__asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(ret));
return ret;
#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64
/* Maps to the above when run under FEX without thunking */
uint32_t high, low;
__asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
return (uint64_t)low | ((uint64_t)high << 32);
#else
#error "invalid architecture for asahi"
#endif
} }
/* (Re)define UUID_SIZE to avoid including vulkan.h (or p_defines.h) here. */ /* (Re)define UUID_SIZE to avoid including vulkan.h (or p_defines.h) here. */
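With the GETTIME feature check and the cntvct/rdtsc fallbacks gone, timestamps come straight from DRM_IOCTL_ASAHI_GET_TIME and are scaled by the ratio agx_init_timestamps() derives from command_timestamp_frequency_hz. A small worked sketch of that conversion, assuming a 24 MHz timestamp frequency (an assumption; the real value is read from dev->params):

/* gcd(24000000, 1000000000) = 8000000, so the reduced ratio is
 *    num = 1000000000 / 8000000 = 125
 *    den =   24000000 / 8000000 = 3
 * and a raw timestamp t converts to (t * 125) / 3 nanoseconds.
 */
static uint64_t
example_timestamp_to_ns(const struct agx_device *dev, uint64_t t)
{
   return (t * dev->user_timestamp_to_ns.num) / dev->user_timestamp_to_ns.den;
}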


@ -7,6 +7,7 @@
#include <stdint.h> #include <stdint.h>
#include <xf86drm.h> #include <xf86drm.h>
#include "drm-uapi/asahi_drm.h"
#include "util/ralloc.h" #include "util/ralloc.h"
#include "util/simple_mtx.h" #include "util/simple_mtx.h"
#include "util/sparse_array.h" #include "util/sparse_array.h"
@ -18,16 +19,11 @@
#include "decode.h" #include "decode.h"
#include "layout.h" #include "layout.h"
#include "libagx_dgc.h" #include "libagx_dgc.h"
#include "unstable_asahi_drm.h"
#include "vdrm.h" #include "vdrm.h"
#include "asahi_proto.h" #include "asahi_proto.h"
// TODO: this is a lie right now
static const uint64_t AGX_SUPPORTED_INCOMPAT_FEATURES =
DRM_ASAHI_FEAT_MANDATORY_ZS_COMPRESSION;
enum agx_dbg { enum agx_dbg {
AGX_DBG_TRACE = BITFIELD_BIT(0), AGX_DBG_TRACE = BITFIELD_BIT(0),
AGX_DBG_BODUMP = BITFIELD_BIT(1), AGX_DBG_BODUMP = BITFIELD_BIT(1),
@ -69,7 +65,6 @@ struct nir_shader;
#define BARRIER_COMPUTE (1 << DRM_ASAHI_SUBQUEUE_COMPUTE) #define BARRIER_COMPUTE (1 << DRM_ASAHI_SUBQUEUE_COMPUTE)
struct agx_submit_virt { struct agx_submit_virt {
uint32_t vbo_res_id;
uint32_t extres_count; uint32_t extres_count;
struct asahi_ccmd_submit_res *extres; struct asahi_ccmd_submit_res *extres;
}; };
@ -77,21 +72,23 @@ struct agx_submit_virt {
typedef struct { typedef struct {
struct agx_bo *(*bo_alloc)(struct agx_device *dev, size_t size, size_t align, struct agx_bo *(*bo_alloc)(struct agx_device *dev, size_t size, size_t align,
enum agx_bo_flags flags); enum agx_bo_flags flags);
int (*bo_bind)(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, int (*bo_bind)(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops,
size_t size_B, uint64_t offset_B, uint32_t flags, uint32_t count);
bool unbind);
void (*bo_mmap)(struct agx_device *dev, struct agx_bo *bo); void (*bo_mmap)(struct agx_device *dev, struct agx_bo *bo);
ssize_t (*get_params)(struct agx_device *dev, void *buf, size_t size); ssize_t (*get_params)(struct agx_device *dev, void *buf, size_t size);
int (*submit)(struct agx_device *dev, struct drm_asahi_submit *submit, int (*submit)(struct agx_device *dev, struct drm_asahi_submit *submit,
struct agx_submit_virt *virt); struct agx_submit_virt *virt);
int (*bo_bind_object)(struct agx_device *dev, struct agx_bo *bo, int (*bo_bind_object)(struct agx_device *dev,
uint32_t *object_handle, size_t size_B, struct drm_asahi_gem_bind_object *bind);
uint64_t offset_B, uint32_t flags); int (*bo_unbind_object)(struct agx_device *dev, uint32_t object_handle);
int (*bo_unbind_object)(struct agx_device *dev, uint32_t object_handle,
uint32_t flags);
} agx_device_ops_t; } agx_device_ops_t;
int agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
size_t size_B, uint64_t offset_B, uint32_t flags);
int agx_bind_timestamps(struct agx_device *dev, struct agx_bo *bo,
uint32_t *handle);
struct agx_device { struct agx_device {
uint32_t debug; uint32_t debug;
@ -100,7 +97,6 @@ struct agx_device {
char name[64]; char name[64];
struct drm_asahi_params_global params; struct drm_asahi_params_global params;
uint64_t next_global_id, last_global_id;
bool is_virtio; bool is_virtio;
agx_device_ops_t ops; agx_device_ops_t ops;
@ -160,11 +156,6 @@ struct agx_device {
/* Simplified device selection */ /* Simplified device selection */
enum agx_chip chip; enum agx_chip chip;
struct {
uint64_t num;
uint64_t den;
} timestamp_to_ns;
struct { struct {
uint64_t num; uint64_t num;
uint64_t den; uint64_t den;
@ -185,7 +176,7 @@ agx_bo_map(struct agx_bo *bo)
static inline bool static inline bool
agx_has_soft_fault(struct agx_device *dev) agx_has_soft_fault(struct agx_device *dev)
{ {
return (dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS) && return (dev->params.features & DRM_ASAHI_FEATURE_SOFT_FAULTS) &&
!(dev->debug & AGX_DBG_NOSOFT); !(dev->debug & AGX_DBG_NOSOFT);
} }
@ -208,10 +199,8 @@ agx_lookup_bo(struct agx_device *dev, uint32_t handle)
return util_sparse_array_get(&dev->bo_map, handle); return util_sparse_array_get(&dev->bo_map, handle);
} }
uint64_t agx_get_global_id(struct agx_device *dev); uint32_t agx_create_command_queue(struct agx_device *dev,
enum drm_asahi_priority priority);
uint32_t agx_create_command_queue(struct agx_device *dev, uint32_t caps,
uint32_t priority);
int agx_destroy_command_queue(struct agx_device *dev, uint32_t queue_id); int agx_destroy_command_queue(struct agx_device *dev, uint32_t queue_id);
int agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd); int agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd);
@ -221,12 +210,6 @@ void agx_debug_fault(struct agx_device *dev, uint64_t addr);
uint64_t agx_get_gpu_timestamp(struct agx_device *dev); uint64_t agx_get_gpu_timestamp(struct agx_device *dev);
static inline uint64_t
agx_gpu_time_to_ns(struct agx_device *dev, uint64_t gpu_time)
{
return (gpu_time * dev->timestamp_to_ns.num) / dev->timestamp_to_ns.den;
}
static inline uint64_t static inline uint64_t
agx_gpu_timestamp_to_ns(struct agx_device *dev, uint64_t gpu_timestamp) agx_gpu_timestamp_to_ns(struct agx_device *dev, uint64_t gpu_timestamp)
{ {
@ -245,8 +228,14 @@ struct agx_va *agx_va_alloc(struct agx_device *dev, uint64_t size_B,
uint64_t fixed_va); uint64_t fixed_va);
void agx_va_free(struct agx_device *dev, struct agx_va *va, bool unbind); void agx_va_free(struct agx_device *dev, struct agx_va *va, bool unbind);
static inline bool static inline struct drm_asahi_cmd_header
agx_supports_timestamps(const struct agx_device *dev) agx_cmd_header(bool compute, uint16_t barrier_vdm, uint16_t barrier_cdm)
{ {
return (dev->params.feat_compat & DRM_ASAHI_FEAT_USER_TIMESTAMPS); return (struct drm_asahi_cmd_header){
.cmd_type = compute ? DRM_ASAHI_CMD_COMPUTE : DRM_ASAHI_CMD_RENDER,
.size = compute ? sizeof(struct drm_asahi_cmd_compute)
: sizeof(struct drm_asahi_cmd_render),
.vdm_barrier = barrier_vdm,
.cdm_barrier = barrier_cdm,
};
} }
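Because the stable uAPI replaces the array of struct drm_asahi_command with one flat byte stream of header-prefixed commands, agx_cmd_header() above supplies the per-command header. A minimal sketch of appending a compute command to such a stream, assuming a util_dynarray-backed command buffer (the helper name is illustrative, not part of this change); agxdecode_drm_cmdbuf() later in this diff walks the same layout:

#include <string.h>
#include "util/u_dynarray.h"

static void
example_push_compute(struct util_dynarray *cmdbuf,
                     const struct drm_asahi_cmd_compute *cmd,
                     uint16_t vdm_barrier, uint16_t cdm_barrier)
{
   /* Header first: cmd_type and size tell the consumer how far to advance
    * to the next command in the stream.
    */
   struct drm_asahi_cmd_header hdr =
      agx_cmd_header(true /* compute */, vdm_barrier, cdm_barrier);
   util_dynarray_append(cmdbuf, struct drm_asahi_cmd_header, hdr);

   /* Then exactly hdr.size payload bytes. */
   memcpy(util_dynarray_grow_bytes(cmdbuf, 1, hdr.size), cmd, hdr.size);
}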


@ -9,7 +9,6 @@
#include <sys/mman.h> #include <sys/mman.h>
#include "drm-uapi/virtgpu_drm.h" #include "drm-uapi/virtgpu_drm.h"
#include "unstable_asahi_drm.h"
#include "vdrm.h" #include "vdrm.h"
@ -67,14 +66,14 @@ agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align,
}; };
if (flags & AGX_BO_WRITEBACK) if (flags & AGX_BO_WRITEBACK)
req.flags |= ASAHI_GEM_WRITEBACK; req.flags |= DRM_ASAHI_GEM_WRITEBACK;
uint32_t blob_flags = uint32_t blob_flags =
VIRTGPU_BLOB_FLAG_USE_MAPPABLE | VIRTGPU_BLOB_FLAG_USE_SHAREABLE; VIRTGPU_BLOB_FLAG_USE_MAPPABLE | VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
req.bind_flags = ASAHI_BIND_READ; req.bind_flags = DRM_ASAHI_BIND_READ;
if (!(flags & AGX_BO_READONLY)) { if (!(flags & AGX_BO_READONLY)) {
req.bind_flags |= ASAHI_BIND_WRITE; req.bind_flags |= DRM_ASAHI_BIND_WRITE;
} }
uint32_t blob_id = p_atomic_inc_return(&dev->next_blob_id); uint32_t blob_id = p_atomic_inc_return(&dev->next_blob_id);
@ -86,7 +85,6 @@ agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align,
return NULL; return NULL;
} }
/* Note: optional, can zero out for not mapping for sparse */
req.addr = va->addr; req.addr = va->addr;
req.blob_id = blob_id; req.blob_id = blob_id;
req.vm_id = dev->vm_id; req.vm_id = dev->vm_id;
@ -111,55 +109,46 @@ agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align,
bo->flags = flags; bo->flags = flags;
bo->handle = handle; bo->handle = handle;
bo->prime_fd = -1; bo->prime_fd = -1;
bo->blob_id = blob_id;
bo->va = va; bo->va = va;
bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, handle); bo->uapi_handle = vdrm_handle_to_res_id(dev->vdrm, handle);
return bo; return bo;
} }
static int static int
agx_virtio_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr, agx_virtio_bo_bind(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops,
size_t size_B, uint64_t offset_B, uint32_t flags, uint32_t count)
bool unbind)
{ {
struct asahi_ccmd_gem_bind_req req = { size_t payload_size = sizeof(*ops) * count;
.hdr.cmd = ASAHI_CCMD_GEM_BIND, size_t req_len = sizeof(struct asahi_ccmd_vm_bind_req) + payload_size;
.hdr.len = sizeof(struct asahi_ccmd_gem_bind_req), struct asahi_ccmd_vm_bind_req *req = calloc(1, req_len);
.bind = {
.op = unbind ? ASAHI_BIND_OP_UNBIND : ASAHI_BIND_OP_BIND,
.flags = flags,
.vm_id = dev->vm_id,
.handle = bo ? bo->vbo_res_id : 0,
.offset = offset_B,
.range = size_B,
.addr = addr,
}};
int ret = vdrm_send_req(dev->vdrm, &req.hdr, false); *req = (struct asahi_ccmd_vm_bind_req){
.hdr.cmd = ASAHI_CCMD_VM_BIND,
.hdr.len = sizeof(struct asahi_ccmd_vm_bind_req),
.vm_id = dev->vm_id,
.stride = sizeof(*ops),
.count = count,
};
memcpy(req->payload, ops, payload_size);
int ret = vdrm_send_req(dev->vdrm, &req->hdr, false);
if (ret) { if (ret) {
fprintf(stderr, "ASAHI_CCMD_GEM_BIND failed: %d (handle=%d)\n", ret, fprintf(stderr, "ASAHI_CCMD_GEM_BIND failed: %d\n", ret);
bo ? bo->handle : 0);
} }
return ret; return ret;
} }
static int static int
agx_virtio_bo_bind_object(struct agx_device *dev, struct agx_bo *bo, agx_virtio_bo_bind_object(struct agx_device *dev,
uint32_t *object_handle, size_t size_B, struct drm_asahi_gem_bind_object *bind)
uint64_t offset_B, uint32_t flags)
{ {
struct asahi_ccmd_gem_bind_object_req req = { struct asahi_ccmd_gem_bind_object_req req = {
.hdr.cmd = ASAHI_CCMD_GEM_BIND_OBJECT, .hdr.cmd = ASAHI_CCMD_GEM_BIND_OBJECT,
.hdr.len = sizeof(struct asahi_ccmd_gem_bind_object_req), .hdr.len = sizeof(struct asahi_ccmd_gem_bind_object_req),
.bind = { .bind = *bind,
.op = ASAHI_BIND_OBJECT_OP_BIND, };
.flags = flags,
.vm_id = 0,
.handle = bo->vbo_res_id,
.offset = offset_B,
.range = size_B,
}};
struct asahi_ccmd_gem_bind_object_rsp *rsp; struct asahi_ccmd_gem_bind_object_rsp *rsp;
@ -170,25 +159,23 @@ agx_virtio_bo_bind_object(struct agx_device *dev, struct agx_bo *bo,
if (ret || rsp->ret) { if (ret || rsp->ret) {
fprintf(stderr, fprintf(stderr,
"ASAHI_CCMD_GEM_BIND_OBJECT bind failed: %d:%d (handle=%d)\n", "ASAHI_CCMD_GEM_BIND_OBJECT bind failed: %d:%d (handle=%d)\n",
ret, rsp->ret, bo->handle); ret, rsp->ret, bind->handle);
} }
if (!rsp->ret) if (!rsp->ret)
*object_handle = rsp->object_handle; bind->object_handle = rsp->object_handle;
return rsp->ret; return rsp->ret;
} }
static int static int
agx_virtio_bo_unbind_object(struct agx_device *dev, uint32_t object_handle, agx_virtio_bo_unbind_object(struct agx_device *dev, uint32_t object_handle)
uint32_t flags)
{ {
struct asahi_ccmd_gem_bind_object_req req = { struct asahi_ccmd_gem_bind_object_req req = {
.hdr.cmd = ASAHI_CCMD_GEM_BIND_OBJECT, .hdr.cmd = ASAHI_CCMD_GEM_BIND_OBJECT,
.hdr.len = sizeof(struct asahi_ccmd_gem_bind_object_req), .hdr.len = sizeof(struct asahi_ccmd_gem_bind_object_req),
.bind = { .bind = {
.op = ASAHI_BIND_OBJECT_OP_UNBIND, .op = DRM_ASAHI_BIND_OBJECT_OP_UNBIND,
.flags = flags,
.object_handle = object_handle, .object_handle = object_handle,
}}; }};
@ -228,89 +215,24 @@ agx_virtio_get_params(struct agx_device *dev, void *buf, size_t size)
sizeof(struct asahi_ccmd_get_params_rsp) + size); sizeof(struct asahi_ccmd_get_params_rsp) + size);
int ret = vdrm_send_req(vdrm, &req.hdr, true); int ret = vdrm_send_req(vdrm, &req.hdr, true);
if (ret) if (!ret)
goto out; return ret;
if (rsp->virt_uabi_version != ASAHI_PROTO_UNSTABLE_UABI_VERSION) {
fprintf(stderr, "Virt UABI mismatch: Host %d, Mesa %d\n",
rsp->virt_uabi_version, ASAHI_PROTO_UNSTABLE_UABI_VERSION);
return -1;
}
ret = rsp->ret; ret = rsp->ret;
if (!ret) { if (ret)
memcpy(buf, &rsp->payload, size); return ret;
return size;
}
out: memcpy(buf, &rsp->payload, size);
return ret; return size;
}
static void
agx_virtio_serialize_attachments(char **ptr, uint64_t attachments,
uint32_t count)
{
if (!count)
return;
size_t attachments_size = sizeof(struct drm_asahi_attachment) * count;
memcpy(*ptr, (char *)(uintptr_t)attachments, attachments_size);
*ptr += attachments_size;
} }
static int static int
agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit, agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
struct agx_submit_virt *virt) struct agx_submit_virt *virt)
{ {
struct drm_asahi_command *commands = struct drm_asahi_sync *syncs =
(struct drm_asahi_command *)(uintptr_t)submit->commands; (struct drm_asahi_sync *)(uintptr_t)submit->syncs;
struct drm_asahi_sync *in_syncs = size_t req_len = sizeof(struct asahi_ccmd_submit_req) + submit->cmdbuf_size;
(struct drm_asahi_sync *)(uintptr_t)submit->in_syncs;
struct drm_asahi_sync *out_syncs =
(struct drm_asahi_sync *)(uintptr_t)submit->out_syncs;
size_t req_len = sizeof(struct asahi_ccmd_submit_req);
for (int i = 0; i < submit->command_count; i++) {
switch (commands[i].cmd_type) {
case DRM_ASAHI_CMD_COMPUTE: {
struct drm_asahi_cmd_compute *compute =
(struct drm_asahi_cmd_compute *)(uintptr_t)commands[i].cmd_buffer;
req_len += sizeof(struct drm_asahi_command) +
sizeof(struct drm_asahi_cmd_compute);
req_len +=
compute->attachment_count * sizeof(struct drm_asahi_attachment);
if (compute->extensions) {
assert(*(uint32_t *)(uintptr_t)compute->extensions ==
ASAHI_COMPUTE_EXT_TIMESTAMPS);
req_len += sizeof(struct drm_asahi_cmd_compute_user_timestamps);
}
break;
}
case DRM_ASAHI_CMD_RENDER: {
struct drm_asahi_cmd_render *render =
(struct drm_asahi_cmd_render *)(uintptr_t)commands[i].cmd_buffer;
req_len += sizeof(struct drm_asahi_command) +
sizeof(struct drm_asahi_cmd_render);
req_len += render->fragment_attachment_count *
sizeof(struct drm_asahi_attachment);
req_len += render->vertex_attachment_count *
sizeof(struct drm_asahi_attachment);
if (render->extensions) {
assert(*(uint32_t *)(uintptr_t)render->extensions ==
ASAHI_RENDER_EXT_TIMESTAMPS);
req_len += sizeof(struct drm_asahi_cmd_render_user_timestamps);
}
break;
}
default:
return EINVAL;
}
}
size_t extres_size = size_t extres_size =
sizeof(struct asahi_ccmd_submit_res) * virt->extres_count; sizeof(struct asahi_ccmd_submit_res) * virt->extres_count;
@ -320,55 +242,13 @@ agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
(struct asahi_ccmd_submit_req *)calloc(1, req_len); (struct asahi_ccmd_submit_req *)calloc(1, req_len);
req->queue_id = submit->queue_id; req->queue_id = submit->queue_id;
req->result_res_id = virt->vbo_res_id;
req->command_count = submit->command_count;
req->extres_count = virt->extres_count; req->extres_count = virt->extres_count;
req->cmdbuf_size = submit->cmdbuf_size;
char *ptr = (char *)&req->payload; char *ptr = (char *)&req->payload;
for (int i = 0; i < submit->command_count; i++) { memcpy(ptr, (void *)(uintptr_t)submit->cmdbuf, req->cmdbuf_size);
memcpy(ptr, &commands[i], sizeof(struct drm_asahi_command)); ptr += req->cmdbuf_size;
ptr += sizeof(struct drm_asahi_command);
memcpy(ptr, (char *)(uintptr_t)commands[i].cmd_buffer,
commands[i].cmd_buffer_size);
ptr += commands[i].cmd_buffer_size;
switch (commands[i].cmd_type) {
case DRM_ASAHI_CMD_RENDER: {
struct drm_asahi_cmd_render *render =
(struct drm_asahi_cmd_render *)(uintptr_t)commands[i].cmd_buffer;
agx_virtio_serialize_attachments(&ptr, render->vertex_attachments,
render->vertex_attachment_count);
agx_virtio_serialize_attachments(&ptr, render->fragment_attachments,
render->fragment_attachment_count);
if (render->extensions) {
struct drm_asahi_cmd_render_user_timestamps *ext =
(struct drm_asahi_cmd_render_user_timestamps *)(uintptr_t)
render->extensions;
assert(!ext->next);
memcpy(ptr, (void *)ext, sizeof(*ext));
ptr += sizeof(*ext);
}
break;
}
case DRM_ASAHI_CMD_COMPUTE: {
struct drm_asahi_cmd_compute *compute =
(struct drm_asahi_cmd_compute *)(uintptr_t)commands[i].cmd_buffer;
agx_virtio_serialize_attachments(&ptr, compute->attachments,
compute->attachment_count);
if (compute->extensions) {
struct drm_asahi_cmd_compute_user_timestamps *ext =
(struct drm_asahi_cmd_compute_user_timestamps *)(uintptr_t)
compute->extensions;
assert(!ext->next);
memcpy(ptr, (void *)ext, sizeof(*ext));
ptr += sizeof(*ext);
}
break;
}
}
}
memcpy(ptr, virt->extres, extres_size); memcpy(ptr, virt->extres, extres_size);
ptr += extres_size; ptr += extres_size;
@ -376,18 +256,12 @@ agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
req->hdr.cmd = ASAHI_CCMD_SUBMIT; req->hdr.cmd = ASAHI_CCMD_SUBMIT;
req->hdr.len = req_len; req->hdr.len = req_len;
struct drm_virtgpu_execbuffer_syncobj *vdrm_in_syncs = calloc( uint32_t total_syncs = submit->in_sync_count + submit->out_sync_count;
submit->in_sync_count, sizeof(struct drm_virtgpu_execbuffer_syncobj)); struct drm_virtgpu_execbuffer_syncobj *vdrm_syncs =
for (int i = 0; i < submit->in_sync_count; i++) { calloc(total_syncs, sizeof(struct drm_virtgpu_execbuffer_syncobj));
vdrm_in_syncs[i].handle = in_syncs[i].handle; for (int i = 0; i < total_syncs; i++) {
vdrm_in_syncs[i].point = in_syncs[i].timeline_value; vdrm_syncs[i].handle = syncs[i].handle;
} vdrm_syncs[i].point = syncs[i].timeline_value;
struct drm_virtgpu_execbuffer_syncobj *vdrm_out_syncs = calloc(
submit->out_sync_count, sizeof(struct drm_virtgpu_execbuffer_syncobj));
for (int i = 0; i < submit->out_sync_count; i++) {
vdrm_out_syncs[i].handle = out_syncs[i].handle;
vdrm_out_syncs[i].point = out_syncs[i].timeline_value;
} }
struct vdrm_execbuf_params p = { struct vdrm_execbuf_params p = {
@ -395,15 +269,14 @@ agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
.ring_idx = 1, .ring_idx = 1,
.req = &req->hdr, .req = &req->hdr,
.num_in_syncobjs = submit->in_sync_count, .num_in_syncobjs = submit->in_sync_count,
.in_syncobjs = vdrm_in_syncs, .in_syncobjs = vdrm_syncs,
.num_out_syncobjs = submit->out_sync_count, .num_out_syncobjs = submit->out_sync_count,
.out_syncobjs = vdrm_out_syncs, .out_syncobjs = vdrm_syncs + submit->in_sync_count,
}; };
int ret = vdrm_execbuf(dev->vdrm, &p); int ret = vdrm_execbuf(dev->vdrm, &p);
free(vdrm_out_syncs); free(vdrm_syncs);
free(vdrm_in_syncs);
free(req); free(req);
return ret; return ret;
} }


@ -56,7 +56,7 @@ agx_va_free(struct agx_device *dev, struct agx_va *va, bool unbind)
return; return;
if (unbind) { if (unbind) {
dev->ops.bo_bind(dev, NULL, va->addr, va->size_B, 0, 0, true); agx_bo_bind(dev, NULL, va->addr, va->size_B, 0, DRM_ASAHI_BIND_UNBIND);
} }
struct util_vma_heap *heap = agx_vma_heap(dev, va->flags); struct util_vma_heap *heap = agx_vma_heap(dev, va->flags);


@ -7,7 +7,7 @@
#ifndef ASAHI_PROTO_H_ #ifndef ASAHI_PROTO_H_
#define ASAHI_PROTO_H_ #define ASAHI_PROTO_H_
#define ASAHI_PROTO_UNSTABLE_UABI_VERSION 1 #include "drm-uapi/asahi_drm.h"
/** /**
* Defines the layout of shmem buffer used for host->guest communication. * Defines the layout of shmem buffer used for host->guest communication.
@ -37,7 +37,7 @@ enum asahi_ccmd {
ASAHI_CCMD_IOCTL_SIMPLE, ASAHI_CCMD_IOCTL_SIMPLE,
ASAHI_CCMD_GET_PARAMS, ASAHI_CCMD_GET_PARAMS,
ASAHI_CCMD_GEM_NEW, ASAHI_CCMD_GEM_NEW,
ASAHI_CCMD_GEM_BIND, ASAHI_CCMD_VM_BIND,
ASAHI_CCMD_SUBMIT, ASAHI_CCMD_SUBMIT,
ASAHI_CCMD_GEM_BIND_OBJECT, ASAHI_CCMD_GEM_BIND_OBJECT,
}; };
@ -93,7 +93,6 @@ DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_get_params_req)
struct asahi_ccmd_get_params_rsp { struct asahi_ccmd_get_params_rsp {
struct vdrm_ccmd_rsp hdr; struct vdrm_ccmd_rsp hdr;
int32_t ret; int32_t ret;
uint32_t virt_uabi_version;
uint8_t payload[]; uint8_t payload[];
}; };
@ -108,11 +107,14 @@ struct asahi_ccmd_gem_new_req {
}; };
DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_gem_new_req) DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_gem_new_req)
struct asahi_ccmd_gem_bind_req { struct asahi_ccmd_vm_bind_req {
struct vdrm_ccmd_req hdr; struct vdrm_ccmd_req hdr;
struct drm_asahi_gem_bind bind; uint32_t vm_id;
uint32_t stride;
uint32_t count;
uint8_t payload[];
}; };
DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_gem_bind_req) DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_vm_bind_req)
struct asahi_ccmd_gem_bind_object_req { struct asahi_ccmd_gem_bind_object_req {
struct vdrm_ccmd_req hdr; struct vdrm_ccmd_req hdr;
@ -138,8 +140,7 @@ struct asahi_ccmd_submit_req {
struct vdrm_ccmd_req hdr; struct vdrm_ccmd_req hdr;
uint32_t flags; uint32_t flags;
uint32_t queue_id; uint32_t queue_id;
uint32_t result_res_id; uint32_t cmdbuf_size;
uint32_t command_count;
uint32_t extres_count; uint32_t extres_count;
uint8_t payload[]; uint8_t payload[];
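For reference, the submit request now carries the raw command buffer instead of per-command structs. A descriptive sketch of the payload layout as assembled by agx_virtio_submit() above (not a new API, just the resulting length calculation):

/* Payload following struct asahi_ccmd_submit_req:
 *    u8 cmdbuf[cmdbuf_size];                              header-prefixed commands
 *    struct asahi_ccmd_submit_res extres[extres_count];   external resources
 */
size_t req_len = sizeof(struct asahi_ccmd_submit_req) + submit->cmdbuf_size +
                 virt->extres_count * sizeof(struct asahi_ccmd_submit_res);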


@ -18,7 +18,6 @@
#include "util/u_hexdump.h" #include "util/u_hexdump.h"
#include "decode.h" #include "decode.h"
#include "unstable_asahi_drm.h"
struct libagxdecode_config lib_config; struct libagxdecode_config lib_config;
@ -801,106 +800,119 @@ agxdecode_helper(struct agxdecode_ctx *ctx, const char *prefix, uint64_t helper)
} }
} }
void static void
agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx, agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx,
struct drm_asahi_params_global *params, struct drm_asahi_params_global *params,
struct drm_asahi_cmd_render *c, bool verbose) struct drm_asahi_cmd_render *c, bool verbose)
{ {
agxdecode_dump_file_open(); DUMP_FIELD(c, "%x", flags);
DUMP_FIELD(c, "0x%llx", vdm_ctrl_stream_base);
DUMP_FIELD(c, "%llx", flags); agxdecode_stateful(ctx, c->vdm_ctrl_stream_base, "Encoder", agxdecode_vdm,
DUMP_FIELD(c, "0x%llx", encoder_ptr); verbose, params, NULL);
agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_vdm, verbose,
params, NULL);
DUMP_FIELD(c, "0x%x", encoder_id);
DUMP_FIELD(c, "0x%x", cmd_ta_id);
DUMP_FIELD(c, "0x%x", cmd_3d_id);
DUMP_FIELD(c, "0x%x", ppp_ctrl); DUMP_FIELD(c, "0x%x", ppp_ctrl);
DUMP_FIELD(c, "0x%llx", ppp_multisamplectl); DUMP_FIELD(c, "0x%llx", ppp_multisamplectl);
DUMP_CL(ZLS_CONTROL, &c->zls_ctrl, "ZLS Control"); DUMP_CL(ZLS_CONTROL, &c->zls_ctrl, "ZLS Control");
DUMP_FIELD(c, "0x%llx", depth_buffer_load); DUMP_FIELD(c, "0x%llx", depth.base);
DUMP_FIELD(c, "0x%llx", depth_buffer_store); DUMP_FIELD(c, "0x%llx", depth.comp_base);
DUMP_FIELD(c, "0x%llx", depth_buffer_partial); DUMP_FIELD(c, "%u", depth.stride);
DUMP_FIELD(c, "0x%llx", stencil_buffer_load); DUMP_FIELD(c, "%u", depth.comp_stride);
DUMP_FIELD(c, "0x%llx", stencil_buffer_store); DUMP_FIELD(c, "0x%llx", stencil.base);
DUMP_FIELD(c, "0x%llx", stencil_buffer_partial); DUMP_FIELD(c, "0x%llx", stencil.comp_base);
DUMP_FIELD(c, "0x%llx", scissor_array); DUMP_FIELD(c, "%u", stencil.stride);
DUMP_FIELD(c, "0x%llx", depth_bias_array); DUMP_FIELD(c, "%u", stencil.comp_stride);
DUMP_FIELD(c, "%d", fb_width); DUMP_FIELD(c, "0x%llx", isp_scissor_base);
DUMP_FIELD(c, "%d", fb_height); DUMP_FIELD(c, "0x%llx", isp_dbias_base);
DUMP_FIELD(c, "%d", width_px);
DUMP_FIELD(c, "%d", height_px);
DUMP_FIELD(c, "%d", layers); DUMP_FIELD(c, "%d", layers);
DUMP_FIELD(c, "%d", samples); DUMP_FIELD(c, "%d", samples);
DUMP_FIELD(c, "%d", sample_size); DUMP_FIELD(c, "%d", sample_size_B);
DUMP_FIELD(c, "%d", tib_blocks); DUMP_FIELD(c, "%d", utile_width_px);
DUMP_FIELD(c, "%d", utile_width); DUMP_FIELD(c, "%d", utile_height_px);
DUMP_FIELD(c, "%d", utile_height); DUMP_FIELD(c, "0x%x", bg.usc);
DUMP_FIELD(c, "0x%x", load_pipeline); DUMP_FIELD(c, "0x%x", bg.rsrc_spec);
DUMP_FIELD(c, "0x%x", load_pipeline_bind); agxdecode_stateful(ctx, decode_usc(ctx, c->bg.usc & ~0x7), "Load pipeline",
agxdecode_stateful(ctx, decode_usc(ctx, c->load_pipeline & ~0x7), agxdecode_usc, verbose, params, NULL);
"Load pipeline", agxdecode_usc, verbose, params, NULL); DUMP_FIELD(c, "0x%x", eot.usc);
DUMP_FIELD(c, "0x%x", store_pipeline); DUMP_FIELD(c, "0x%x", eot.rsrc_spec);
DUMP_FIELD(c, "0x%x", store_pipeline_bind); agxdecode_stateful(ctx, decode_usc(ctx, c->eot.usc & ~0x7), "Store pipeline",
agxdecode_stateful(ctx, decode_usc(ctx, c->store_pipeline & ~0x7), agxdecode_usc, verbose, params, NULL);
"Store pipeline", agxdecode_usc, verbose, params, NULL); DUMP_FIELD(c, "0x%x", partial_bg.usc);
DUMP_FIELD(c, "0x%x", partial_reload_pipeline); DUMP_FIELD(c, "0x%x", partial_bg.rsrc_spec);
DUMP_FIELD(c, "0x%x", partial_reload_pipeline_bind); agxdecode_stateful(ctx, decode_usc(ctx, c->partial_bg.usc & ~0x7),
agxdecode_stateful(ctx, decode_usc(ctx, c->partial_reload_pipeline & ~0x7),
"Partial reload pipeline", agxdecode_usc, verbose, params, "Partial reload pipeline", agxdecode_usc, verbose, params,
NULL); NULL);
DUMP_FIELD(c, "0x%x", partial_store_pipeline); DUMP_FIELD(c, "0x%x", partial_eot.usc);
DUMP_FIELD(c, "0x%x", partial_store_pipeline_bind); DUMP_FIELD(c, "0x%x", partial_eot.rsrc_spec);
agxdecode_stateful(ctx, decode_usc(ctx, c->partial_store_pipeline & ~0x7), agxdecode_stateful(ctx, decode_usc(ctx, c->partial_eot.usc & ~0x7),
"Partial store pipeline", agxdecode_usc, verbose, params, "Partial store pipeline", agxdecode_usc, verbose, params,
NULL); NULL);
DUMP_FIELD(c, "0x%x", depth_dimensions); DUMP_FIELD(c, "0x%x", isp_zls_pixels);
DUMP_FIELD(c, "0x%x", isp_bgobjdepth); DUMP_FIELD(c, "0x%x", isp_bgobjdepth);
DUMP_FIELD(c, "0x%x", isp_bgobjvals); DUMP_FIELD(c, "0x%x", isp_bgobjvals);
agxdecode_sampler_heap(ctx, c->vertex_sampler_array, agxdecode_sampler_heap(ctx, c->sampler_heap, c->sampler_count);
c->vertex_sampler_count);
/* Linux driver doesn't use this, at least for now */ agxdecode_helper(ctx, "Vertex", c->vertex_helper.binary);
assert(c->fragment_sampler_array == c->vertex_sampler_array); agxdecode_helper(ctx, "Fragment", c->fragment_helper.binary);
assert(c->fragment_sampler_count == c->vertex_sampler_count);
DUMP_FIELD(c, "%d", vertex_attachment_count);
struct drm_asahi_attachment *vertex_attachments =
(void *)(uintptr_t)c->vertex_attachments;
for (unsigned i = 0; i < c->vertex_attachment_count; i++) {
DUMP_FIELD((&vertex_attachments[i]), "0x%x", order);
DUMP_FIELD((&vertex_attachments[i]), "0x%llx", size);
DUMP_FIELD((&vertex_attachments[i]), "0x%llx", pointer);
}
DUMP_FIELD(c, "%d", fragment_attachment_count);
struct drm_asahi_attachment *fragment_attachments =
(void *)(uintptr_t)c->fragment_attachments;
for (unsigned i = 0; i < c->fragment_attachment_count; i++) {
DUMP_FIELD((&fragment_attachments[i]), "0x%x", order);
DUMP_FIELD((&fragment_attachments[i]), "0x%llx", size);
DUMP_FIELD((&fragment_attachments[i]), "0x%llx", pointer);
}
agxdecode_helper(ctx, "Vertex", c->vertex_helper_program);
agxdecode_helper(ctx, "Fragment", c->fragment_helper_program);
} }
void static void
agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx, agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx,
struct drm_asahi_params_global *params, struct drm_asahi_params_global *params,
struct drm_asahi_cmd_compute *c, bool verbose) struct drm_asahi_cmd_compute *c, bool verbose)
{
DUMP_FIELD(c, "%x", flags);
DUMP_FIELD(c, "0x%llx", cdm_ctrl_stream_base);
agxdecode_stateful(ctx, c->cdm_ctrl_stream_base, "Encoder", agxdecode_cdm,
verbose, params, NULL);
agxdecode_sampler_heap(ctx, c->sampler_heap, c->sampler_count);
agxdecode_helper(ctx, "Compute", c->helper.binary);
}
static void
agxdecode_drm_attachments(const char *name, struct drm_asahi_attachment *atts,
size_t size)
{
fprintf(agxdecode_dump_stream, "%s attachments:\n", name);
unsigned count = size / sizeof(struct drm_asahi_attachment);
for (unsigned i = 0; i < count; i++) {
DUMP_FIELD((&atts[i]), "0x%llx", size);
DUMP_FIELD((&atts[i]), "0x%llx", pointer);
}
}
void
agxdecode_drm_cmdbuf(struct agxdecode_ctx *ctx,
struct drm_asahi_params_global *params,
struct util_dynarray *cmdbuf, bool verbose)
{ {
agxdecode_dump_file_open(); agxdecode_dump_file_open();
DUMP_FIELD(c, "%llx", flags); for (unsigned offs = 0; offs < cmdbuf->size;) {
DUMP_FIELD(c, "0x%llx", encoder_ptr); struct drm_asahi_cmd_header *header =
agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_cdm, verbose, (void *)((uint8_t *)cmdbuf->data) + offs;
params, NULL); offs += sizeof(*header);
DUMP_FIELD(c, "0x%x", encoder_id); void *data = (void *)((uint8_t *)cmdbuf->data) + offs;
DUMP_FIELD(c, "0x%x", cmd_id);
agxdecode_sampler_heap(ctx, c->sampler_array, c->sampler_count); if (header->cmd_type == DRM_ASAHI_CMD_RENDER) {
agxdecode_helper(ctx, "Compute", c->helper_program); agxdecode_drm_cmd_render(ctx, params, data, verbose);
} else if (header->cmd_type == DRM_ASAHI_CMD_COMPUTE) {
agxdecode_drm_cmd_compute(ctx, params, data, verbose);
} else if (header->cmd_type == DRM_ASAHI_SET_VERTEX_ATTACHMENTS) {
agxdecode_drm_attachments("Vertex", data, header->size);
} else if (header->cmd_type == DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS) {
agxdecode_drm_attachments("Fragment", data, header->size);
} else if (header->cmd_type == DRM_ASAHI_SET_COMPUTE_ATTACHMENTS) {
agxdecode_drm_attachments("Compute", data, header->size);
} else {
unreachable("Invalid command type");
}
offs += header->size;
}
} }
static void static void


@ -10,7 +10,7 @@
#include <sys/types.h> #include <sys/types.h>
#include "agx_bo.h" #include "agx_bo.h"
#include "unstable_asahi_drm.h" #include "drm-uapi/asahi_drm.h"
struct agxdecode_ctx; struct agxdecode_ctx;
@ -28,15 +28,11 @@ void agxdecode_cmdstream(struct agxdecode_ctx *ctx, unsigned cmdbuf_index,
void agxdecode_image_heap(struct agxdecode_ctx *ctx, uint64_t heap, void agxdecode_image_heap(struct agxdecode_ctx *ctx, uint64_t heap,
unsigned nr_entries); unsigned nr_entries);
void agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx, struct util_dynarray;
struct drm_asahi_params_global *params,
struct drm_asahi_cmd_render *cmdbuf,
bool verbose);
void agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx, void agxdecode_drm_cmdbuf(struct agxdecode_ctx *ctx,
struct drm_asahi_params_global *params, struct drm_asahi_params_global *params,
struct drm_asahi_cmd_compute *cmdbuf, struct util_dynarray *cmdbuf, bool verbose);
bool verbose);
void agxdecode_dump_file_open(void); void agxdecode_dump_file_open(void);


@ -81,17 +81,9 @@ VkResult
hk_bind_scratch(struct hk_device *dev, struct agx_va *va, unsigned offset_B, hk_bind_scratch(struct hk_device *dev, struct agx_va *va, unsigned offset_B,
size_t size_B) size_t size_B)
{ {
VkResult result = VK_SUCCESS; return agx_bo_bind(
&dev->dev, dev->sparse.write, va->addr + offset_B, size_B, 0,
for (unsigned i = 0; i < size_B; i += AIL_PAGESIZE) { DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE | DRM_ASAHI_BIND_SINGLE_PAGE);
result = dev->dev.ops.bo_bind(&dev->dev, dev->sparse.write,
va->addr + offset_B + i, AIL_PAGESIZE, 0,
ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
if (result != VK_SUCCESS)
return result;
}
return result;
} }
VKAPI_ATTR VkResult VKAPI_CALL VKAPI_ATTR VkResult VKAPI_CALL
@ -253,11 +245,9 @@ hk_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount,
if (buffer->va) { if (buffer->va) {
VK_FROM_HANDLE(hk_device, dev, device); VK_FROM_HANDLE(hk_device, dev, device);
size_t size = MIN2(mem->bo->size, buffer->va->size_B); size_t size = MIN2(mem->bo->size, buffer->va->size_B);
int ret = int ret = agx_bo_bind(&dev->dev, mem->bo, buffer->vk.device_address,
dev->dev.ops.bo_bind(&dev->dev, mem->bo, buffer->vk.device_address, size, pBindInfos[i].memoryOffset,
size, pBindInfos[i].memoryOffset, DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE);
ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
if (ret) if (ret)
return VK_ERROR_UNKNOWN; return VK_ERROR_UNKNOWN;
} else { } else {


@ -151,7 +151,6 @@ struct hk_render_registers {
uint32_t isp_bgobjdepth; uint32_t isp_bgobjdepth;
uint32_t isp_bgobjvals; uint32_t isp_bgobjvals;
struct agx_zls_control_packed zls_control, zls_control_partial; struct agx_zls_control_packed zls_control, zls_control_partial;
uint32_t iogpu_unk_214;
uint32_t depth_dimensions; uint32_t depth_dimensions;
bool process_empty_tiles; bool process_empty_tiles;
enum u_tristate dbias_is_int; enum u_tristate dbias_is_int;


@ -680,8 +680,6 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
const VkRenderingAttachmentInfo *attach_s = const VkRenderingAttachmentInfo *attach_s =
pRenderingInfo->pStencilAttachment; pRenderingInfo->pStencilAttachment;
render->cr.iogpu_unk_214 = 0xc000;
struct ail_layout *z_layout = NULL, *s_layout = NULL; struct ail_layout *z_layout = NULL, *s_layout = NULL;
if (attach_z != NULL && attach_z != VK_NULL_HANDLE && attach_z->imageView) { if (attach_z != NULL && attach_z != VK_NULL_HANDLE && attach_z->imageView) {


@ -285,7 +285,7 @@ static VkResult
hk_get_timestamp(struct vk_device *device, uint64_t *timestamp) hk_get_timestamp(struct vk_device *device, uint64_t *timestamp)
{ {
struct hk_device *dev = container_of(device, struct hk_device, vk); struct hk_device *dev = container_of(device, struct hk_device, vk);
*timestamp = agx_gpu_time_to_ns(&dev->dev, agx_get_gpu_timestamp(&dev->dev)); *timestamp = agx_get_gpu_timestamp(&dev->dev);
return VK_SUCCESS; return VK_SUCCESS;
} }


@ -52,7 +52,7 @@ hk_memory_type_flags(const VkMemoryType *type,
static void static void
hk_add_ext_bo_locked(struct hk_device *dev, struct agx_bo *bo) hk_add_ext_bo_locked(struct hk_device *dev, struct agx_bo *bo)
{ {
uint32_t id = bo->vbo_res_id; uint32_t id = bo->uapi_handle;
unsigned count = util_dynarray_num_elements(&dev->external_bos.list, unsigned count = util_dynarray_num_elements(&dev->external_bos.list,
struct asahi_ccmd_submit_res); struct asahi_ccmd_submit_res);
@ -89,7 +89,7 @@ hk_add_ext_bo(struct hk_device *dev, struct agx_bo *bo)
static void static void
hk_remove_ext_bo_locked(struct hk_device *dev, struct agx_bo *bo) hk_remove_ext_bo_locked(struct hk_device *dev, struct agx_bo *bo)
{ {
uint32_t id = bo->vbo_res_id; uint32_t id = bo->uapi_handle;
unsigned count = util_dynarray_num_elements(&dev->external_bos.list, unsigned count = util_dynarray_num_elements(&dev->external_bos.list,
struct asahi_ccmd_submit_res); struct asahi_ccmd_submit_res);


@ -23,7 +23,6 @@
#include "util/simple_mtx.h" #include "util/simple_mtx.h"
#include "vulkan/vulkan_core.h" #include "vulkan/vulkan_core.h"
#include "vulkan/wsi/wsi_common.h" #include "vulkan/wsi/wsi_common.h"
#include "unstable_asahi_drm.h"
#include "vk_drm_syncobj.h" #include "vk_drm_syncobj.h"
#include "vk_shader_module.h" #include "vk_shader_module.h"
@ -274,13 +273,10 @@ hk_get_device_features(
.sparseResidencyAliased = true, .sparseResidencyAliased = true,
.sparseResidencyImage2D = true, .sparseResidencyImage2D = true,
/* We depend on soft fault to implement sparse residency on buffers with /* TODO: We need to implement sparse buffer without soft fault to avoid
* the appropriate semantics. Lifting this requirement would be possible * tying our hands later.
* but challenging, given the requirements imposed by
* sparseResidencyNonResidentStrict.
*/ */
.sparseResidencyBuffer = .sparseResidencyBuffer = false,
(dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS),
/* This needs investigation. */ /* This needs investigation. */
.sparseResidencyImage3D = false, .sparseResidencyImage3D = false,
@ -743,7 +739,7 @@ hk_get_device_properties(const struct agx_device *dev,
.sampledImageStencilSampleCounts = sample_counts, .sampledImageStencilSampleCounts = sample_counts,
.storageImageSampleCounts = sample_counts, .storageImageSampleCounts = sample_counts,
.maxSampleMaskWords = 1, .maxSampleMaskWords = 1,
.timestampComputeAndGraphics = agx_supports_timestamps(dev), .timestampComputeAndGraphics = true,
/* FIXME: Is timestamp period actually 1? */ /* FIXME: Is timestamp period actually 1? */
.timestampPeriod = 1.0f, .timestampPeriod = 1.0f,
.maxClipDistances = 8, .maxClipDistances = 8,
@ -1141,9 +1137,6 @@ hk_create_drm_physical_device(struct vk_instance *_instance,
struct hk_instance *instance = (struct hk_instance *)_instance; struct hk_instance *instance = (struct hk_instance *)_instance;
VkResult result; VkResult result;
/* Blanket refusal to probe due to unstable UAPI. */
return VK_ERROR_INCOMPATIBLE_DRIVER;
if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) || if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
drm_device->bustype != DRM_BUS_PLATFORM) drm_device->bustype != DRM_BUS_PLATFORM)
return VK_ERROR_INCOMPATIBLE_DRIVER; return VK_ERROR_INCOMPATIBLE_DRIVER;
@ -1433,8 +1426,7 @@ hk_GetPhysicalDeviceQueueFamilyProperties2(
{ {
p->queueFamilyProperties.queueFlags = queue_family->queue_flags; p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
p->queueFamilyProperties.queueCount = queue_family->queue_count; p->queueFamilyProperties.queueCount = queue_family->queue_count;
p->queueFamilyProperties.timestampValidBits = p->queueFamilyProperties.timestampValidBits = 64;
agx_supports_timestamps(&pdev->dev) ? 64 : 0;
p->queueFamilyProperties.minImageTransferGranularity = p->queueFamilyProperties.minImageTransferGranularity =
(VkExtent3D){1, 1, 1}; (VkExtent3D){1, 1, 1};


@ -24,6 +24,7 @@
#include "compiler/nir/nir.h" #include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h" #include "compiler/nir/nir_builder.h"
#include "drm-uapi/asahi_drm.h"
#include "util/os_time.h" #include "util/os_time.h"
#include "util/u_dynarray.h" #include "util/u_dynarray.h"
#include "vulkan/vulkan_core.h" #include "vulkan/vulkan_core.h"
@ -88,10 +89,6 @@ hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
bool timestamp = pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP; bool timestamp = pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP;
unsigned occlusion_queries = occlusion ? pCreateInfo->queryCount : 0; unsigned occlusion_queries = occlusion ? pCreateInfo->queryCount : 0;
/* Workaround for DXVK on old kernels */
if (!agx_supports_timestamps(&dev->dev))
timestamp = false;
pool = pool =
vk_query_pool_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*pool)); vk_query_pool_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*pool));
if (!pool) if (!pool)
@ -131,10 +128,7 @@ hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
* them. * them.
*/ */
if (timestamp) { if (timestamp) {
int ret = dev->dev.ops.bo_bind_object( int ret = agx_bind_timestamps(&dev->dev, pool->bo, &pool->handle);
&dev->dev, pool->bo, &pool->handle, pool->bo->size, 0,
ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS);
if (ret) { if (ret) {
hk_DestroyQueryPool(device, hk_query_pool_to_handle(pool), hk_DestroyQueryPool(device, hk_query_pool_to_handle(pool),
pAllocator); pAllocator);
@ -186,7 +180,7 @@ hk_DestroyQueryPool(VkDevice device, VkQueryPool queryPool,
} }
if (pool->handle) if (pool->handle)
dev->dev.ops.bo_unbind_object(&dev->dev, pool->handle, 0); dev->dev.ops.bo_unbind_object(&dev->dev, pool->handle);
agx_bo_unreference(&dev->dev, pool->bo); agx_bo_unreference(&dev->dev, pool->bo);
vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk); vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk);
@ -390,12 +384,7 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
VK_FROM_HANDLE(hk_query_pool, pool, queryPool); VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
struct hk_device *dev = hk_cmd_buffer_device(cmd); struct hk_device *dev = hk_cmd_buffer_device(cmd);
/* Workaround for DXVK on old kernels */
if (!agx_supports_timestamps(&dev->dev))
return;
uint64_t report_addr = hk_query_report_addr(dev, pool, query); uint64_t report_addr = hk_query_report_addr(dev, pool, query);
bool after_gfx = cmd->current_cs.gfx != NULL; bool after_gfx = cmd->current_cs.gfx != NULL;
/* When writing timestamps for compute, we split the control stream at each /* When writing timestamps for compute, we split the control stream at each


@ -26,9 +26,9 @@
#include "hk_physical_device.h" #include "hk_physical_device.h"
#include <xf86drm.h> #include <xf86drm.h>
#include "asahi/lib/unstable_asahi_drm.h"
#include "util/list.h" #include "util/list.h"
#include "util/macros.h" #include "util/macros.h"
#include "util/u_dynarray.h"
#include "vulkan/vulkan_core.h" #include "vulkan/vulkan_core.h"
#include "hk_private.h" #include "hk_private.h"
@ -78,67 +78,39 @@ queue_submit_empty(struct hk_device *dev, struct hk_queue *queue,
static void static void
asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs, asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs,
struct drm_asahi_cmd_compute *cmd, struct drm_asahi_cmd_compute *cmd)
struct drm_asahi_cmd_compute_user_timestamps *timestamps)
{ {
size_t len = cs->stream_linked ? 65536 /* XXX */ : (cs->current - cs->start); size_t len = cs->stream_linked ? 65536 /* XXX */ : (cs->current - cs->start);
*cmd = (struct drm_asahi_cmd_compute){ *cmd = (struct drm_asahi_cmd_compute){
.encoder_ptr = cs->addr, .cdm_ctrl_stream_base = cs->addr,
.encoder_end = cs->addr + len, .cdm_ctrl_stream_end = cs->addr + len,
.sampler_array = dev->samplers.table.bo->va->addr, .sampler_heap = dev->samplers.table.bo->va->addr,
.sampler_count = dev->samplers.table.alloc, .sampler_count = dev->samplers.table.alloc,
.sampler_max = dev->samplers.table.alloc + 1,
.usc_base = dev->dev.shader_base, .ts.end.handle = cs->timestamp.end.handle,
.ts.end.offset = cs->timestamp.end.offset_B,
.encoder_id = agx_get_global_id(&dev->dev),
.cmd_id = agx_get_global_id(&dev->dev),
.unk_mask = 0xffffffff,
}; };
if (cs->timestamp.end.handle) {
assert(agx_supports_timestamps(&dev->dev));
*timestamps = (struct drm_asahi_cmd_compute_user_timestamps){
.type = ASAHI_COMPUTE_EXT_TIMESTAMPS,
.end_handle = cs->timestamp.end.handle,
.end_offset = cs->timestamp.end.offset_B,
};
cmd->extensions = (uint64_t)(uintptr_t)timestamps;
}
if (cs->scratch.cs.main || cs->scratch.cs.preamble) { if (cs->scratch.cs.main || cs->scratch.cs.preamble) {
cmd->helper_arg = dev->scratch.cs.buf->va->addr; cmd->helper.data = dev->scratch.cs.buf->va->addr;
cmd->helper_cfg = cs->scratch.cs.preamble ? (1 << 16) : 0; cmd->helper.cfg = cs->scratch.cs.preamble ? (1 << 16) : 0;
cmd->helper_program = agx_helper_program(&dev->bg_eot); cmd->helper.binary = agx_helper_program(&dev->bg_eot);
} }
} }
static void static void
asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs, asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
struct drm_asahi_cmd_render *c, struct drm_asahi_cmd_render *c)
struct drm_asahi_cmd_render_user_timestamps *timestamps)
{ {
unsigned cmd_ta_id = agx_get_global_id(&dev->dev);
unsigned cmd_3d_id = agx_get_global_id(&dev->dev);
unsigned encoder_id = agx_get_global_id(&dev->dev);
memset(c, 0, sizeof(*c)); memset(c, 0, sizeof(*c));
c->encoder_ptr = cs->addr; c->vdm_ctrl_stream_base = cs->addr;
c->encoder_id = encoder_id;
c->cmd_3d_id = cmd_3d_id;
c->cmd_ta_id = cmd_ta_id;
c->ppp_ctrl = 0x202; c->ppp_ctrl = 0x202;
c->fragment_usc_base = dev->dev.shader_base; c->width_px = cs->cr.width;
c->vertex_usc_base = c->fragment_usc_base; c->height_px = cs->cr.height;
c->fb_width = cs->cr.width;
c->fb_height = cs->cr.height;
c->isp_bgobjdepth = cs->cr.isp_bgobjdepth; c->isp_bgobjdepth = cs->cr.isp_bgobjdepth;
c->isp_bgobjvals = cs->cr.isp_bgobjvals; c->isp_bgobjvals = cs->cr.isp_bgobjvals;
@ -146,65 +118,30 @@ asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
static_assert(sizeof(c->zls_ctrl) == sizeof(cs->cr.zls_control)); static_assert(sizeof(c->zls_ctrl) == sizeof(cs->cr.zls_control));
memcpy(&c->zls_ctrl, &cs->cr.zls_control, sizeof(cs->cr.zls_control)); memcpy(&c->zls_ctrl, &cs->cr.zls_control, sizeof(cs->cr.zls_control));
c->depth_dimensions = agx_pack(&c->isp_zls_pixels, CR_ISP_ZLS_PIXELS, cfg) {
(cs->cr.zls_width - 1) | ((cs->cr.zls_height - 1) << 15); cfg.x = cs->cr.zls_width;
cfg.y = cs->cr.zls_height;
}
c->depth_buffer_load = cs->cr.depth.buffer; c->depth.base = cs->cr.depth.buffer;
c->depth_buffer_store = cs->cr.depth.buffer; c->depth.stride = cs->cr.depth.stride;
c->depth_buffer_partial = cs->cr.depth.buffer; c->depth.comp_base = cs->cr.depth.meta;
c->depth.comp_stride = cs->cr.depth.meta_stride;
c->depth_buffer_load_stride = cs->cr.depth.stride; c->stencil.base = cs->cr.stencil.buffer;
c->depth_buffer_store_stride = cs->cr.depth.stride; c->stencil.stride = cs->cr.stencil.stride;
c->depth_buffer_partial_stride = cs->cr.depth.stride; c->stencil.comp_base = cs->cr.stencil.meta;
c->stencil.comp_stride = cs->cr.stencil.meta_stride;
c->depth_meta_buffer_load = cs->cr.depth.meta;
c->depth_meta_buffer_store = cs->cr.depth.meta;
c->depth_meta_buffer_partial = cs->cr.depth.meta;
c->depth_meta_buffer_load_stride = cs->cr.depth.stride;
c->depth_meta_buffer_store_stride = cs->cr.depth.meta_stride;
c->depth_meta_buffer_partial_stride = cs->cr.depth.meta_stride;
c->stencil_buffer_load = cs->cr.stencil.buffer;
c->stencil_buffer_store = cs->cr.stencil.buffer;
c->stencil_buffer_partial = cs->cr.stencil.buffer;
c->stencil_buffer_load_stride = cs->cr.stencil.stride;
c->stencil_buffer_store_stride = cs->cr.stencil.stride;
c->stencil_buffer_partial_stride = cs->cr.stencil.stride;
c->stencil_meta_buffer_load = cs->cr.stencil.meta;
c->stencil_meta_buffer_store = cs->cr.stencil.meta;
c->stencil_meta_buffer_partial = cs->cr.stencil.meta;
c->stencil_meta_buffer_load_stride = cs->cr.stencil.stride;
c->stencil_meta_buffer_store_stride = cs->cr.stencil.meta_stride;
c->stencil_meta_buffer_partial_stride = cs->cr.stencil.meta_stride;
c->iogpu_unk_214 = cs->cr.iogpu_unk_214;
if (cs->cr.dbias_is_int == U_TRISTATE_YES) { if (cs->cr.dbias_is_int == U_TRISTATE_YES) {
c->iogpu_unk_214 |= 0x40000; c->flags |= DRM_ASAHI_RENDER_DBIAS_IS_INT;
} }
if (dev->dev.debug & AGX_DBG_NOCLUSTER) { if (dev->dev.debug & AGX_DBG_NOCLUSTER) {
c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING; c->flags |= DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING;
} else {
/* XXX: We don't know what this does exactly, and the name is
* surely wrong. But it fixes dEQP-VK.memory.pipeline_barrier.* tests on
* G14C when clustering is enabled...
*/
c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES;
} }
#if 0 c->utile_width_px = cs->tib.tile_size.width;
/* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */ c->utile_height_px = cs->tib.tile_size.height;
if (tib->nr_samples > 1 && framebuffer->zsbuf)
c->flags |= ASAHI_RENDER_MSAA_ZS;
#endif
c->utile_width = cs->tib.tile_size.width;
c->utile_height = cs->tib.tile_size.height;
/* Can be 0 for attachmentless rendering with no draws */ /* Can be 0 for attachmentless rendering with no draws */
c->samples = MAX2(cs->tib.nr_samples, 1); c->samples = MAX2(cs->tib.nr_samples, 1);
@ -217,75 +154,62 @@ asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
* *
* XXX: Hack for vkd3d-proton. * XXX: Hack for vkd3d-proton.
*/ */
if (c->layers == 2048 && c->fb_width == 16384 && c->fb_height == 16384) { if (c->layers == 2048 && c->width_px == 16384 && c->height_px == 16384) {
mesa_log(MESA_LOG_WARN, MESA_LOG_TAG, "Clamping massive framebuffer"); mesa_log(MESA_LOG_WARN, MESA_LOG_TAG, "Clamping massive framebuffer");
c->layers = 32; c->layers = 32;
} }
c->ppp_multisamplectl = cs->ppp_multisamplectl; c->ppp_multisamplectl = cs->ppp_multisamplectl;
c->sample_size = cs->tib.sample_size_B; c->sample_size_B = cs->tib.sample_size_B;
c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(&cs->tib), 2048) / 2048;
float tan_60 = 1.732051f; float tan_60 = 1.732051f;
c->merge_upper_x = fui(tan_60 / cs->cr.width); c->isp_merge_upper_x = fui(tan_60 / cs->cr.width);
c->merge_upper_y = fui(tan_60 / cs->cr.height); c->isp_merge_upper_y = fui(tan_60 / cs->cr.height);
c->load_pipeline = cs->cr.bg.main.usc | 4; c->bg.usc = cs->cr.bg.main.usc | 4;
c->store_pipeline = cs->cr.eot.main.usc | 4; c->eot.usc = cs->cr.eot.main.usc | 4;
c->partial_reload_pipeline = cs->cr.bg.partial.usc | 4; c->partial_bg.usc = cs->cr.bg.partial.usc | 4;
c->partial_store_pipeline = cs->cr.eot.partial.usc | 4; c->partial_eot.usc = cs->cr.eot.partial.usc | 4;
memcpy(&c->load_pipeline_bind, &cs->cr.bg.main.counts, memcpy(&c->bg.rsrc_spec, &cs->cr.bg.main.counts,
sizeof(struct agx_counts_packed)); sizeof(struct agx_counts_packed));
memcpy(&c->store_pipeline_bind, &cs->cr.eot.main.counts, memcpy(&c->eot.rsrc_spec, &cs->cr.eot.main.counts,
sizeof(struct agx_counts_packed)); sizeof(struct agx_counts_packed));
memcpy(&c->partial_reload_pipeline_bind, &cs->cr.bg.partial.counts, memcpy(&c->partial_bg.rsrc_spec, &cs->cr.bg.partial.counts,
sizeof(struct agx_counts_packed)); sizeof(struct agx_counts_packed));
memcpy(&c->partial_store_pipeline_bind, &cs->cr.eot.partial.counts, memcpy(&c->partial_eot.rsrc_spec, &cs->cr.eot.partial.counts,
sizeof(struct agx_counts_packed)); sizeof(struct agx_counts_packed));
c->scissor_array = cs->uploaded_scissor; c->isp_scissor_base = cs->uploaded_scissor;
c->depth_bias_array = cs->uploaded_zbias; c->isp_dbias_base = cs->uploaded_zbias;
c->vertex_sampler_array = dev->samplers.table.bo->va->addr; c->sampler_heap = dev->samplers.table.bo->va->addr;
c->vertex_sampler_count = dev->samplers.table.alloc; c->sampler_count = dev->samplers.table.alloc;
c->vertex_sampler_max = dev->samplers.table.alloc + 1;
c->fragment_sampler_array = c->vertex_sampler_array; c->isp_oclqry_base = dev->occlusion_queries.bo->va->addr;
c->fragment_sampler_count = c->vertex_sampler_count;
c->fragment_sampler_max = c->vertex_sampler_max;
c->visibility_result_buffer = dev->occlusion_queries.bo->va->addr;
if (cs->cr.process_empty_tiles) if (cs->cr.process_empty_tiles)
c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES; c->flags |= DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES;
if (cs->scratch.vs.main || cs->scratch.vs.preamble) { if (cs->scratch.vs.main || cs->scratch.vs.preamble) {
c->flags |= ASAHI_RENDER_VERTEX_SPILLS; c->flags |= DRM_ASAHI_RENDER_VERTEX_SCRATCH;
c->vertex_helper_arg = dev->scratch.vs.buf->va->addr; c->vertex_helper.data = dev->scratch.vs.buf->va->addr;
c->vertex_helper_cfg = cs->scratch.vs.preamble ? (1 << 16) : 0; c->vertex_helper.cfg = cs->scratch.vs.preamble ? (1 << 16) : 0;
c->vertex_helper_program = agx_helper_program(&dev->bg_eot); c->vertex_helper.binary = agx_helper_program(&dev->bg_eot);
} }
if (cs->scratch.fs.main || cs->scratch.fs.preamble) { if (cs->scratch.fs.main || cs->scratch.fs.preamble) {
c->fragment_helper_arg = dev->scratch.fs.buf->va->addr; c->fragment_helper.data = dev->scratch.fs.buf->va->addr;
c->fragment_helper_cfg = cs->scratch.fs.preamble ? (1 << 16) : 0; c->fragment_helper.cfg = cs->scratch.fs.preamble ? (1 << 16) : 0;
c->fragment_helper_program = agx_helper_program(&dev->bg_eot); c->fragment_helper.binary = agx_helper_program(&dev->bg_eot);
} }
if (cs->timestamp.end.handle) { if (cs->timestamp.end.handle) {
assert(agx_supports_timestamps(&dev->dev)); c->ts_frag.end.handle = cs->timestamp.end.handle;
c->ts_frag.end.offset = cs->timestamp.end.offset_B;
c->extensions = (uint64_t)(uintptr_t)timestamps;
*timestamps = (struct drm_asahi_cmd_render_user_timestamps){
.type = ASAHI_RENDER_EXT_TIMESTAMPS,
.frg_end_handle = cs->timestamp.end.handle,
.frg_end_offset = cs->timestamp.end.offset_B,
};
} }
} }
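For reference, the CR_ISP_ZLS_PIXELS pack used above encodes exactly what the removed open-coded expression did: width and height each stored minus one, in two adjacent 15-bit fields. A minimal sketch of that encoding (helper name hypothetical):

#include <stdint.h>

/* Sketch of the ZLS pixel-count word implied by the removed expression:
 * bits 0..14 hold (width - 1), bits 15..29 hold (height - 1). */
static inline uint32_t
pack_isp_zls_pixels(uint32_t width_px, uint32_t height_px)
{
   return (width_px - 1u) | ((height_px - 1u) << 15);
}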
@ -314,11 +238,6 @@ union drm_asahi_cmd {
struct drm_asahi_cmd_render render; struct drm_asahi_cmd_render render;
}; };
union drm_asahi_user_timestamps {
struct drm_asahi_cmd_compute_user_timestamps compute;
struct drm_asahi_cmd_render_user_timestamps render;
};
/* XXX: Batching multiple commands per submission is causing rare (7ppm) flakes /* XXX: Batching multiple commands per submission is causing rare (7ppm) flakes
* on the CTS once lossless compression is enabled. This needs to be * on the CTS once lossless compression is enabled. This needs to be
* investigated before we can reenable this mechanism. We are likely missing a * investigated before we can reenable this mechanism. We are likely missing a
@ -333,11 +252,7 @@ max_commands_per_submit(struct hk_device *dev)
static VkResult static VkResult
queue_submit_single(struct hk_device *dev, struct drm_asahi_submit *submit) queue_submit_single(struct hk_device *dev, struct drm_asahi_submit *submit)
{ {
/* Currently we don't use the result buffer or implicit sync */ struct agx_submit_virt virt = {0};
struct agx_submit_virt virt = {
.vbo_res_id = 0,
.extres_count = 0,
};
if (dev->dev.is_virtio) { if (dev->dev.is_virtio) {
u_rwlock_rdlock(&dev->external_bos.lock); u_rwlock_rdlock(&dev->external_bos.lock);
@ -367,14 +282,19 @@ queue_submit_single(struct hk_device *dev, struct drm_asahi_submit *submit)
* bounds. * bounds.
*/ */
static VkResult static VkResult
queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit) queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit,
unsigned command_count)
{ {
struct drm_asahi_command *cmds = (void *)(uintptr_t)submit->commands; uint8_t *cmdbuf = (uint8_t *)(uintptr_t)submit->cmdbuf;
unsigned commands_remaining = submit->command_count; uint32_t offs = 0;
unsigned submitted[DRM_ASAHI_SUBQUEUE_COUNT] = {0}; unsigned submitted_vdm = 0, submitted_cdm = 0;
unsigned commands_remaining = command_count;
uint64_t out_syncs =
submit->syncs + sizeof(struct drm_asahi_sync) * submit->in_sync_count;
while (commands_remaining) { while (commands_remaining) {
bool first = commands_remaining == submit->command_count; bool first = commands_remaining == command_count;
bool last = commands_remaining <= max_commands_per_submit(dev); bool last = commands_remaining <= max_commands_per_submit(dev);
unsigned count = MIN2(commands_remaining, max_commands_per_submit(dev)); unsigned count = MIN2(commands_remaining, max_commands_per_submit(dev));
@ -383,13 +303,27 @@ queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit)
assert(!last || commands_remaining == 0); assert(!last || commands_remaining == 0);
assert(count > 0); assert(count > 0);
unsigned base_offs = offs;
unsigned cdm_count = 0, vdm_count = 0;
/* We need to fix up the barriers since barriers are ioctl-relative */ /* We need to fix up the barriers since barriers are ioctl-relative */
for (unsigned i = 0; i < count; ++i) { for (unsigned i = 0; i < count; ++i) {
for (unsigned q = 0; q < DRM_ASAHI_SUBQUEUE_COUNT; ++q) { struct drm_asahi_cmd_header *cmd = (void *)(cmdbuf + offs);
if (cmds[i].barriers[q] != DRM_ASAHI_BARRIER_NONE) { offs += sizeof(*cmd) + cmd->size;
assert(cmds[i].barriers[q] >= submitted[q]);
cmds[i].barriers[q] -= submitted[q]; if (cmd->cmd_type == DRM_ASAHI_CMD_RENDER)
} vdm_count++;
else if (cmd->cmd_type == DRM_ASAHI_CMD_COMPUTE)
cdm_count++;
if (cmd->vdm_barrier != DRM_ASAHI_BARRIER_NONE) {
assert(cmd->vdm_barrier >= submitted_vdm);
cmd->vdm_barrier -= submitted_vdm;
}
if (cmd->cdm_barrier != DRM_ASAHI_BARRIER_NONE) {
assert(cmd->cdm_barrier >= submitted_cdm);
cmd->cdm_barrier -= submitted_cdm;
} }
} }
@ -399,38 +333,35 @@ queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit)
* TODO: there might be a more performant way to do this. * TODO: there might be a more performant way to do this.
*/ */
if (last && !first) { if (last && !first) {
for (unsigned q = 0; q < DRM_ASAHI_SUBQUEUE_COUNT; ++q) { struct drm_asahi_cmd_header *cmd = (void *)(cmdbuf + base_offs);
if (cmds[0].barriers[q] == DRM_ASAHI_BARRIER_NONE)
cmds[0].barriers[q] = 0; if (cmd->vdm_barrier == DRM_ASAHI_BARRIER_NONE)
} cmd->vdm_barrier = 0;
if (cmd->cdm_barrier == DRM_ASAHI_BARRIER_NONE)
cmd->cdm_barrier = 0;
} }
bool has_in_syncs = first;
bool has_out_syncs = last;
struct drm_asahi_submit submit_ioctl = { struct drm_asahi_submit submit_ioctl = {
.flags = submit->flags, .flags = submit->flags,
.queue_id = submit->queue_id, .queue_id = submit->queue_id,
.result_handle = submit->result_handle, .cmdbuf = submit->cmdbuf + base_offs,
.commands = (uint64_t)(uintptr_t)(cmds), .cmdbuf_size = offs - base_offs,
.command_count = count,
.in_syncs = first ? submit->in_syncs : 0, .syncs = has_in_syncs ? submit->syncs : out_syncs,
.in_sync_count = first ? submit->in_sync_count : 0, .in_sync_count = has_in_syncs ? submit->in_sync_count : 0,
.out_syncs = last ? submit->out_syncs : 0, .out_sync_count = has_out_syncs ? submit->out_sync_count : 0,
.out_sync_count = last ? submit->out_sync_count : 0,
}; };
VkResult result = queue_submit_single(dev, &submit_ioctl); VkResult result = queue_submit_single(dev, &submit_ioctl);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
return result; return result;
for (unsigned i = 0; i < count; ++i) { submitted_cdm += cdm_count;
if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE) submitted_vdm += vdm_count;
submitted[DRM_ASAHI_SUBQUEUE_COMPUTE]++;
else if (cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER)
submitted[DRM_ASAHI_SUBQUEUE_RENDER]++;
else
unreachable("unknown subqueue");
}
cmds += count;
} }
return VK_SUCCESS; return VK_SUCCESS;
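The splitting above leans on the flat command-buffer layout of the new SUBMIT ioctl: each record is a drm_asahi_cmd_header whose size field gives the length of the payload that immediately follows, so a reader can hop between records with offs += sizeof(header) + header.size. A sketch of appending one render record, assuming agx_cmd_header() fills the same fields the loop above reads (the helper name here is hypothetical):

/* Sketch only: serialize one render command into a cmdbuf dynarray.  The
 * barrier indices count commands of each type within the same ioctl, which is
 * why queue_submit_looped() has to rebase them when it splits a submission. */
static void
append_render_cmd(struct util_dynarray *cmdbuf,
                  const struct drm_asahi_cmd_render *cmd,
                  uint32_t vdm_barrier, uint32_t cdm_barrier)
{
   struct drm_asahi_cmd_header header = {
      .cmd_type = DRM_ASAHI_CMD_RENDER,
      .size = sizeof(*cmd),       /* payload bytes, not counting the header */
      .vdm_barrier = vdm_barrier, /* ioctl-relative */
      .cdm_barrier = cdm_barrier,
   };

   util_dynarray_append(cmdbuf, struct drm_asahi_cmd_header, header);
   util_dynarray_append(cmdbuf, struct drm_asahi_cmd_render, *cmd);
}

The rebase itself is plain bookkeeping: if a command waits on VDM index 5 but three VDM commands already went out in earlier ioctls, it waits on index 2 within the current one.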
@ -449,18 +380,24 @@ struct hk_bind_builder {
VkDeviceSize size; VkDeviceSize size;
VkDeviceSize memoryOffset; VkDeviceSize memoryOffset;
VkResult result; VkResult result;
/* Array of drm_asahi_gem_bind_op entries */
struct util_dynarray binds;
}; };
static inline struct hk_bind_builder static inline struct hk_bind_builder
hk_bind_builder(struct hk_device *dev, struct vk_object_base *obj_base, hk_bind_builder(struct hk_device *dev, struct vk_object_base *obj_base,
struct agx_va *va, struct hk_image *image) struct agx_va *va, struct hk_image *image)
{ {
return (struct hk_bind_builder){ struct hk_bind_builder b = {
.dev = dev, .dev = dev,
.obj_base = obj_base, .obj_base = obj_base,
.va = va, .va = va,
.image = image, .image = image,
}; };
util_dynarray_init(&b.binds, NULL);
return b;
} }
static VkResult static VkResult
@ -523,13 +460,43 @@ hk_flush_bind(struct hk_bind_builder *b)
/* When the app wants to unbind, replace the bound pages with scratch pages /* When the app wants to unbind, replace the bound pages with scratch pages
* so we don't leave a gap. * so we don't leave a gap.
*/ */
struct drm_asahi_gem_bind_op op;
if (!b->mem) { if (!b->mem) {
return hk_bind_scratch(b->dev, b->va, b->resourceOffset, b->size); op = (struct drm_asahi_gem_bind_op){
.handle = b->dev->sparse.write->uapi_handle,
.flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE |
DRM_ASAHI_BIND_SINGLE_PAGE,
.addr = b->va->addr + b->resourceOffset,
.range = b->size,
};
} else { } else {
return b->dev->dev.ops.bo_bind(&b->dev->dev, b->mem->bo, va_addr, b->size, op = (struct drm_asahi_gem_bind_op){
b->memoryOffset, .handle = b->mem->bo->uapi_handle,
ASAHI_BIND_READ | ASAHI_BIND_WRITE, false); .flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE,
.addr = va_addr,
.offset = b->memoryOffset,
.range = b->size,
};
} }
util_dynarray_append(&b->binds, struct drm_asahi_gem_bind_op, op);
return VK_SUCCESS;
}
static int
hk_bind_builder_finish(struct hk_bind_builder *b)
{
hk_flush_bind(b);
/* Submit everything to the kernel at once */
if (b->binds.size > 0) {
b->dev->dev.ops.bo_bind(
&b->dev->dev, b->binds.data,
util_dynarray_num_elements(&b->binds, struct drm_asahi_gem_bind_op));
}
util_dynarray_fini(&b->binds);
return b->result;
} }
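The builder collects every VM update into one array of drm_asahi_gem_bind_op and hands the whole array to ops.bo_bind() in a single call, rather than issuing one ioctl per range. Unbinds are expressed as a rebind of the shared scratch BO with DRM_ASAHI_BIND_SINGLE_PAGE, which, as the code above implies, presumably repeats one scratch page across the whole VA range so the region stays mapped instead of faulting. A sketch of recording such a scratch rebind (function name and call site hypothetical):

/* Sketch: record an "unbind" of a hole by re-pointing it at the scratch BO.
 * DRM_ASAHI_BIND_SINGLE_PAGE presumably tells the kernel to map the same
 * scratch page repeatedly over the whole range. */
static void
record_scratch_rebind(struct hk_device *dev, struct util_dynarray *binds,
                      uint64_t addr, uint64_t range)
{
   struct drm_asahi_gem_bind_op op = {
      .handle = dev->sparse.write->uapi_handle,
      .flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE |
               DRM_ASAHI_BIND_SINGLE_PAGE,
      .addr = addr,
      .range = range,
   };

   util_dynarray_append(binds, struct drm_asahi_gem_bind_op, op);
}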
static void static void
@ -577,7 +544,7 @@ hk_sparse_buffer_bind_memory(struct hk_device *device,
bind->pBinds[i].size, bind->pBinds[i].memoryOffset); bind->pBinds[i].size, bind->pBinds[i].memoryOffset);
} }
return hk_flush_bind(&b); return hk_bind_builder_finish(&b);
} }
static VkResult static VkResult
@ -623,7 +590,7 @@ hk_sparse_image_opaque_bind_memory(
} }
} }
return hk_flush_bind(&b); return hk_bind_builder_finish(&b);
} }
static void static void
@ -714,7 +681,7 @@ hk_sparse_image_bind_memory(struct hk_device *device,
} }
} }
return hk_flush_bind(&b); return hk_bind_builder_finish(&b);
} }
static VkResult static VkResult
@ -778,11 +745,9 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
return queue_submit_empty(dev, queue, submit); return queue_submit_empty(dev, queue, submit);
unsigned wait_count = 0; unsigned wait_count = 0;
struct drm_asahi_sync *waits = struct drm_asahi_sync *syncs =
alloca(submit->wait_count * sizeof(struct drm_asahi_sync)); alloca((submit->wait_count + submit->signal_count + 1) *
sizeof(struct drm_asahi_sync));
struct drm_asahi_sync *signals =
alloca((submit->signal_count + 1) * sizeof(struct drm_asahi_sync));
for (unsigned i = 0; i < submit->wait_count; ++i) { for (unsigned i = 0; i < submit->wait_count; ++i) {
/* The kernel rejects the submission if we try to wait on the same /* The kernel rejects the submission if we try to wait on the same
@ -808,36 +773,31 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
continue; continue;
} }
asahi_fill_sync(&waits[wait_count++], submit->waits[i].sync, asahi_fill_sync(&syncs[wait_count++], submit->waits[i].sync,
submit->waits[i].wait_value); submit->waits[i].wait_value);
} }
for (unsigned i = 0; i < submit->signal_count; ++i) { for (unsigned i = 0; i < submit->signal_count; ++i) {
asahi_fill_sync(&signals[i], submit->signals[i].sync, asahi_fill_sync(&syncs[wait_count + i], submit->signals[i].sync,
submit->signals[i].signal_value); submit->signals[i].signal_value);
} }
/* Signal progress on the queue itself */ /* Signal progress on the queue itself */
signals[submit->signal_count] = (struct drm_asahi_sync){ syncs[wait_count + submit->signal_count] = (struct drm_asahi_sync){
.sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ, .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
.handle = queue->drm.syncobj, .handle = queue->drm.syncobj,
.timeline_value = ++queue->drm.timeline_value, .timeline_value = ++queue->drm.timeline_value,
}; };
/* Now setup the command structs */ /* Now setup the command structs */
struct drm_asahi_command *cmds = malloc(sizeof(*cmds) * command_count); struct util_dynarray payload;
union drm_asahi_cmd *cmds_inner = util_dynarray_init(&payload, NULL);
malloc(sizeof(*cmds_inner) * command_count); union drm_asahi_cmd *cmds = malloc(sizeof(*cmds) * command_count);
union drm_asahi_user_timestamps *ts_inner = if (cmds == NULL) {
malloc(sizeof(*ts_inner) * command_count);
if (cmds == NULL || cmds_inner == NULL || ts_inner == NULL) {
free(ts_inner);
free(cmds_inner);
free(cmds); free(cmds);
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY); return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
} }
unsigned cmd_it = 0;
unsigned nr_vdm = 0, nr_cdm = 0; unsigned nr_vdm = 0, nr_cdm = 0;
for (unsigned i = 0; i < submit->command_buffer_count; ++i) { for (unsigned i = 0; i < submit->command_buffer_count; ++i) {
@ -845,15 +805,11 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
(struct hk_cmd_buffer *)submit->command_buffers[i]; (struct hk_cmd_buffer *)submit->command_buffers[i];
list_for_each_entry(struct hk_cs, cs, &cmdbuf->control_streams, node) { list_for_each_entry(struct hk_cs, cs, &cmdbuf->control_streams, node) {
assert(cmd_it < command_count); /* Barrier on previous command */
struct drm_asahi_cmd_header header =
agx_cmd_header(cs->type == HK_CS_CDM, nr_vdm, nr_cdm);
struct drm_asahi_command cmd = { util_dynarray_append(&payload, struct drm_asahi_cmd_header, header);
.cmd_buffer = (uint64_t)(uintptr_t)&cmds_inner[cmd_it],
.result_offset = 0 /* TODO */,
.result_size = 0 /* TODO */,
/* Barrier on previous command */
.barriers = {nr_vdm, nr_cdm},
};
if (cs->type == HK_CS_CDM) { if (cs->type == HK_CS_CDM) {
perf_debug( perf_debug(
@ -864,17 +820,10 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
assert(cs->stats.cmds > 0 || cs->stats.flushes > 0 || assert(cs->stats.cmds > 0 || cs->stats.flushes > 0 ||
cs->timestamp.end.handle); cs->timestamp.end.handle);
cmd.cmd_type = DRM_ASAHI_CMD_COMPUTE; struct drm_asahi_cmd_compute cmd;
cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute); asahi_fill_cdm_command(dev, cs, &cmd);
util_dynarray_append(&payload, struct drm_asahi_cmd_compute, cmd);
nr_cdm++; nr_cdm++;
asahi_fill_cdm_command(dev, cs, &cmds_inner[cmd_it].compute,
&ts_inner[cmd_it].compute);
/* Work around for shipping 6.11.8 kernels, remove when we bump uapi
*/
if (!agx_supports_timestamps(&dev->dev))
cmd.cmd_buffer_size -= 8;
} else { } else {
assert(cs->type == HK_CS_VDM); assert(cs->type == HK_CS_VDM);
perf_debug(cmdbuf, "%u: Submitting VDM with %u API draws, %u draws", perf_debug(cmdbuf, "%u: Submitting VDM with %u API draws, %u draws",
@ -882,31 +831,17 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
assert(cs->stats.cmds > 0 || cs->cr.process_empty_tiles || assert(cs->stats.cmds > 0 || cs->cr.process_empty_tiles ||
cs->timestamp.end.handle); cs->timestamp.end.handle);
cmd.cmd_type = DRM_ASAHI_CMD_RENDER; struct drm_asahi_cmd_render cmd;
cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_render); asahi_fill_vdm_command(dev, cs, &cmd);
util_dynarray_append(&payload, struct drm_asahi_cmd_render, cmd);
nr_vdm++; nr_vdm++;
asahi_fill_vdm_command(dev, cs, &cmds_inner[cmd_it].render,
&ts_inner[cmd_it].render);
} }
cmds[cmd_it++] = cmd;
} }
} }
assert(cmd_it == command_count);
if (dev->dev.debug & AGX_DBG_TRACE) { if (dev->dev.debug & AGX_DBG_TRACE) {
for (unsigned i = 0; i < command_count; ++i) { agxdecode_drm_cmdbuf(dev->dev.agxdecode, &dev->dev.params, &payload,
if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE) { true);
agxdecode_drm_cmd_compute(dev->dev.agxdecode, &dev->dev.params,
&cmds_inner[i].compute, true);
} else {
assert(cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER);
agxdecode_drm_cmd_render(dev->dev.agxdecode, &dev->dev.params,
&cmds_inner[i].render, true);
}
}
agxdecode_image_heap(dev->dev.agxdecode, dev->images.bo->va->addr, agxdecode_image_heap(dev->dev.agxdecode, dev->images.bo->va->addr,
dev->images.alloc); dev->images.alloc);
@ -917,25 +852,20 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
struct drm_asahi_submit submit_ioctl = { struct drm_asahi_submit submit_ioctl = {
.flags = 0, .flags = 0,
.queue_id = queue->drm.id, .queue_id = queue->drm.id,
.result_handle = 0 /* TODO */,
.in_sync_count = wait_count, .in_sync_count = wait_count,
.out_sync_count = submit->signal_count + 1, .out_sync_count = submit->signal_count + 1,
.command_count = command_count, .cmdbuf_size = payload.size,
.in_syncs = (uint64_t)(uintptr_t)(waits), .syncs = (uint64_t)(uintptr_t)(syncs),
.out_syncs = (uint64_t)(uintptr_t)(signals), .cmdbuf = (uint64_t)(uintptr_t)(payload.data),
.commands = (uint64_t)(uintptr_t)(cmds),
}; };
VkResult result; VkResult result;
if (command_count <= max_commands_per_submit(dev)) if (command_count <= max_commands_per_submit(dev))
result = queue_submit_single(dev, &submit_ioctl); result = queue_submit_single(dev, &submit_ioctl);
else else
result = queue_submit_looped(dev, &submit_ioctl); result = queue_submit_looped(dev, &submit_ioctl, command_count);
free(ts_inner);
free(cmds_inner);
free(cmds);
util_dynarray_fini(&payload);
return result; return result;
} }
@ -970,18 +900,25 @@ hk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
return result; return result;
} }
static uint32_t static enum drm_asahi_priority
translate_priority(VkQueueGlobalPriorityKHR prio) translate_priority(VkQueueGlobalPriorityKHR prio)
{ {
/* clang-format off */
switch (prio) { switch (prio) {
case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR: return 0; case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR: return 1; return DRM_ASAHI_PRIORITY_REALTIME;
case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR: return 2;
case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR: return 3; case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
default: unreachable("Invalid VkQueueGlobalPriorityKHR"); return DRM_ASAHI_PRIORITY_HIGH;
case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
return DRM_ASAHI_PRIORITY_MEDIUM;
case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
return DRM_ASAHI_PRIORITY_LOW;
default:
unreachable("Invalid VkQueueGlobalPriorityKHR");
} }
/* clang-format on */
} }
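The gate a few lines below rejects anything at or above DRM_ASAHI_PRIORITY_HIGH, which only works if the enum orders priorities from low to high. A sanity-check sketch of that assumption (the actual enum values live in the uAPI header):

/* Assumption, not verified against the header: priorities ascend from LOW to
 * REALTIME, as the ">= DRM_ASAHI_PRIORITY_HIGH" check below relies on. */
static_assert(DRM_ASAHI_PRIORITY_LOW < DRM_ASAHI_PRIORITY_MEDIUM &&
              DRM_ASAHI_PRIORITY_MEDIUM < DRM_ASAHI_PRIORITY_HIGH &&
              DRM_ASAHI_PRIORITY_HIGH < DRM_ASAHI_PRIORITY_REALTIME,
              "priority gate assumes ascending DRM priority values");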
VkResult VkResult
@ -1001,17 +938,21 @@ hk_queue_init(struct hk_device *dev, struct hk_queue *queue,
priority_info ? priority_info->globalPriority priority_info ? priority_info->globalPriority
: VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR; : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
/* TODO: Lift when kernel side is ready and we can handle failures in
* create_command_queue.
*/
enum drm_asahi_priority drm_priority = translate_priority(priority);
if (drm_priority >= DRM_ASAHI_PRIORITY_HIGH) {
return VK_ERROR_NOT_PERMITTED_EXT;
}
result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family); result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family);
if (result != VK_SUCCESS) if (result != VK_SUCCESS)
return result; return result;
queue->vk.driver_submit = hk_queue_submit; queue->vk.driver_submit = hk_queue_submit;
queue->drm.id = agx_create_command_queue(&dev->dev, queue->drm.id = agx_create_command_queue(&dev->dev, drm_priority);
DRM_ASAHI_QUEUE_CAP_RENDER |
DRM_ASAHI_QUEUE_CAP_BLIT |
DRM_ASAHI_QUEUE_CAP_COMPUTE,
translate_priority(priority));
if (drmSyncobjCreate(dev->dev.fd, 0, &queue->drm.syncobj)) { if (drmSyncobjCreate(dev->dev.fd, 0, &queue->drm.syncobj)) {
mesa_loge("drmSyncobjCreate() failed %d\n", errno); mesa_loge("drmSyncobjCreate() failed %d\n", errno);


@ -59,7 +59,7 @@
#include "util/u_math.h" #include "util/u_math.h"
#include <xf86drm.h> #include <xf86drm.h>
#include "asahi/lib/unstable_asahi_drm.h" #include "drm-uapi/asahi_drm.h"
#include "drm-uapi/amdgpu_drm.h" #include "drm-uapi/amdgpu_drm.h"
#include "drm-uapi/i915_drm.h" #include "drm-uapi/i915_drm.h"
#include "drm-uapi/v3d_drm.h" #include "drm-uapi/v3d_drm.h"


@ -10,6 +10,7 @@
#include "util/bitset.h" #include "util/bitset.h"
#include "util/u_dynarray.h" #include "util/u_dynarray.h"
#include "util/u_range.h" #include "util/u_range.h"
#include "agx_device.h"
#include "agx_state.h" #include "agx_state.h"
#include "vdrm.h" #include "vdrm.h"
@ -159,137 +160,14 @@ agx_batch_init(struct agx_context *ctx,
assert(!ret && batch->syncobj); assert(!ret && batch->syncobj);
} }
batch->result_off =
(2 * sizeof(union agx_batch_result)) * agx_batch_idx(batch);
batch->result =
(void *)(((uint8_t *)agx_bo_map(ctx->result_buf)) + batch->result_off);
memset(batch->result, 0, sizeof(union agx_batch_result) * 2);
agx_batch_mark_active(batch); agx_batch_mark_active(batch);
} }
const char *status_str[] = { static struct agx_timestamps *
[DRM_ASAHI_STATUS_PENDING] = "(pending)", agx_batch_timestamps(struct agx_batch *batch)
[DRM_ASAHI_STATUS_COMPLETE] = "Complete",
[DRM_ASAHI_STATUS_UNKNOWN_ERROR] = "UNKNOWN ERROR",
[DRM_ASAHI_STATUS_TIMEOUT] = "TIMEOUT",
[DRM_ASAHI_STATUS_FAULT] = "FAULT",
[DRM_ASAHI_STATUS_KILLED] = "KILLED",
[DRM_ASAHI_STATUS_NO_DEVICE] = "NO DEVICE",
};
const char *fault_type_str[] = {
[DRM_ASAHI_FAULT_NONE] = "(none)",
[DRM_ASAHI_FAULT_UNKNOWN] = "Unknown",
[DRM_ASAHI_FAULT_UNMAPPED] = "Unmapped",
[DRM_ASAHI_FAULT_AF_FAULT] = "AF Fault",
[DRM_ASAHI_FAULT_WRITE_ONLY] = "Write Only",
[DRM_ASAHI_FAULT_READ_ONLY] = "Read Only",
[DRM_ASAHI_FAULT_NO_ACCESS] = "No Access",
};
const char *low_unit_str[16] = {
"DCMP", "UL1C", "CMP", "GSL1", "IAP", "VCE", "TE", "RAS",
"VDM", "PPP", "IPF", "IPF_CPF", "VF", "VF_CPF", "ZLS", "UNK",
};
const char *mid_unit_str[16] = {
"UNK", "dPM", "dCDM_KS0", "dCDM_KS1", "dCDM_KS2", "dIPP",
"dIPP_CS", "dVDM_CSD", "dVDM_SSD", "dVDM_ILF", "dVDM_ILD", "dRDE0",
"dRDE1", "FC", "GSL2", "UNK",
};
const char *high_unit_str[16] = {
"gPM_SP", "gVDM_CSD_SP", "gVDM_SSD_SP", "gVDM_ILF_SP",
"gVDM_TFP_SP", "gVDM_MMB_SP", "gCDM_CS_KS0_SP", "gCDM_CS_KS1_SP",
"gCDM_CS_KS2_SP", "gCDM_KS0_SP", "gCDM_KS1_SP", "gCDM_KS2_SP",
"gIPP_SP", "gIPP_CS_SP", "gRDE0_SP", "gRDE1_SP",
};
static void
agx_print_result(struct agx_device *dev, struct agx_context *ctx,
struct drm_asahi_result_info *info, unsigned batch_idx,
bool is_compute)
{ {
if (unlikely(info->status != DRM_ASAHI_STATUS_COMPLETE)) { struct agx_timestamps *ts = agx_bo_map(batch->ctx->timestamps);
ctx->any_faults = true; return ts + agx_batch_idx(batch);
}
if (likely(info->status == DRM_ASAHI_STATUS_COMPLETE &&
!((dev)->debug & AGX_DBG_STATS)))
return;
if (is_compute) {
struct drm_asahi_result_compute *r = (void *)info;
float time = (r->ts_end - r->ts_start) / dev->params.timer_frequency_hz;
mesa_logw(
"[Batch %d] Compute %s: %.06f\n", batch_idx,
info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?",
time);
} else {
struct drm_asahi_result_render *r = (void *)info;
float time_vtx = (r->vertex_ts_end - r->vertex_ts_start) /
(float)dev->params.timer_frequency_hz;
float time_frag = (r->fragment_ts_end - r->fragment_ts_start) /
(float)dev->params.timer_frequency_hz;
mesa_logw(
"[Batch %d] Render %s: TVB %9ld/%9ld bytes (%d ovf) %c%c%c | vtx %.06f frag %.06f\n",
batch_idx,
info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?",
(long)r->tvb_usage_bytes, (long)r->tvb_size_bytes,
(int)r->num_tvb_overflows,
r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF ? 'G' : ' ',
r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN ? 'M' : ' ',
r->flags & DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED ? 'O' : ' ',
time_vtx, time_frag);
}
if (info->fault_type != DRM_ASAHI_FAULT_NONE) {
const char *unit_name;
int unit_index;
switch (info->unit) {
case 0x00 ... 0x9f:
unit_name = low_unit_str[info->unit & 0xf];
unit_index = info->unit >> 4;
break;
case 0xa0 ... 0xaf:
unit_name = mid_unit_str[info->unit & 0xf];
unit_index = 0;
break;
case 0xb0 ... 0xb7:
unit_name = "GL2CC_META";
unit_index = info->unit & 0x7;
break;
case 0xb8:
unit_name = "GL2CC_MB";
unit_index = 0;
break;
case 0xe0 ... 0xff:
unit_name = high_unit_str[info->unit & 0xf];
unit_index = (info->unit >> 4) & 1;
break;
default:
unit_name = "UNK";
unit_index = 0;
break;
}
mesa_logw(
"[Batch %d] Fault: %s : Addr 0x%llx %c Unit %02x (%s/%d) SB 0x%02x L%d Extra 0x%x\n",
batch_idx,
info->fault_type < ARRAY_SIZE(fault_type_str)
? fault_type_str[info->fault_type]
: "?",
(long long)info->address, info->is_read ? 'r' : 'W', info->unit,
unit_name, unit_index, info->sideband, info->level, info->extra);
agx_debug_fault(dev, info->address);
}
assert(info->status == DRM_ASAHI_STATUS_COMPLETE ||
info->status == DRM_ASAHI_STATUS_KILLED);
} }
static void static void
@ -302,17 +180,25 @@ agx_batch_print_stats(struct agx_device *dev, struct agx_batch *batch)
abort(); abort();
} }
if (!batch->result) if (likely(!(dev->debug & AGX_DBG_STATS)))
return; return;
struct agx_timestamps *ts = agx_batch_timestamps(batch);
if (batch->cdm.bo) { if (batch->cdm.bo) {
agx_print_result(dev, batch->ctx, &batch->result[0].compute.info, float time = (ts->comp_end - ts->comp_start) /
batch_idx, true); (float)dev->params.command_timestamp_frequency_hz;
mesa_logw("[Batch %d] Compute: %.06f\n", batch_idx, time);
} }
if (batch->vdm.bo) { if (batch->vdm.bo) {
agx_print_result(dev, batch->ctx, &batch->result[1].render.info, float time_vtx = (ts->vtx_end - ts->vtx_start) /
batch_idx, false); (float)dev->params.command_timestamp_frequency_hz;
float time_frag = (ts->frag_end - ts->frag_start) /
(float)dev->params.command_timestamp_frequency_hz;
mesa_logw("[Batch %d] vtx %.06f frag %.06f\n", batch_idx, time_vtx,
time_frag);
} }
} }
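The stats path converts raw timestamp deltas with a plain divide by the command timestamp frequency reported in the device parameters. A worked example with a made-up frequency:

/* Worked example, frequency value hypothetical: at a 24 MHz command
 * timestamp clock, a 48 000-tick compute delta prints as 0.002 s. */
float time_s = 48000.0f / 24000000.0f;   /* = 0.002f */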
@ -328,15 +214,17 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
assert(ctx->batch != batch); assert(ctx->batch != batch);
uint64_t begin_ts = ~0, end_ts = 0; uint64_t begin_ts = ~0, end_ts = 0;
if (batch->result) { if (batch->timestamps.size) {
struct agx_timestamps *ts = agx_batch_timestamps(batch);
if (batch->cdm.bo) { if (batch->cdm.bo) {
begin_ts = MIN2(begin_ts, batch->result[0].compute.ts_start); begin_ts = MIN2(begin_ts, ts->comp_start);
end_ts = MAX2(end_ts, batch->result[0].compute.ts_end); end_ts = MAX2(end_ts, ts->comp_end);
} }
if (batch->vdm.bo) { if (batch->vdm.bo) {
begin_ts = MIN2(begin_ts, batch->result[1].render.vertex_ts_start); begin_ts = MIN2(begin_ts, ts->vtx_start);
end_ts = MAX2(end_ts, batch->result[1].render.fragment_ts_end); end_ts = MAX2(end_ts, ts->frag_end);
} }
} }
@ -733,6 +621,24 @@ agx_add_sync(struct drm_asahi_sync *syncs, unsigned *count, uint32_t handle)
}; };
} }
#define MAX_ATTACHMENTS 16
struct attachments {
struct drm_asahi_attachment list[MAX_ATTACHMENTS];
size_t count;
};
static void
asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc)
{
assert(att->count < MAX_ATTACHMENTS);
att->list[att->count++] = (struct drm_asahi_attachment){
.size = rsrc->layout.size_B,
.pointer = rsrc->bo->va->addr,
};
}
void void
agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch, agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
struct drm_asahi_cmd_compute *compute, struct drm_asahi_cmd_compute *compute,
@ -741,34 +647,19 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
struct agx_device *dev = agx_device(ctx->base.screen); struct agx_device *dev = agx_device(ctx->base.screen);
struct agx_screen *screen = agx_screen(ctx->base.screen); struct agx_screen *screen = agx_screen(ctx->base.screen);
bool feedback = dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_STATS);
#ifndef NDEBUG
/* Debug builds always get feedback (for fault checks) */
feedback = true;
#endif
/* Timer queries use the feedback timestamping */
feedback |= (batch->timestamps.size > 0);
if (!feedback)
batch->result = NULL;
/* We allocate the worst-case sync array size since this won't be excessive /* We allocate the worst-case sync array size since this won't be excessive
* for most workloads * for most workloads
*/ */
unsigned max_syncs = batch->bo_list.bit_count + 2; unsigned max_syncs = batch->bo_list.bit_count + 2;
unsigned in_sync_count = 0; unsigned in_sync_count = 0;
unsigned shared_bo_count = 0; unsigned shared_bo_count = 0;
struct drm_asahi_sync *in_syncs = struct drm_asahi_sync *syncs =
malloc(max_syncs * sizeof(struct drm_asahi_sync)); malloc((max_syncs + 2) * sizeof(struct drm_asahi_sync));
struct agx_bo **shared_bos = malloc(max_syncs * sizeof(struct agx_bo *)); struct agx_bo **shared_bos = malloc(max_syncs * sizeof(struct agx_bo *));
uint64_t wait_seqid = p_atomic_read(&screen->flush_wait_seqid); uint64_t wait_seqid = p_atomic_read(&screen->flush_wait_seqid);
struct agx_submit_virt virt = { struct agx_submit_virt virt = {0};
.vbo_res_id = ctx->result_buf->vbo_res_id,
};
/* Elide syncing against our own queue */ /* Elide syncing against our own queue */
if (wait_seqid && wait_seqid == ctx->flush_my_seqid) { if (wait_seqid && wait_seqid == ctx->flush_my_seqid) {
@ -784,37 +675,6 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
batch_debug(batch, "Sync point is %" PRIu64, seqid); batch_debug(batch, "Sync point is %" PRIu64, seqid);
/* Subtle concurrency note: Since we assign seqids atomically and do
* not lock submission across contexts, it is possible for two threads
* to submit timeline syncobj updates out of order. As far as I can
* tell, this case is handled in the kernel conservatively: it triggers
* a fence context bump and effectively "splits" the timeline at the
* larger point, causing future lookups for earlier points to return a
* later point, waiting more. The signaling code still makes sure all
* prior fences have to be signaled before considering a given point
* signaled, regardless of order. That's good enough for us.
*
* (Note: this case breaks drm_syncobj_query_ioctl and for this reason
* triggers a DRM_DEBUG message on submission, but we don't use that
* so we don't care.)
*
* This case can be tested by setting seqid = 1 unconditionally here,
* causing every single syncobj update to reuse the same timeline point.
* Everything still works (but over-synchronizes because this effectively
* serializes all submissions once any context flushes once).
*/
struct drm_asahi_sync out_syncs[2] = {
{
.sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
.handle = batch->syncobj,
},
{
.sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
.handle = screen->flush_syncobj,
.timeline_value = seqid,
},
};
/* This lock protects against a subtle race scenario: /* This lock protects against a subtle race scenario:
* - Context 1 submits and registers itself as writer for a BO * - Context 1 submits and registers itself as writer for a BO
* - Context 2 runs the below loop, and finds the writer syncobj * - Context 2 runs the below loop, and finds the writer syncobj
@ -861,7 +721,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
close(in_sync_fd); close(in_sync_fd);
/* Add it to our wait list */ /* Add it to our wait list */
agx_add_sync(in_syncs, &in_sync_count, sync_handle); agx_add_sync(syncs, &in_sync_count, sync_handle);
/* And keep track of the BO for cloning the out_sync */ /* And keep track of the BO for cloning the out_sync */
shared_bos[shared_bo_count++] = bo; shared_bos[shared_bo_count++] = bo;
@ -879,8 +739,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
batch, "Waits on inter-context BO @ 0x%" PRIx64 " from queue %u", batch, "Waits on inter-context BO @ 0x%" PRIx64 " from queue %u",
bo->va->addr, queue_id); bo->va->addr, queue_id);
agx_add_sync(in_syncs, &in_sync_count, agx_add_sync(syncs, &in_sync_count, agx_bo_writer_syncobj(writer));
agx_bo_writer_syncobj(writer));
shared_bos[shared_bo_count++] = NULL; shared_bos[shared_bo_count++] = NULL;
} }
} }
@ -894,7 +753,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
for (unsigned i = 0; i < virt.extres_count; i++) { for (unsigned i = 0; i < virt.extres_count; i++) {
while (!*p) while (!*p)
p++; // Skip inter-context slots which are not recorded here p++; // Skip inter-context slots which are not recorded here
virt.extres[i].res_id = (*p)->vbo_res_id; virt.extres[i].res_id = (*p)->uapi_handle;
virt.extres[i].flags = ASAHI_EXTRES_READ | ASAHI_EXTRES_WRITE; virt.extres[i].flags = ASAHI_EXTRES_READ | ASAHI_EXTRES_WRITE;
p++; p++;
} }
@ -910,63 +769,109 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
} }
/* Add an explicit fence from gallium, if any */ /* Add an explicit fence from gallium, if any */
agx_add_sync(in_syncs, &in_sync_count, agx_get_in_sync(ctx)); agx_add_sync(syncs, &in_sync_count, agx_get_in_sync(ctx));
/* Add an implicit cross-context flush sync point, if any */ /* Add an implicit cross-context flush sync point, if any */
if (wait_seqid) { if (wait_seqid) {
batch_debug(batch, "Waits on inter-context sync point %" PRIu64, batch_debug(batch, "Waits on inter-context sync point %" PRIu64,
wait_seqid); wait_seqid);
in_syncs[in_sync_count++] = (struct drm_asahi_sync){ syncs[in_sync_count++] = (struct drm_asahi_sync){
.sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ, .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
.handle = screen->flush_syncobj, .handle = screen->flush_syncobj,
.timeline_value = wait_seqid, .timeline_value = wait_seqid,
}; };
} }
/* Subtle concurrency note: Since we assign seqids atomically and do
* not lock submission across contexts, it is possible for two threads
* to submit timeline syncobj updates out of order. As far as I can
* tell, this case is handled in the kernel conservatively: it triggers
* a fence context bump and effectively "splits" the timeline at the
* larger point, causing future lookups for earlier points to return a
* later point, waiting more. The signaling code still makes sure all
* prior fences have to be signaled before considering a given point
* signaled, regardless of order. That's good enough for us.
*
* (Note: this case breaks drm_syncobj_query_ioctl and for this reason
* triggers a DRM_DEBUG message on submission, but we don't use that
* so we don't care.)
*
* This case can be tested by setting seqid = 1 unconditionally here,
* causing every single syncobj update to reuse the same timeline point.
* Everything still works (but over-synchronizes because this effectively
* serializes all submissions once any context flushes once).
*/
struct drm_asahi_sync *out_syncs = syncs + in_sync_count;
out_syncs[0] = (struct drm_asahi_sync){
.sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
.handle = batch->syncobj,
};
out_syncs[1] = (struct drm_asahi_sync){
.sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
.handle = screen->flush_syncobj,
.timeline_value = seqid,
};
/* Submit! */ /* Submit! */
struct drm_asahi_command commands[2]; struct util_dynarray cmdbuf;
unsigned command_count = 0; util_dynarray_init(&cmdbuf, NULL);
if (compute) { if (compute) {
commands[command_count++] = (struct drm_asahi_command){ /* Barrier on previous submission */
.cmd_type = DRM_ASAHI_CMD_COMPUTE, struct drm_asahi_cmd_header header = agx_cmd_header(true, 0, 0);
.flags = 0,
.cmd_buffer = (uint64_t)(uintptr_t)compute,
/* Work around for shipping 6.11.8 kernels, remove when we bump uapi util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_header, header);
*/ util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_compute, *compute);
.cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute) - 8,
.result_offset = feedback ? batch->result_off : 0,
.result_size = feedback ? sizeof(union agx_batch_result) : 0,
/* Barrier on previous submission */
.barriers = {0, 0},
};
} }
if (render) { if (render) {
commands[command_count++] = (struct drm_asahi_command){ struct attachments att = {.count = 0};
.cmd_type = DRM_ASAHI_CMD_RENDER, struct pipe_framebuffer_state *fb = &batch->key;
.flags = 0,
.cmd_buffer = (uint64_t)(uintptr_t)render, for (unsigned i = 0; i < fb->nr_cbufs; ++i) {
.cmd_buffer_size = sizeof(struct drm_asahi_cmd_render), if (fb->cbufs[i])
.result_offset = asahi_add_attachment(&att, agx_resource(fb->cbufs[i]->texture));
feedback ? (batch->result_off + sizeof(union agx_batch_result)) : 0, }
.result_size = feedback ? sizeof(union agx_batch_result) : 0,
/* Barrier on previous submission */ if (fb->zsbuf) {
.barriers = {compute ? DRM_ASAHI_BARRIER_NONE : 0, compute ? 1 : 0}, struct agx_resource *rsrc = agx_resource(fb->zsbuf->texture);
}; asahi_add_attachment(&att, rsrc);
if (rsrc->separate_stencil)
asahi_add_attachment(&att, rsrc->separate_stencil);
}
if (att.count) {
struct drm_asahi_cmd_header header = {
.cmd_type = DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS,
.size = sizeof(att.list[0]) * att.count,
.cdm_barrier = DRM_ASAHI_BARRIER_NONE,
.vdm_barrier = DRM_ASAHI_BARRIER_NONE,
};
util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_header, header);
util_dynarray_append_array(&cmdbuf, struct drm_asahi_attachment,
att.list, att.count);
}
/* Barrier on previous submission */
struct drm_asahi_cmd_header header = agx_cmd_header(
false, compute ? DRM_ASAHI_BARRIER_NONE : 0, compute ? 1 : 0);
util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_header, header);
util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_render, *render);
} }
struct drm_asahi_submit submit = { struct drm_asahi_submit submit = {
.flags = 0, .flags = 0,
.queue_id = ctx->queue_id, .queue_id = ctx->queue_id,
.result_handle = feedback ? ctx->result_buf->handle : 0,
.in_sync_count = in_sync_count, .in_sync_count = in_sync_count,
.out_sync_count = 2, .out_sync_count = 2,
.command_count = command_count, .syncs = (uint64_t)(uintptr_t)(syncs),
.in_syncs = (uint64_t)(uintptr_t)(in_syncs), .cmdbuf = (uint64_t)(uintptr_t)(cmdbuf.data),
.out_syncs = (uint64_t)(uintptr_t)(out_syncs), .cmdbuf_size = cmdbuf.size,
.commands = (uint64_t)(uintptr_t)(&commands[0]),
}; };
int ret = dev->ops.submit(dev, &submit, &virt); int ret = dev->ops.submit(dev, &submit, &virt);
@ -983,7 +888,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
fprintf( fprintf(
stderr, stderr,
"DRM_IOCTL_ASAHI_SUBMIT render failed: %m (%dx%d tile %dx%d layers %d samples %d)\n", "DRM_IOCTL_ASAHI_SUBMIT render failed: %m (%dx%d tile %dx%d layers %d samples %d)\n",
c->fb_width, c->fb_height, c->utile_width, c->utile_height, c->width_px, c->height_px, c->utile_width_px, c->utile_height_px,
c->layers, c->samples); c->layers, c->samples);
} }
@ -1009,7 +914,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
shared_bos[i]->va->addr); shared_bos[i]->va->addr);
/* Free the in_sync handle we just acquired */ /* Free the in_sync handle we just acquired */
ret = drmSyncobjDestroy(dev->fd, in_syncs[i].handle); ret = drmSyncobjDestroy(dev->fd, syncs[i].handle);
assert(ret >= 0); assert(ret >= 0);
/* And then import the out_sync sync file into it */ /* And then import the out_sync sync file into it */
ret = agx_import_sync_file(dev, shared_bos[i], out_sync_fd); ret = agx_import_sync_file(dev, shared_bos[i], out_sync_fd);
@ -1039,21 +944,12 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
batch_debug(batch, "Writes to BO @ 0x%" PRIx64, bo->va->addr); batch_debug(batch, "Writes to BO @ 0x%" PRIx64, bo->va->addr);
} }
free(in_syncs); free(syncs);
free(shared_bos); free(shared_bos);
if (dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_SCRATCH)) { if (dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_SCRATCH)) {
if (dev->debug & AGX_DBG_TRACE) { if (dev->debug & AGX_DBG_TRACE) {
if (compute) { agxdecode_drm_cmdbuf(dev->agxdecode, &dev->params, &cmdbuf, true);
agxdecode_drm_cmd_compute(dev->agxdecode, &dev->params, compute,
true);
}
if (render) {
agxdecode_drm_cmd_render(dev->agxdecode, &dev->params, render,
true);
}
agxdecode_next_frame(); agxdecode_next_frame();
} }
@ -1077,6 +973,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
} }
} }
util_dynarray_fini(&cmdbuf);
agx_batch_mark_submitted(batch); agx_batch_mark_submitted(batch);
if (virt.extres) if (virt.extres)
@ -1155,9 +1052,6 @@ agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch)
if (ctx->batch == batch) if (ctx->batch == batch)
ctx->batch = NULL; ctx->batch = NULL;
/* Elide printing stats */
batch->result = NULL;
agx_batch_cleanup(ctx, batch, true); agx_batch_cleanup(ctx, batch, true);
} }


@ -11,7 +11,7 @@
#include "asahi/compiler/agx_compile.h" #include "asahi/compiler/agx_compile.h"
#include "asahi/layout/layout.h" #include "asahi/layout/layout.h"
#include "asahi/lib/decode.h" #include "asahi/lib/decode.h"
#include "asahi/lib/unstable_asahi_drm.h" #include "drm-uapi/asahi_drm.h"
#include "drm-uapi/drm_fourcc.h" #include "drm-uapi/drm_fourcc.h"
#include "frontend/winsys_handle.h" #include "frontend/winsys_handle.h"
#include "gallium/auxiliary/renderonly/renderonly.h" #include "gallium/auxiliary/renderonly/renderonly.h"
@ -1199,26 +1199,6 @@ agx_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres)
} }
} }
#define MAX_ATTACHMENTS 16
struct attachments {
struct drm_asahi_attachment list[MAX_ATTACHMENTS];
size_t count;
};
static void
asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc,
struct pipe_surface *surf)
{
assert(att->count < MAX_ATTACHMENTS);
int idx = att->count++;
att->list[idx].size = rsrc->layout.size_B;
att->list[idx].pointer = rsrc->bo->va->addr;
att->list[idx].order = 1; // TODO: What does this do?
att->list[idx].flags = 0;
}
static bool static bool
is_aligned(unsigned x, unsigned pot_alignment) is_aligned(unsigned x, unsigned pot_alignment)
{ {
@ -1226,12 +1206,20 @@ is_aligned(unsigned x, unsigned pot_alignment)
return (x & (pot_alignment - 1)) == 0; return (x & (pot_alignment - 1)) == 0;
} }
static unsigned
build_timestamp_offset(struct agx_batch *batch, unsigned offset)
{
return (agx_batch_idx(batch) * sizeof(struct agx_timestamps)) + offset;
}
#define timestamp_offset(batch, offs) \
build_timestamp_offset(batch, offsetof(struct agx_timestamps, offs))
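timestamp_offset() assumes agx_timestamps packs one slot per batch index, holding the per-pass fields the batch code reads elsewhere in this diff (comp/vtx/frag start and end). A sketch of that presumed layout, which is an assumption rather than the actual definition:

/* Assumed shape of the per-batch timestamp slot; the real definition lives in
 * the driver headers.  With one slot per batch,
 * timestamp_offset(batch, frag_end) resolves to
 * agx_batch_idx(batch) * sizeof(struct agx_timestamps)
 *    + offsetof(struct agx_timestamps, frag_end). */
struct agx_timestamps {
   uint64_t comp_start, comp_end; /* compute-only batches */
   uint64_t vtx_start, vtx_end;   /* vertex/tiling pass */
   uint64_t frag_start, frag_end; /* fragment pass */
};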
static void static void
agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c, agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
struct attachments *att, struct agx_pool *pool, struct agx_pool *pool, struct agx_batch *batch,
struct agx_batch *batch, struct pipe_framebuffer_state *framebuffer, struct pipe_framebuffer_state *framebuffer, uint64_t encoder_ptr,
uint64_t encoder_ptr, uint64_t encoder_id, uint64_t cmd_ta_id, uint64_t scissor_ptr, uint64_t depth_bias_ptr,
uint64_t cmd_3d_id, uint64_t scissor_ptr, uint64_t depth_bias_ptr,
uint64_t visibility_result_ptr, struct asahi_bg_eot pipeline_clear, uint64_t visibility_result_ptr, struct asahi_bg_eot pipeline_clear,
struct asahi_bg_eot pipeline_load, struct asahi_bg_eot pipeline_load,
struct asahi_bg_eot pipeline_store, bool clear_pipeline_textures, struct asahi_bg_eot pipeline_store, bool clear_pipeline_textures,
@ -1240,28 +1228,27 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
{ {
memset(c, 0, sizeof(*c)); memset(c, 0, sizeof(*c));
c->encoder_ptr = encoder_ptr; c->vdm_ctrl_stream_base = encoder_ptr;
c->encoder_id = encoder_id;
c->cmd_3d_id = cmd_3d_id;
c->cmd_ta_id = cmd_ta_id;
c->fragment_usc_base = dev->shader_base;
c->vertex_usc_base = dev->shader_base;
/* bit 0 specifies OpenGL clip behaviour. Since ARB_clip_control is /* bit 0 specifies OpenGL clip behaviour. Since ARB_clip_control is
* advertised, we don't set it and lower in the vertex shader. * advertised, we don't set it and lower in the vertex shader.
*/ */
c->ppp_ctrl = 0x202; c->ppp_ctrl = 0x202;
c->fb_width = framebuffer->width; c->width_px = framebuffer->width;
c->fb_height = framebuffer->height; c->height_px = framebuffer->height;
c->iogpu_unk_214 = 0xc000;
c->isp_bgobjvals = 0x300; c->isp_bgobjvals = 0x300;
struct agx_resource *zres = NULL, *sres = NULL; struct agx_resource *zres = NULL, *sres = NULL;
if (framebuffer->zsbuf) {
agx_pack(&c->isp_zls_pixels, CR_ISP_ZLS_PIXELS, cfg) {
cfg.x = c->width_px;
cfg.y = c->height_px;
}
}
agx_pack(&c->zls_ctrl, ZLS_CONTROL, zls_control) { agx_pack(&c->zls_ctrl, ZLS_CONTROL, zls_control) {
if (framebuffer->zsbuf) { if (framebuffer->zsbuf) {
@ -1279,9 +1266,6 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
desc->format == PIPE_FORMAT_S8_UINT); desc->format == PIPE_FORMAT_S8_UINT);
c->depth_dimensions =
(framebuffer->width - 1) | ((framebuffer->height - 1) << 15);
if (util_format_has_depth(desc)) if (util_format_has_depth(desc))
zres = zsres; zres = zsres;
else else
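
The new isp_zls_pixels pack above replaces the removed depth_dimensions expression, but both encode the same value: the render area width and height, each biased by one, in two 15-bit fields. A small worked example of that layout, assuming the generated packer emits the same bits the old open-coded expression did:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same encoding as the removed depth_dimensions computation:
 * (width - 1) in bits [0,14], (height - 1) in bits [15,29]. */
static uint32_t
pack_zls_pixels(uint32_t width_px, uint32_t height_px)
{
   return (width_px - 1) | ((height_px - 1) << 15);
}

int
main(void)
{
   uint32_t packed = pack_zls_pixels(1920, 1080);
   assert((packed & 0x7fff) == 1919);         /* X field */
   assert(((packed >> 15) & 0x7fff) == 1079); /* Y field */
   printf("0x%08x\n", packed);
   return 0;
}
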
@ -1297,11 +1281,8 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
zls_control.z_store_enable = (batch->resolve & PIPE_CLEAR_DEPTH); zls_control.z_store_enable = (batch->resolve & PIPE_CLEAR_DEPTH);
zls_control.z_load_enable = !clear && load; zls_control.z_load_enable = !clear && load;
c->depth_buffer_load = agx_map_texture_gpu(zres, first_layer) + c->depth.base = agx_map_texture_gpu(zres, first_layer) +
ail_get_level_offset_B(&zres->layout, level); ail_get_level_offset_B(&zres->layout, level);
c->depth_buffer_store = c->depth_buffer_load;
c->depth_buffer_partial = c->depth_buffer_load;
/* Main stride in pages */ /* Main stride in pages */
assert((zres->layout.depth_px == 1 || assert((zres->layout.depth_px == 1 ||
@ -1309,14 +1290,12 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
"Page aligned Z layers"); "Page aligned Z layers");
unsigned stride_pages = zres->layout.layer_stride_B / AIL_PAGESIZE; unsigned stride_pages = zres->layout.layer_stride_B / AIL_PAGESIZE;
c->depth_buffer_load_stride = ((stride_pages - 1) << 14) | 1; c->depth.stride = ((stride_pages - 1) << 14) | 1;
c->depth_buffer_store_stride = c->depth_buffer_load_stride;
c->depth_buffer_partial_stride = c->depth_buffer_load_stride;
assert(zres->layout.tiling != AIL_TILING_LINEAR && "must tile"); assert(zres->layout.tiling != AIL_TILING_LINEAR && "must tile");
if (zres->layout.compressed) { if (zres->layout.compressed) {
c->depth_meta_buffer_load = c->depth.comp_base =
agx_map_texture_gpu(zres, 0) + agx_map_texture_gpu(zres, 0) +
zres->layout.metadata_offset_B + zres->layout.metadata_offset_B +
(first_layer * zres->layout.compression_layer_stride_B) + (first_layer * zres->layout.compression_layer_stride_B) +
@ -1328,14 +1307,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
"Cacheline aligned Z meta layers"); "Cacheline aligned Z meta layers");
unsigned stride_lines = unsigned stride_lines =
zres->layout.compression_layer_stride_B / AIL_CACHELINE; zres->layout.compression_layer_stride_B / AIL_CACHELINE;
c->depth_meta_buffer_load_stride = (stride_lines - 1) << 14; c->depth.comp_stride = (stride_lines - 1) << 14;
c->depth_meta_buffer_store = c->depth_meta_buffer_load;
c->depth_meta_buffer_store_stride =
c->depth_meta_buffer_load_stride;
c->depth_meta_buffer_partial = c->depth_meta_buffer_load;
c->depth_meta_buffer_partial_stride =
c->depth_meta_buffer_load_stride;
zls_control.z_compress_1 = true; zls_control.z_compress_1 = true;
zls_control.z_compress_2 = true; zls_control.z_compress_2 = true;
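
The stride fields above all follow one pattern: the per-layer stride is expressed in fixed-size units, biased by one, and shifted into the upper bits, with the main Z/S stride counted in pages (bit 0 set) and the compression-metadata stride counted in cachelines. A worked example of that arithmetic; the 16 KiB page and 128-byte cacheline sizes stand in for the AIL constants and are assumptions here:

#include <stdint.h>
#include <stdio.h>

/* Assumed stand-ins for the AIL constants used by the driver. */
#define TOY_PAGESIZE  16384
#define TOY_CACHELINE 128

int
main(void)
{
   /* Example: 4 MiB between Z layers, 64 KiB between metadata layers. */
   uint64_t layer_stride_B = 4 * 1024 * 1024;
   uint64_t meta_stride_B = 64 * 1024;

   /* Main stride: (pages - 1) in the upper bits, bit 0 set. */
   unsigned stride_pages = layer_stride_B / TOY_PAGESIZE;
   uint32_t depth_stride = ((stride_pages - 1) << 14) | 1;

   /* Compression stride: (cachelines - 1) in the upper bits. */
   unsigned stride_lines = meta_stride_B / TOY_CACHELINE;
   uint32_t comp_stride = (stride_lines - 1) << 14;

   printf("depth.stride = 0x%x, depth.comp_stride = 0x%x\n",
          depth_stride, comp_stride);
   return 0;
}
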
@ -1346,7 +1318,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
c->isp_bgobjdepth = c->isp_bgobjdepth =
(uint16_t)(SATURATE(clear_depth) * scale + 0.5f); (uint16_t)(SATURATE(clear_depth) * scale + 0.5f);
zls_control.z_format = AGX_ZLS_FORMAT_16; zls_control.z_format = AGX_ZLS_FORMAT_16;
c->iogpu_unk_214 |= 0x40000; c->flags |= DRM_ASAHI_RENDER_DBIAS_IS_INT;
} else { } else {
c->isp_bgobjdepth = fui(clear_depth); c->isp_bgobjdepth = fui(clear_depth);
zls_control.z_format = AGX_ZLS_FORMAT_32F; zls_control.z_format = AGX_ZLS_FORMAT_32F;
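
The background depth written above is the clear value re-expressed in the depth buffer's format: a rounded unorm16 for Z16, the raw float bits for Z32F. A worked example of both conversions, with SATURATE and fui open-coded so the sketch stands alone and with the 65535 scale factor assumed for the unorm16 case:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Clamp to [0, 1], as SATURATE() does. */
static float
saturate(float x)
{
   return x < 0.0f ? 0.0f : (x > 1.0f ? 1.0f : x);
}

/* Reinterpret float bits as a uint32_t, as fui() does. */
static uint32_t
float_bits(float f)
{
   uint32_t u;
   memcpy(&u, &f, sizeof(u));
   return u;
}

int
main(void)
{
   float clear_depth = 0.5f;

   /* Z16: unorm16 encoding of the clear value. 0.5 -> 32768. */
   uint16_t z16 = (uint16_t)(saturate(clear_depth) * 65535.0f + 0.5f);

   /* Z32F: the raw IEEE-754 bits. 0.5f -> 0x3f000000. */
   uint32_t z32f = float_bits(clear_depth);

   printf("Z16 = %u, Z32F = 0x%08x\n", z16, z32f);
   return 0;
}
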
@ -1360,24 +1332,18 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
zls_control.s_store_enable = (batch->resolve & PIPE_CLEAR_STENCIL); zls_control.s_store_enable = (batch->resolve & PIPE_CLEAR_STENCIL);
zls_control.s_load_enable = !clear && load; zls_control.s_load_enable = !clear && load;
c->stencil_buffer_load = c->stencil.base = agx_map_texture_gpu(sres, first_layer) +
agx_map_texture_gpu(sres, first_layer) + ail_get_level_offset_B(&sres->layout, level);
ail_get_level_offset_B(&sres->layout, level);
c->stencil_buffer_store = c->stencil_buffer_load;
c->stencil_buffer_partial = c->stencil_buffer_load;
/* Main stride in pages */ /* Main stride in pages */
assert((sres->layout.depth_px == 1 || assert((sres->layout.depth_px == 1 ||
is_aligned(sres->layout.layer_stride_B, AIL_PAGESIZE)) && is_aligned(sres->layout.layer_stride_B, AIL_PAGESIZE)) &&
"Page aligned S layers"); "Page aligned S layers");
unsigned stride_pages = sres->layout.layer_stride_B / AIL_PAGESIZE; unsigned stride_pages = sres->layout.layer_stride_B / AIL_PAGESIZE;
c->stencil_buffer_load_stride = ((stride_pages - 1) << 14) | 1; c->stencil.stride = ((stride_pages - 1) << 14) | 1;
c->stencil_buffer_store_stride = c->stencil_buffer_load_stride;
c->stencil_buffer_partial_stride = c->stencil_buffer_load_stride;
if (sres->layout.compressed) { if (sres->layout.compressed) {
c->stencil_meta_buffer_load = c->stencil.comp_base =
agx_map_texture_gpu(sres, 0) + agx_map_texture_gpu(sres, 0) +
sres->layout.metadata_offset_B + sres->layout.metadata_offset_B +
(first_layer * sres->layout.compression_layer_stride_B) + (first_layer * sres->layout.compression_layer_stride_B) +
@ -1389,14 +1355,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
"Cacheline aligned S meta layers"); "Cacheline aligned S meta layers");
unsigned stride_lines = unsigned stride_lines =
sres->layout.compression_layer_stride_B / AIL_CACHELINE; sres->layout.compression_layer_stride_B / AIL_CACHELINE;
c->stencil_meta_buffer_load_stride = (stride_lines - 1) << 14; c->stencil.comp_stride = (stride_lines - 1) << 14;
c->stencil_meta_buffer_store = c->stencil_meta_buffer_load;
c->stencil_meta_buffer_store_stride =
c->stencil_meta_buffer_load_stride;
c->stencil_meta_buffer_partial = c->stencil_meta_buffer_load;
c->stencil_meta_buffer_partial_stride =
c->stencil_meta_buffer_load_stride;
zls_control.s_compress_1 = true; zls_control.s_compress_1 = true;
zls_control.s_compress_2 = true; zls_control.s_compress_2 = true;
@ -1407,71 +1366,48 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
} }
} }
if (clear_pipeline_textures)
c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
else
c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES;
if (zres && !(batch->clear & PIPE_CLEAR_DEPTH))
c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
if (sres && !(batch->clear & PIPE_CLEAR_STENCIL))
c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
if (dev->debug & AGX_DBG_NOCLUSTER) if (dev->debug & AGX_DBG_NOCLUSTER)
c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING; c->flags |= DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING;
/* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */ memcpy(&c->bg.rsrc_spec, &pipeline_clear.counts,
if (tib->nr_samples > 1 && framebuffer->zsbuf)
c->flags |= ASAHI_RENDER_MSAA_ZS;
memcpy(&c->load_pipeline_bind, &pipeline_clear.counts,
sizeof(struct agx_counts_packed)); sizeof(struct agx_counts_packed));
memcpy(&c->store_pipeline_bind, &pipeline_store.counts, memcpy(&c->eot.rsrc_spec, &pipeline_store.counts,
sizeof(struct agx_counts_packed)); sizeof(struct agx_counts_packed));
memcpy(&c->partial_reload_pipeline_bind, &pipeline_load.counts, memcpy(&c->partial_bg.rsrc_spec, &pipeline_load.counts,
sizeof(struct agx_counts_packed)); sizeof(struct agx_counts_packed));
memcpy(&c->partial_store_pipeline_bind, &pipeline_store.counts, memcpy(&c->partial_eot.rsrc_spec, &pipeline_store.counts,
sizeof(struct agx_counts_packed)); sizeof(struct agx_counts_packed));
/* XXX is this correct? */ /* XXX is this correct? */
c->load_pipeline = pipeline_clear.usc | (framebuffer->nr_cbufs >= 4 ? 8 : 4); c->bg.usc = pipeline_clear.usc | (framebuffer->nr_cbufs >= 4 ? 8 : 4);
c->store_pipeline = pipeline_store.usc | 4; c->eot.usc = pipeline_store.usc | 4;
c->partial_reload_pipeline = pipeline_load.usc | 4; c->partial_bg.usc = pipeline_load.usc | 4;
c->partial_store_pipeline = pipeline_store.usc | 4; c->partial_eot.usc = pipeline_store.usc | 4;
c->utile_width = tib->tile_size.width; c->utile_width_px = tib->tile_size.width;
c->utile_height = tib->tile_size.height; c->utile_height_px = tib->tile_size.height;
c->samples = tib->nr_samples; c->samples = tib->nr_samples;
c->layers = MAX2(util_framebuffer_get_num_layers(framebuffer), 1); c->layers = MAX2(util_framebuffer_get_num_layers(framebuffer), 1);
c->ppp_multisamplectl = batch->uniforms.ppp_multisamplectl; c->ppp_multisamplectl = batch->uniforms.ppp_multisamplectl;
c->sample_size = tib->sample_size_B; c->sample_size_B = tib->sample_size_B;
/* XXX OR 0x80 with eMRT? */
c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(tib), 2048) / 2048;
float tan_60 = 1.732051f; float tan_60 = 1.732051f;
c->merge_upper_x = fui(tan_60 / framebuffer->width); c->isp_merge_upper_x = fui(tan_60 / framebuffer->width);
c->merge_upper_y = fui(tan_60 / framebuffer->height); c->isp_merge_upper_y = fui(tan_60 / framebuffer->height);
c->scissor_array = scissor_ptr; c->isp_scissor_base = scissor_ptr;
c->depth_bias_array = depth_bias_ptr; c->isp_dbias_base = depth_bias_ptr;
c->visibility_result_buffer = visibility_result_ptr; c->isp_oclqry_base = visibility_result_ptr;
c->vertex_sampler_array = if (batch->sampler_heap.bo) {
batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0; c->sampler_heap = batch->sampler_heap.bo->va->addr;
c->vertex_sampler_count = batch->sampler_heap.count; c->sampler_count = batch->sampler_heap.count;
c->vertex_sampler_max = batch->sampler_heap.count + 1; }
/* In the future we could split the heaps if useful */
c->fragment_sampler_array = c->vertex_sampler_array;
c->fragment_sampler_count = c->vertex_sampler_count;
c->fragment_sampler_max = c->vertex_sampler_max;
/* If a tile is empty, we do not want to process it, as the redundant /* If a tile is empty, we do not want to process it, as the redundant
* roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of * roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of
@ -1483,39 +1419,25 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
* This case matters a LOT for performance in workloads that split batches. * This case matters a LOT for performance in workloads that split batches.
*/ */
if (batch->clear & batch->resolve) if (batch->clear & batch->resolve)
c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES; c->flags |= DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES;
for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) {
if (!framebuffer->cbufs[i])
continue;
asahi_add_attachment(att, agx_resource(framebuffer->cbufs[i]->texture),
framebuffer->cbufs[i]);
}
if (framebuffer->zsbuf) {
struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);
asahi_add_attachment(att, rsrc, framebuffer->zsbuf);
if (rsrc->separate_stencil) {
asahi_add_attachment(att, rsrc->separate_stencil, framebuffer->zsbuf);
}
}
c->fragment_attachments = (uint64_t)(uintptr_t)&att->list[0];
c->fragment_attachment_count = att->count;
if (batch->vs_scratch) { if (batch->vs_scratch) {
c->flags |= ASAHI_RENDER_VERTEX_SPILLS; c->flags |= DRM_ASAHI_RENDER_VERTEX_SCRATCH;
c->vertex_helper_arg = batch->ctx->scratch_vs.buf->va->addr; c->vertex_helper.data = batch->ctx->scratch_vs.buf->va->addr;
c->vertex_helper_cfg = batch->vs_preamble_scratch << 16; c->vertex_helper.cfg = batch->vs_preamble_scratch << 16;
c->vertex_helper_program = agx_helper_program(&batch->ctx->bg_eot); c->vertex_helper.binary = agx_helper_program(&batch->ctx->bg_eot);
} }
if (batch->fs_scratch) { if (batch->fs_scratch) {
c->fragment_helper_arg = batch->ctx->scratch_fs.buf->va->addr; c->fragment_helper.data = batch->ctx->scratch_fs.buf->va->addr;
c->fragment_helper_cfg = batch->fs_preamble_scratch << 16; c->fragment_helper.cfg = batch->fs_preamble_scratch << 16;
c->fragment_helper_program = agx_helper_program(&batch->ctx->bg_eot); c->fragment_helper.binary = agx_helper_program(&batch->ctx->bg_eot);
}
if (batch->timestamps.size > 0) {
c->ts_vtx.start.handle = batch->ctx->timestamp_handle;
c->ts_frag.end.handle = batch->ctx->timestamp_handle;
c->ts_vtx.start.offset = timestamp_offset(batch, vtx_start);
c->ts_frag.end.offset = timestamp_offset(batch, frag_end);
} }
} }
@ -1595,8 +1517,6 @@ static void
agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch, agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
struct drm_asahi_cmd_compute *cmdbuf) struct drm_asahi_cmd_compute *cmdbuf)
{ {
struct agx_device *dev = agx_device(ctx->base.screen);
/* Finalize the encoder */ /* Finalize the encoder */
agx_pack(batch->cdm.current, CDM_STREAM_TERMINATE, _) agx_pack(batch->cdm.current, CDM_STREAM_TERMINATE, _)
; ;
@ -1606,27 +1526,14 @@ agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
if (batch->cs_scratch) if (batch->cs_scratch)
agx_batch_add_bo(batch, ctx->scratch_cs.buf); agx_batch_add_bo(batch, ctx->scratch_cs.buf);
unsigned cmdbuf_id = agx_get_global_id(dev);
unsigned encoder_id = agx_get_global_id(dev);
*cmdbuf = (struct drm_asahi_cmd_compute){ *cmdbuf = (struct drm_asahi_cmd_compute){
.flags = 0, .cdm_ctrl_stream_base = batch->cdm.bo->va->addr,
.encoder_ptr = batch->cdm.bo->va->addr, .cdm_ctrl_stream_end =
.encoder_end =
batch->cdm.bo->va->addr + batch->cdm.bo->va->addr +
(batch->cdm.current - (uint8_t *)agx_bo_map(batch->cdm.bo)), (batch->cdm.current - (uint8_t *)agx_bo_map(batch->cdm.bo)),
.usc_base = dev->shader_base, .sampler_heap =
.helper_arg = 0,
.helper_cfg = 0,
.helper_program = 0,
.iogpu_unk_40 = 0,
.sampler_array =
batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0, batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0,
.sampler_count = batch->sampler_heap.count, .sampler_count = batch->sampler_heap.count,
.sampler_max = batch->sampler_heap.count + 1,
.encoder_id = encoder_id,
.cmd_id = cmdbuf_id,
.unk_mask = 0xffffffff,
}; };
if (batch->cs_scratch) { if (batch->cs_scratch) {
@ -1635,16 +1542,23 @@ agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
// helper. Disable them for now. // helper. Disable them for now.
// cmdbuf->iogpu_unk_40 = 0x1c; // cmdbuf->iogpu_unk_40 = 0x1c;
cmdbuf->helper_arg = ctx->scratch_cs.buf->va->addr; cmdbuf->helper.data = ctx->scratch_cs.buf->va->addr;
cmdbuf->helper_cfg = batch->cs_preamble_scratch << 16; cmdbuf->helper.cfg = batch->cs_preamble_scratch << 16;
// cmdbuf->helper_cfg |= 0x40; // cmdbuf->helper.cfg |= 0x40;
cmdbuf->helper_program = agx_helper_program(&batch->ctx->bg_eot); cmdbuf->helper.binary = agx_helper_program(&batch->ctx->bg_eot);
}
if (batch->timestamps.size > 0) {
cmdbuf->ts.start.handle = ctx->timestamp_handle;
cmdbuf->ts.end.handle = ctx->timestamp_handle;
cmdbuf->ts.start.offset = timestamp_offset(batch, comp_start);
      cmdbuf->ts.end.offset = timestamp_offset(batch, comp_end);
} }
} }
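
The cdm_ctrl_stream_end above is obtained by translating the CPU-side write cursor back into a GPU address: the cursor's byte offset within the mapped BO is added to the BO's GPU VA. A minimal sketch of that translation; the toy_bo type and its field names are placeholders, not the driver's:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for a mapped BO and its GPU virtual address. */
struct toy_bo {
   uint8_t *cpu_map;  /* CPU mapping of the BO */
   uint64_t gpu_va;   /* address the GPU sees for the same memory */
};

/* GPU address corresponding to a CPU-side write cursor inside the BO. */
static uint64_t
gpu_addr_of_cursor(const struct toy_bo *bo, const uint8_t *cursor)
{
   return bo->gpu_va + (uint64_t)(cursor - bo->cpu_map);
}

int
main(void)
{
   uint8_t backing[256] = {0};
   struct toy_bo bo = {.cpu_map = backing, .gpu_va = 0x1230000};

   /* After writing 64 bytes of control stream, the end is VA + 64. */
   uint8_t *cursor = backing + 64;
   printf("0x%llx\n", (unsigned long long)gpu_addr_of_cursor(&bo, cursor));
   return 0;
}
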
static void static void
agx_flush_render(struct agx_context *ctx, struct agx_batch *batch, agx_flush_render(struct agx_context *ctx, struct agx_batch *batch,
struct drm_asahi_cmd_render *cmdbuf, struct attachments *att) struct drm_asahi_cmd_render *cmdbuf)
{ {
struct agx_device *dev = agx_device(ctx->base.screen); struct agx_device *dev = agx_device(ctx->base.screen);
@ -1694,16 +1608,11 @@ agx_flush_render(struct agx_context *ctx, struct agx_batch *batch,
*/ */
agx_batch_add_bo(batch, batch->vdm.bo); agx_batch_add_bo(batch, batch->vdm.bo);
unsigned cmd_ta_id = agx_get_global_id(dev); agx_cmdbuf(
unsigned cmd_3d_id = agx_get_global_id(dev); dev, cmdbuf, &batch->pool, batch, &batch->key, batch->vdm.bo->va->addr,
unsigned encoder_id = agx_get_global_id(dev); scissor, zbias, agx_get_occlusion_heap(batch), pipeline_background,
pipeline_background_partial, pipeline_store, clear_pipeline_textures,
agx_cmdbuf(dev, cmdbuf, att, &batch->pool, batch, &batch->key, batch->clear_depth, batch->clear_stencil, &batch->tilebuffer_layout);
batch->vdm.bo->va->addr, encoder_id, cmd_ta_id, cmd_3d_id,
scissor, zbias, agx_get_occlusion_heap(batch),
pipeline_background, pipeline_background_partial, pipeline_store,
clear_pipeline_textures, batch->clear_depth, batch->clear_stencil,
&batch->tilebuffer_layout);
} }
void void
@ -1712,7 +1621,6 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
assert(agx_batch_is_active(batch)); assert(agx_batch_is_active(batch));
assert(!agx_batch_is_submitted(batch)); assert(!agx_batch_is_submitted(batch));
struct attachments att = {.count = 0};
struct drm_asahi_cmd_render render; struct drm_asahi_cmd_render render;
struct drm_asahi_cmd_compute compute; struct drm_asahi_cmd_compute compute;
bool has_vdm = false, has_cdm = false; bool has_vdm = false, has_cdm = false;
@ -1723,7 +1631,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
} }
if (batch->vdm.bo && (batch->clear || batch->initialized)) { if (batch->vdm.bo && (batch->clear || batch->initialized)) {
agx_flush_render(ctx, batch, &render, &att); agx_flush_render(ctx, batch, &render);
has_vdm = true; has_vdm = true;
} }
@ -1761,8 +1669,6 @@ agx_destroy_context(struct pipe_context *pctx)
agx_bg_eot_cleanup(&ctx->bg_eot); agx_bg_eot_cleanup(&ctx->bg_eot);
agx_destroy_meta_shaders(ctx); agx_destroy_meta_shaders(ctx);
agx_bo_unreference(dev, ctx->result_buf);
/* Lock around the syncobj destruction, to avoid racing /* Lock around the syncobj destruction, to avoid racing
* command submission in another context. * command submission in another context.
**/ **/
@ -1778,6 +1684,9 @@ agx_destroy_context(struct pipe_context *pctx)
drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj); drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj);
} }
dev->ops.bo_unbind_object(dev, ctx->timestamp_handle);
agx_bo_unreference(dev, ctx->timestamps);
u_rwlock_wrunlock(&screen->destroy_lock); u_rwlock_wrunlock(&screen->destroy_lock);
pipe_resource_reference(&ctx->heap, NULL); pipe_resource_reference(&ctx->heap, NULL);
@ -1841,21 +1750,18 @@ agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
} }
pctx->const_uploader = pctx->stream_uploader; pctx->const_uploader = pctx->stream_uploader;
uint32_t priority = 2; enum drm_asahi_priority priority = DRM_ASAHI_PRIORITY_MEDIUM;
if (flags & PIPE_CONTEXT_PRIORITY_LOW)
priority = 3;
else if (flags & PIPE_CONTEXT_PRIORITY_MEDIUM)
priority = 2;
else if (flags & PIPE_CONTEXT_PRIORITY_HIGH)
priority = 1;
else if (flags & PIPE_CONTEXT_PRIORITY_REALTIME)
priority = 0;
ctx->queue_id = agx_create_command_queue(agx_device(screen), if (flags & PIPE_CONTEXT_PRIORITY_LOW)
DRM_ASAHI_QUEUE_CAP_RENDER | priority = DRM_ASAHI_PRIORITY_LOW;
DRM_ASAHI_QUEUE_CAP_BLIT | else if (flags & PIPE_CONTEXT_PRIORITY_MEDIUM)
DRM_ASAHI_QUEUE_CAP_COMPUTE, priority = DRM_ASAHI_PRIORITY_MEDIUM;
priority);
/* TODO: High/realtime need us to handle errors since we might not have
* permission. Sort this out later.
*/
ctx->queue_id = agx_create_command_queue(agx_device(screen), priority);
pctx->destroy = agx_destroy_context; pctx->destroy = agx_destroy_context;
pctx->flush = agx_flush; pctx->flush = agx_flush;
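
The queue priority above is now picked straight from the gallium context flags, defaulting to medium and intentionally ignoring high/realtime until permission errors are handled. A compact sketch of that mapping with local stand-in enums rather than the real PIPE_CONTEXT_PRIORITY_* and drm_asahi_priority values:

#include <stdio.h>

/* Local stand-ins; real values come from drm-uapi/asahi_drm.h and gallium. */
enum toy_priority { TOY_PRIORITY_LOW, TOY_PRIORITY_MEDIUM };
enum toy_flags { TOY_CTX_PRIORITY_LOW = 1 << 0, TOY_CTX_PRIORITY_MEDIUM = 1 << 1 };

static enum toy_priority
pick_priority(unsigned flags)
{
   /* Default to medium; only low/medium are honored for now. */
   if (flags & TOY_CTX_PRIORITY_LOW)
      return TOY_PRIORITY_LOW;

   return TOY_PRIORITY_MEDIUM;
}

int
main(void)
{
   printf("%d\n", pick_priority(TOY_CTX_PRIORITY_LOW)); /* 0 == low */
   return 0;
}
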
@ -1893,11 +1799,17 @@ agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
ctx->blitter = util_blitter_create(pctx); ctx->blitter = util_blitter_create(pctx);
ctx->compute_blitter.blit_cs = asahi_blit_key_table_create(ctx); ctx->compute_blitter.blit_cs = asahi_blit_key_table_create(ctx);
ctx->result_buf = struct agx_device *dev = agx_device(screen);
agx_bo_create(agx_device(screen), size_t timestamps_size = sizeof(struct agx_timestamps) * AGX_MAX_BATCHES;
(2 * sizeof(union agx_batch_result)) * AGX_MAX_BATCHES, 0,
AGX_BO_WRITEBACK, "Batch result buffer"); /* The kernel requires that timestamp buffers are SHARED */
assert(ctx->result_buf); ctx->timestamps =
agx_bo_create(dev, timestamps_size, 0, AGX_BO_WRITEBACK | AGX_BO_SHARED,
"Timestamp buffer");
assert(ctx->timestamps);
ret = agx_bind_timestamps(dev, ctx->timestamps, &ctx->timestamp_handle);
assert(!ret);
/* Sync object/FD used for NATIVE_FENCE_FD. */ /* Sync object/FD used for NATIVE_FENCE_FD. */
ctx->in_sync_fd = -1; ctx->in_sync_fd = -1;
@ -2101,7 +2013,7 @@ agx_init_screen_caps(struct pipe_screen *pscreen)
caps->texture_barrier = true; caps->texture_barrier = true;
/* Timer resolution is the length of a single tick in nanos */ /* Timer resolution is the length of a single tick in nanos */
caps->timer_resolution = agx_gpu_time_to_ns(agx_device(pscreen), 1); caps->timer_resolution = agx_gpu_timestamp_to_ns(agx_device(pscreen), 1);
caps->sampler_view_target = true; caps->sampler_view_target = true;
caps->texture_swizzle = true; caps->texture_swizzle = true;
@ -2452,8 +2364,7 @@ agx_screen_get_fd(struct pipe_screen *pscreen)
static uint64_t static uint64_t
agx_get_timestamp(struct pipe_screen *pscreen) agx_get_timestamp(struct pipe_screen *pscreen)
{ {
struct agx_device *dev = agx_device(pscreen); return agx_get_gpu_timestamp(agx_device(pscreen));
return agx_gpu_time_to_ns(dev, agx_get_gpu_timestamp(dev));
} }
static void static void
@ -2488,13 +2399,6 @@ agx_screen_create(int fd, struct renderonly *ro,
struct agx_screen *agx_screen; struct agx_screen *agx_screen;
struct pipe_screen *screen; struct pipe_screen *screen;
/* Refuse to probe. There is no stable UAPI yet. Upstream Mesa cannot be used
* yet with Asahi. Do not try. Do not patch out this check. Do not teach
* others about patching this check. Do not distribute upstream Mesa with
* this check patched out.
*/
return NULL;
agx_screen = rzalloc(NULL, struct agx_screen); agx_screen = rzalloc(NULL, struct agx_screen);
if (!agx_screen) if (!agx_screen)
return NULL; return NULL;

View file

@ -408,12 +408,12 @@ agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
return true; return true;
case QUERY_COPY_TIMESTAMP: case QUERY_COPY_TIMESTAMP:
vresult->u64 = agx_gpu_time_to_ns(dev, value); vresult->u64 = agx_gpu_timestamp_to_ns(dev, value);
return true; return true;
case QUERY_COPY_TIME_ELAPSED: case QUERY_COPY_TIME_ELAPSED:
/* end - begin */ /* end - begin */
vresult->u64 = agx_gpu_time_to_ns(dev, ptr[0] - ptr[1]); vresult->u64 = agx_gpu_timestamp_to_ns(dev, ptr[0] - ptr[1]);
return true; return true;
default: default:
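
Both query paths above convert a raw GPU tick count (or a tick delta for elapsed time) to nanoseconds through agx_gpu_timestamp_to_ns. A worked example of that conversion; the 24 MHz timebase is only an illustrative assumption, the driver derives the real tick length from the device parameters:

#include <stdint.h>
#include <stdio.h>

/* ns = ticks * (1e9 / tick_rate_hz); sketch only, ignores overflow for
 * very large tick counts. */
static uint64_t
ticks_to_ns(uint64_t ticks, uint64_t tick_rate_hz)
{
   return (ticks * 1000000000ull) / tick_rate_hz;
}

int
main(void)
{
   uint64_t begin = 1200, end = 3600;

   /* Elapsed = end - begin: 2400 ticks at 24 MHz = 100000 ns. */
   printf("%llu ns\n",
          (unsigned long long)ticks_to_ns(end - begin, 24000000ull));
   return 0;
}
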

View file

@ -18,7 +18,6 @@
#include "asahi/lib/agx_tilebuffer.h" #include "asahi/lib/agx_tilebuffer.h"
#include "asahi/lib/agx_uvs.h" #include "asahi/lib/agx_uvs.h"
#include "asahi/lib/pool.h" #include "asahi/lib/pool.h"
#include "asahi/lib/unstable_asahi_drm.h"
#include "asahi/libagx/geometry.h" #include "asahi/libagx/geometry.h"
#include "compiler/shader_enums.h" #include "compiler/shader_enums.h"
#include "gallium/auxiliary/util/u_blitter.h" #include "gallium/auxiliary/util/u_blitter.h"
@ -355,9 +354,13 @@ struct agx_stage {
uint32_t valid_samplers; uint32_t valid_samplers;
}; };
union agx_batch_result { struct agx_timestamps {
struct drm_asahi_result_render render; uint64_t vtx_start;
struct drm_asahi_result_compute compute; uint64_t vtx_end;
uint64_t frag_start;
uint64_t frag_end;
uint64_t comp_start;
uint64_t comp_end;
}; };
/* This is a firmware limit. It should be possible to raise to 2048 in the /* This is a firmware limit. It should be possible to raise to 2048 in the
@ -454,10 +457,6 @@ struct agx_batch {
/* Arrays of GPU pointers that should be written with the batch timestamps */ /* Arrays of GPU pointers that should be written with the batch timestamps */
struct util_dynarray timestamps; struct util_dynarray timestamps;
/* Result buffer where the kernel places command execution information */
union agx_batch_result *result;
size_t result_off;
/* Actual pointer in a uniform */ /* Actual pointer in a uniform */
struct agx_bo *geom_params_bo, *geom_index_bo; struct agx_bo *geom_params_bo, *geom_index_bo;
uint64_t geom_index; uint64_t geom_index;
@ -646,7 +645,8 @@ struct agx_context {
uint32_t queue_id; uint32_t queue_id;
struct agx_batch *batch; struct agx_batch *batch;
struct agx_bo *result_buf; struct agx_bo *timestamps;
uint32_t timestamp_handle;
struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
uint32_t vb_mask; uint32_t vb_mask;