asahi: port to stable uAPI
Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Acked-by: Eric Engestrom <eric@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33984>
This commit is contained in:
parent 3e110005a6
commit c64a2bbff5
25 changed files with 760 additions and 1257 deletions
@@ -6,21 +6,17 @@
 #include <string.h>
-#include "../lib/unstable_asahi_drm.h"
 #include "drm-shim/drm_shim.h"
+#include "drm-uapi/asahi_drm.h"
 
 bool drm_shim_driver_prefers_first_render_node = true;
 
 static const struct drm_asahi_params_global params = {
-   .unstable_uabi_version = DRM_ASAHI_UNSTABLE_UABI_VERSION,
    .gpu_generation = 13,
    .gpu_variant = 'G',
    .gpu_revision = 0,
-   .vm_user_start = 0x1000000,
-   .vm_user_end = 0x5000000,
-   .vm_usc_start = 0,
-   .vm_usc_end = 0,
-   .vm_page_size = 4096,
+   .vm_start = 0x1000000,
+   .vm_end = 0x5000000,
 };
 
 struct asahi_bo {
@@ -48,12 +44,6 @@ asahi_ioctl_noop(int fd, unsigned long request, void *arg)
    return 0;
 }
 
-static int
-asahi_ioctl_submit(int fd, unsigned long request, void *arg)
-{
-   return 0;
-}
-
 static int
 asahi_ioctl_gem_create(int fd, unsigned long request, void *arg)
 {
@@ -110,12 +100,12 @@ static ioctl_fn_t driver_ioctls[] = {
    [DRM_ASAHI_GET_PARAMS] = asahi_ioctl_get_param,
    [DRM_ASAHI_VM_CREATE] = asahi_ioctl_noop,
    [DRM_ASAHI_VM_DESTROY] = asahi_ioctl_noop,
+   [DRM_ASAHI_VM_BIND] = asahi_ioctl_noop,
    [DRM_ASAHI_GEM_CREATE] = asahi_ioctl_gem_create,
    [DRM_ASAHI_GEM_MMAP_OFFSET] = asahi_ioctl_gem_mmap_offset,
-   [DRM_ASAHI_GEM_BIND] = asahi_ioctl_noop,
    [DRM_ASAHI_QUEUE_CREATE] = asahi_ioctl_noop,
    [DRM_ASAHI_QUEUE_DESTROY] = asahi_ioctl_noop,
-   [DRM_ASAHI_SUBMIT] = asahi_ioctl_submit,
+   [DRM_ASAHI_SUBMIT] = asahi_ioctl_noop,
 };
 
 void
@@ -1119,4 +1119,9 @@
     <field name="S Resolve" start="58" size="1" type="bool"/>
   </struct>
 
+  <struct name="CR ISP ZLS Pixels" size="4">
+    <field name="X" start="0" size="15" type="uint" modifier="minus(1)"/>
+    <field name="Y" start="15" size="15" type="uint" modifier="minus(1)"/>
+  </struct>
+
 </genxml>
@@ -1,4 +0,0 @@
-[unstable_asahi_drm.h]
-indent_style = tab
-indent_size = 8
-max_line_length = 100
@@ -11,6 +11,7 @@
 #include "util/ralloc.h"
 #include "agx_device.h"
 #include "decode.h"
+#include "layout.h"
 
 /* Helper to calculate the bucket index of a BO */
 static unsigned
@@ -355,8 +356,8 @@ agx_bo_create(struct agx_device *dev, size_t size, unsigned align,
    assert(size > 0);
 
    /* BOs are allocated in pages */
-   size = ALIGN_POT(size, (size_t)dev->params.vm_page_size);
-   align = MAX2(align, dev->params.vm_page_size);
+   size = ALIGN_POT(size, AIL_PAGESIZE);
+   align = MAX2(align, AIL_PAGESIZE);
 
    /* See if we have a BO already in the cache */
    bo = agx_bo_cache_fetch(dev, size, align, flags, true);
@@ -88,6 +88,11 @@ struct agx_bo {
    /* Process-local index */
    uint32_t handle;
 
+   /* Handle to refer to this BO in uAPI calls. This is either the GEM handle
+    * on native Linux, or the virtio resource ID with virtgpu.
+    */
+   uint32_t uapi_handle;
+
    /* DMA-BUF fd clone for adding fences to imports/exports */
    int prime_fd;
@@ -99,10 +104,6 @@ struct agx_bo {
 
    /* For debugging */
    const char *label;
-
-   /* virtio blob_id */
-   uint32_t blob_id;
-   uint32_t vbo_res_id;
 };
 
 static inline uint32_t
@@ -8,6 +8,7 @@
 #include "agx_device.h"
 #include <inttypes.h>
 #include "clc/asahi_clc.h"
+#include "drm-uapi/asahi_drm.h"
 #include "util/macros.h"
 #include "util/ralloc.h"
 #include "util/timespec.h"
@@ -18,6 +19,7 @@
 #include "agx_scratch.h"
 #include "decode.h"
 #include "glsl_types.h"
+#include "layout.h"
 #include "libagx_dgc.h"
 #include "libagx_shaders.h"
@@ -34,7 +36,6 @@
 #include "util/u_printf.h"
 #include "git_sha1.h"
 #include "nir_serialize.h"
-#include "unstable_asahi_drm.h"
 #include "vdrm.h"
 
 static inline int
@@ -80,7 +81,7 @@ static const struct debug_named_value agx_debug_options[] = {
 void
 agx_bo_free(struct agx_device *dev, struct agx_bo *bo)
 {
-   const uint64_t handle = bo->handle;
+   const uint64_t handle = bo->uapi_handle;
 
    if (bo->_map)
      munmap(bo->_map, bo->size);
@@ -103,33 +104,58 @@ agx_bo_free(struct agx_device *dev, struct agx_bo *bo)
 }
 
 static int
+agx_drm_bo_bind(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops,
+                uint32_t count)
+{
+   struct drm_asahi_vm_bind vm_bind = {
+      .num_binds = count,
+      .vm_id = dev->vm_id,
+      .userptr = (uintptr_t)ops,
+      .stride = sizeof(*ops),
+   };
+
+   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_VM_BIND, &vm_bind);
+   if (ret) {
+      fprintf(stderr, "DRM_IOCTL_ASAHI_VM_BIND failed\n");
+   }
+
+   return ret;
+}
+
+/*
+ * Convenience helper to bind a single BO regardless of kernel module.
+ */
+int
 agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
-            size_t size_B, uint64_t offset_B, uint32_t flags, bool unbind)
+            size_t size_B, uint64_t offset_B, uint32_t flags)
 {
    assert((size_B % 16384) == 0 && "alignment required");
    assert((offset_B % 16384) == 0 && "alignment required");
    assert((addr % 16384) == 0 && "alignment required");
 
-   struct drm_asahi_gem_bind gem_bind = {
-      .op = unbind ? ASAHI_BIND_OP_UNBIND : ASAHI_BIND_OP_BIND,
+   struct drm_asahi_gem_bind_op op = {
       .flags = flags,
-      .handle = bo ? bo->handle : 0,
-      .vm_id = dev->vm_id,
+      .handle = bo ? bo->uapi_handle : 0,
      .offset = offset_B,
      .range = size_B,
      .addr = addr,
    };
 
-   assert((size_B % 16384) == 0 && "page alignment required");
-   assert((offset_B % 16384) == 0 && "page alignment required");
-   assert((addr % 16384) == 0 && "page alignment required");
-
-   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND, &gem_bind);
-   if (ret) {
-      fprintf(stderr, "DRM_IOCTL_ASAHI_GEM_BIND failed: %m (handle=%d)\n",
-              bo ? bo->handle : 0);
-   }
+   return dev->ops.bo_bind(dev, &op, 1);
+}
+
+int
+agx_bind_timestamps(struct agx_device *dev, struct agx_bo *bo, uint32_t *handle)
+{
+   struct drm_asahi_gem_bind_object bind = {
+      .op = DRM_ASAHI_BIND_OBJECT_OP_BIND,
+      .flags = DRM_ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS,
+      .handle = bo->uapi_handle,
+      .range = bo->size,
+   };
+
+   int ret = dev->ops.bo_bind_object(dev, &bind);
+   *handle = bind.object_handle;
 
    return ret;
 }
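For readers tracing the new binding flow, here is a minimal usage sketch (not part of the patch): agx_bo_bind() packs a single struct drm_asahi_gem_bind_op and hands it to dev->ops.bo_bind, and the native backend (agx_drm_bo_bind above) forwards the array to the kernel via DRM_IOCTL_ASAHI_VM_BIND. The function name, device pointer and address below are illustrative assumptions.

/* Hedged sketch (illustrative only): map a BO read/write at gpu_va and later
 * unbind the range, using the single-op wrapper introduced above. */
static int
example_map_then_unmap(struct agx_device *dev, struct agx_bo *bo, uint64_t gpu_va)
{
   /* Bind the whole BO; flag names come from the stable uAPI header. */
   int ret = agx_bo_bind(dev, bo, gpu_va, bo->size, 0,
                         DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE);
   if (ret)
      return ret;

   /* ... use the mapping ... */

   /* Unbinding goes through the same path with a NULL BO, as agx_va_free does. */
   return agx_bo_bind(dev, NULL, gpu_va, bo->size, 0, DRM_ASAHI_BIND_UNBIND);
}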
@@ -146,10 +172,10 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
    struct drm_asahi_gem_create gem_create = {.size = size};
 
    if (flags & AGX_BO_WRITEBACK)
-      gem_create.flags |= ASAHI_GEM_WRITEBACK;
+      gem_create.flags |= DRM_ASAHI_GEM_WRITEBACK;
 
    if (!(flags & (AGX_BO_SHARED | AGX_BO_SHAREABLE))) {
-      gem_create.flags |= ASAHI_GEM_VM_PRIVATE;
+      gem_create.flags |= DRM_ASAHI_GEM_VM_PRIVATE;
       gem_create.vm_id = dev->vm_id;
    }
@@ -173,7 +199,7 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
    bo->size = gem_create.size;
    bo->align = align;
    bo->flags = flags;
-   bo->handle = handle;
+   bo->handle = bo->uapi_handle = handle;
    bo->prime_fd = -1;
 
    enum agx_va_flags va_flags = flags & AGX_BO_LOW_VA ? AGX_VA_USC : 0;
@@ -184,12 +210,12 @@ agx_bo_alloc(struct agx_device *dev, size_t size, size_t align,
       return NULL;
    }
 
-   uint32_t bind = ASAHI_BIND_READ;
+   uint32_t bind = DRM_ASAHI_BIND_READ;
    if (!(flags & AGX_BO_READONLY)) {
-      bind |= ASAHI_BIND_WRITE;
+      bind |= DRM_ASAHI_BIND_WRITE;
    }
 
-   ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0, bind, false);
+   ret = agx_bo_bind(dev, bo, bo->va->addr, bo->size, 0, bind);
    if (ret) {
       agx_bo_free(dev, bo);
       return NULL;
@@ -203,7 +229,8 @@ agx_bo_mmap(struct agx_device *dev, struct agx_bo *bo)
 {
    assert(bo->_map == NULL && "not double mapped");
 
-   struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle = bo->handle};
+   struct drm_asahi_gem_mmap_offset gem_mmap_offset = {.handle =
+                                                          bo->uapi_handle};
    int ret;
 
    ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET, &gem_mmap_offset);
@@ -245,7 +272,7 @@ agx_bo_import(struct agx_device *dev, int fd)
    if (!bo->size) {
       bo->dev = dev;
       bo->size = lseek(fd, 0, SEEK_END);
-      bo->align = dev->params.vm_page_size;
+      bo->align = AIL_PAGESIZE;
 
       /* Sometimes this can fail and return -1. size of -1 is not
       * a nice thing for mmap to try mmap. Be more robust also
@@ -255,7 +282,7 @@ agx_bo_import(struct agx_device *dev, int fd)
         pthread_mutex_unlock(&dev->bo_map_lock);
         return NULL;
      }
-      if (bo->size & (dev->params.vm_page_size - 1)) {
+      if (bo->size & (AIL_PAGESIZE - 1)) {
         fprintf(
            stderr,
            "import failed: BO is not a multiple of the page size (0x%llx bytes)\n",
@@ -281,11 +308,13 @@ agx_bo_import(struct agx_device *dev, int fd)
    }
 
    if (dev->is_virtio) {
-      bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, bo->handle);
+      bo->uapi_handle = vdrm_handle_to_res_id(dev->vdrm, bo->handle);
+   } else {
+      bo->uapi_handle = bo->handle;
    }
 
-   ret = dev->ops.bo_bind(dev, bo, bo->va->addr, bo->size, 0,
-                          ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
+   ret = agx_bo_bind(dev, bo, bo->va->addr, bo->size, 0,
+                     DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE);
    if (ret) {
       fprintf(stderr, "import failed: Could not bind BO at 0x%llx\n",
               (long long)bo->va->addr);
@@ -361,38 +390,24 @@ agx_bo_export(struct agx_device *dev, struct agx_bo *bo)
 }
 
 static int
-agx_bo_bind_object(struct agx_device *dev, struct agx_bo *bo,
-                   uint32_t *object_handle, size_t size_B, uint64_t offset_B,
-                   uint32_t flags)
+agx_bo_bind_object(struct agx_device *dev,
+                   struct drm_asahi_gem_bind_object *bind)
 {
-   struct drm_asahi_gem_bind_object gem_bind = {
-      .op = ASAHI_BIND_OBJECT_OP_BIND,
-      .flags = flags,
-      .handle = bo->handle,
-      .vm_id = 0,
-      .offset = offset_B,
-      .range = size_B,
-   };
-
-   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, &gem_bind);
+   int ret = drmIoctl(dev->fd, DRM_IOCTL_ASAHI_GEM_BIND_OBJECT, bind);
    if (ret) {
       fprintf(stderr,
               "DRM_IOCTL_ASAHI_GEM_BIND_OBJECT failed: %m (handle=%d)\n",
-              bo->handle);
+              bind->handle);
    }
 
-   *object_handle = gem_bind.object_handle;
-
    return ret;
 }
 
 static int
-agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle,
-                     uint32_t flags)
+agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle)
 {
    struct drm_asahi_gem_bind_object gem_bind = {
-      .op = ASAHI_BIND_OBJECT_OP_UNBIND,
-      .flags = flags,
+      .op = DRM_ASAHI_BIND_OBJECT_OP_UNBIND,
       .object_handle = object_handle,
    };
 
@@ -406,23 +421,6 @@ agx_bo_unbind_object(struct agx_device *dev, uint32_t object_handle,
    return ret;
 }
 
-static void
-agx_get_global_ids(struct agx_device *dev)
-{
-   dev->next_global_id = 0;
-   dev->last_global_id = 0x1000000;
-}
-
-uint64_t
-agx_get_global_id(struct agx_device *dev)
-{
-   if (unlikely(dev->next_global_id >= dev->last_global_id)) {
-      agx_get_global_ids(dev);
-   }
-
-   return dev->next_global_id++;
-}
-
 static ssize_t
 agx_get_params(struct agx_device *dev, void *buf, size_t size)
 {
@@ -452,7 +450,7 @@ agx_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
 
 const agx_device_ops_t agx_device_drm_ops = {
    .bo_alloc = agx_bo_alloc,
-   .bo_bind = agx_bo_bind,
+   .bo_bind = agx_drm_bo_bind,
    .bo_mmap = agx_bo_mmap,
    .get_params = agx_get_params,
    .submit = agx_submit,
@@ -475,16 +473,12 @@ gcd(uint64_t n, uint64_t m)
 static void
 agx_init_timestamps(struct agx_device *dev)
 {
-   uint64_t ts_gcd = gcd(dev->params.timer_frequency_hz, NSEC_PER_SEC);
-
-   dev->timestamp_to_ns.num = NSEC_PER_SEC / ts_gcd;
-   dev->timestamp_to_ns.den = dev->params.timer_frequency_hz / ts_gcd;
-
-   uint64_t user_ts_gcd = gcd(dev->params.timer_frequency_hz, NSEC_PER_SEC);
+   uint64_t user_ts_gcd =
+      gcd(dev->params.command_timestamp_frequency_hz, NSEC_PER_SEC);
 
    dev->user_timestamp_to_ns.num = NSEC_PER_SEC / user_ts_gcd;
    dev->user_timestamp_to_ns.den =
-      dev->params.user_timestamp_frequency_hz / user_ts_gcd;
+      dev->params.command_timestamp_frequency_hz / user_ts_gcd;
 }
 
 bool
@@ -533,47 +527,6 @@ agx_open_device(void *memctx, struct agx_device *dev)
    }
    assert(params_size >= sizeof(dev->params));
 
-   /* Refuse to probe. */
-   if (dev->params.unstable_uabi_version != DRM_ASAHI_UNSTABLE_UABI_VERSION) {
-      fprintf(
-         stderr,
-         "You are attempting to use upstream Mesa with a downstream kernel!\n"
-         "This WILL NOT work.\n"
-         "The Asahi UABI is unstable and NOT SUPPORTED in upstream Mesa.\n"
-         "UABI related code in upstream Mesa is not for use!\n"
-         "\n"
-         "Do NOT attempt to patch out checks, you WILL break your system.\n"
-         "Do NOT report bugs.\n"
-         "Do NOT ask Mesa developers for support.\n"
-         "Do NOT write guides about how to patch out these checks.\n"
-         "Do NOT package patches to Mesa to bypass this.\n"
-         "\n"
-         "~~~\n"
-         "This is not a place of honor.\n"
-         "No highly esteemed deed is commemorated here.\n"
-         "Nothing valued is here.\n"
-         "\n"
-         "What is here was dangerous and repulsive to us.\n"
-         "This message is a warning about danger.\n"
-         "\n"
-         "The danger is still present, in your time, as it was in ours.\n"
-         "The danger is unleashed only if you substantially disturb this place physically.\n"
-         "This place is best shunned and left uninhabited.\n"
-         "~~~\n"
-         "\n"
-         "THIS IS NOT A BUG. THIS IS YOU DOING SOMETHING BROKEN!\n");
-      abort();
-   }
-
-   uint64_t incompat =
-      dev->params.feat_incompat & (~AGX_SUPPORTED_INCOMPAT_FEATURES);
-   if (incompat) {
-      fprintf(stderr, "Missing GPU incompat features: 0x%" PRIx64 "\n",
-              incompat);
-      assert(0);
-      return false;
-   }
-
    assert(dev->params.gpu_generation >= 13);
    const char *variant = " Unknown";
    switch (dev->params.gpu_variant) {
@@ -611,14 +564,10 @@ agx_open_device(void *memctx, struct agx_device *dev)
    assert(reservation == LIBAGX_PRINTF_BUFFER_ADDRESS);
    reservation += LIBAGX_PRINTF_BUFFER_SIZE;
 
-   dev->guard_size = dev->params.vm_page_size;
-   if (dev->params.vm_usc_start) {
-      dev->shader_base = dev->params.vm_usc_start;
-   } else {
-      // Put the USC heap at the bottom of the user address space, 4GiB aligned
-      dev->shader_base = ALIGN_POT(MAX2(dev->params.vm_user_start, reservation),
-                                   0x100000000ull);
-   }
+   dev->guard_size = AIL_PAGESIZE;
+   // Put the USC heap at the bottom of the user address space, 4GiB aligned
+   dev->shader_base =
+      ALIGN_POT(MAX2(dev->params.vm_start, reservation), 0x100000000ull);
 
    if (dev->shader_base < reservation) {
       /* Our robustness implementation requires the bottom unmapped */
@@ -631,8 +580,8 @@ agx_open_device(void *memctx, struct agx_device *dev)
    // Put the user heap after the USC heap
    uint64_t user_start = dev->shader_base + shader_size;
 
-   assert(dev->shader_base >= dev->params.vm_user_start);
-   assert(user_start < dev->params.vm_user_end);
+   assert(dev->shader_base >= dev->params.vm_start);
+   assert(user_start < dev->params.vm_end);
 
    dev->agxdecode = agxdecode_new_context(dev->shader_base);
@@ -652,8 +601,8 @@ agx_open_device(void *memctx, struct agx_device *dev)
    // reasonable use case.
    uint64_t kernel_size = MAX2(dev->params.vm_kernel_min_size, 32ull << 30);
    struct drm_asahi_vm_create vm_create = {
-      .kernel_start = dev->params.vm_user_end - kernel_size,
-      .kernel_end = dev->params.vm_user_end,
+      .kernel_start = dev->params.vm_end - kernel_size,
+      .kernel_end = dev->params.vm_end,
    };
 
    uint64_t user_size = vm_create.kernel_start - user_start;
@@ -671,8 +620,6 @@ agx_open_device(void *memctx, struct agx_device *dev)
 
    dev->vm_id = vm_create.vm_id;
 
-   agx_get_global_ids(dev);
-
    glsl_type_singleton_init_or_ref();
 
    if (agx_gather_device_key(dev).needs_g13x_coherency == U_TRISTATE_YES) {
@@ -698,8 +645,8 @@ agx_open_device(void *memctx, struct agx_device *dev)
     */
    {
       void *bo = agx_bo_create(dev, 16384, 0, 0, "Zero page");
-      int ret = dev->ops.bo_bind(dev, bo, AGX_ZERO_PAGE_ADDRESS, 16384, 0,
-                                 ASAHI_BIND_READ, false);
+      int ret = agx_bo_bind(dev, bo, AGX_ZERO_PAGE_ADDRESS, 16384, 0,
+                            DRM_ASAHI_BIND_READ);
       if (ret) {
         fprintf(stderr, "Failed to bind zero page");
         return false;
@@ -709,9 +656,9 @@ agx_open_device(void *memctx, struct agx_device *dev)
    void *bo = agx_bo_create(dev, LIBAGX_PRINTF_BUFFER_SIZE, 0, AGX_BO_WRITEBACK,
                             "Printf/abort");
 
-   ret = dev->ops.bo_bind(dev, bo, LIBAGX_PRINTF_BUFFER_ADDRESS,
-                          LIBAGX_PRINTF_BUFFER_SIZE, 0,
-                          ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
+   ret = agx_bo_bind(dev, bo, LIBAGX_PRINTF_BUFFER_ADDRESS,
+                     LIBAGX_PRINTF_BUFFER_SIZE, 0,
+                     DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE);
    if (ret) {
       fprintf(stderr, "Failed to bind printf buffer");
       return false;
@@ -738,8 +685,8 @@ agx_close_device(struct agx_device *dev)
 }
 
 uint32_t
-agx_create_command_queue(struct agx_device *dev, uint32_t caps,
-                         uint32_t priority)
+agx_create_command_queue(struct agx_device *dev,
+                         enum drm_asahi_priority priority)
 {
 
    if (dev->debug & AGX_DBG_1QUEUE) {
@@ -753,9 +700,8 @@ agx_create_command_queue(struct agx_device *dev, uint32_t caps,
 
    struct drm_asahi_queue_create queue_create = {
       .vm_id = dev->vm_id,
-      .queue_caps = caps,
       .priority = priority,
-      .flags = 0,
+      .usc_exec_base = dev->shader_base,
    };
 
    int ret =
@@ -873,28 +819,14 @@ agx_debug_fault(struct agx_device *dev, uint64_t addr)
 uint64_t
 agx_get_gpu_timestamp(struct agx_device *dev)
 {
-   if (dev->params.feat_compat & DRM_ASAHI_FEAT_GETTIME) {
-      struct drm_asahi_get_time get_time = {.flags = 0, .extensions = 0};
+   struct drm_asahi_get_time get_time = {.flags = 0};
 
-      int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_GET_TIME, &get_time);
-      if (ret) {
-         fprintf(stderr, "DRM_IOCTL_ASAHI_GET_TIME failed: %m\n");
-      } else {
-         return get_time.gpu_timestamp;
-      }
+   int ret = asahi_simple_ioctl(dev, DRM_IOCTL_ASAHI_GET_TIME, &get_time);
+   if (ret) {
+      fprintf(stderr, "DRM_IOCTL_ASAHI_GET_TIME failed: %m\n");
    }
 
-#if DETECT_ARCH_AARCH64
-   uint64_t ret;
-   __asm__ volatile("mrs \t%0, cntvct_el0" : "=r"(ret));
-   return ret;
-#elif DETECT_ARCH_X86 || DETECT_ARCH_X86_64
-   /* Maps to the above when run under FEX without thunking */
-   uint32_t high, low;
-   __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
-   return (uint64_t)low | ((uint64_t)high << 32);
-#else
-#error "invalid architecture for asahi"
-#endif
+   return get_time.gpu_timestamp;
 }
 
 /* (Re)define UUID_SIZE to avoid including vulkan.h (or p_defines.h) here. */
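A note on consuming the sampled timestamp: agx_init_timestamps (earlier in this diff) now derives a single num/den ratio from command_timestamp_frequency_hz, and agx_device.h keeps an agx_gpu_timestamp_to_ns helper. A minimal sketch of that conversion, assuming the helper applies the ratio the same way the removed agx_gpu_time_to_ns did:

/* Hedged sketch: scale a raw GPU timestamp to nanoseconds with the ratio
 * computed by agx_init_timestamps(). The retained helper in agx_device.h may
 * differ in detail; this mirrors the removed agx_gpu_time_to_ns(). */
static inline uint64_t
example_gpu_timestamp_to_ns(const struct agx_device *dev, uint64_t gpu_timestamp)
{
   return (gpu_timestamp * dev->user_timestamp_to_ns.num) /
          dev->user_timestamp_to_ns.den;
}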
@@ -7,6 +7,7 @@
 
 #include <stdint.h>
 #include <xf86drm.h>
+#include "drm-uapi/asahi_drm.h"
 #include "util/ralloc.h"
 #include "util/simple_mtx.h"
 #include "util/sparse_array.h"
@@ -18,16 +19,11 @@
 #include "decode.h"
 #include "layout.h"
 #include "libagx_dgc.h"
-#include "unstable_asahi_drm.h"
 
 #include "vdrm.h"
 
 #include "asahi_proto.h"
 
-// TODO: this is a lie right now
-static const uint64_t AGX_SUPPORTED_INCOMPAT_FEATURES =
-   DRM_ASAHI_FEAT_MANDATORY_ZS_COMPRESSION;
-
 enum agx_dbg {
    AGX_DBG_TRACE = BITFIELD_BIT(0),
    AGX_DBG_BODUMP = BITFIELD_BIT(1),
@@ -69,7 +65,6 @@ struct nir_shader;
 #define BARRIER_COMPUTE (1 << DRM_ASAHI_SUBQUEUE_COMPUTE)
 
 struct agx_submit_virt {
-   uint32_t vbo_res_id;
    uint32_t extres_count;
    struct asahi_ccmd_submit_res *extres;
 };
@@ -77,21 +72,23 @@ struct agx_submit_virt {
 typedef struct {
    struct agx_bo *(*bo_alloc)(struct agx_device *dev, size_t size, size_t align,
                               enum agx_bo_flags flags);
-   int (*bo_bind)(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
-                  size_t size_B, uint64_t offset_B, uint32_t flags,
-                  bool unbind);
+   int (*bo_bind)(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops,
+                  uint32_t count);
    void (*bo_mmap)(struct agx_device *dev, struct agx_bo *bo);
    ssize_t (*get_params)(struct agx_device *dev, void *buf, size_t size);
    int (*submit)(struct agx_device *dev, struct drm_asahi_submit *submit,
                 struct agx_submit_virt *virt);
-   int (*bo_bind_object)(struct agx_device *dev, struct agx_bo *bo,
-                         uint32_t *object_handle, size_t size_B,
-                         uint64_t offset_B, uint32_t flags);
-   int (*bo_unbind_object)(struct agx_device *dev, uint32_t object_handle,
-                           uint32_t flags);
+   int (*bo_bind_object)(struct agx_device *dev,
+                         struct drm_asahi_gem_bind_object *bind);
+   int (*bo_unbind_object)(struct agx_device *dev, uint32_t object_handle);
 } agx_device_ops_t;
 
+int agx_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
+                size_t size_B, uint64_t offset_B, uint32_t flags);
+
+int agx_bind_timestamps(struct agx_device *dev, struct agx_bo *bo,
+                        uint32_t *handle);
+
 struct agx_device {
    uint32_t debug;
@@ -100,7 +97,6 @@ struct agx_device {
 
    char name[64];
    struct drm_asahi_params_global params;
-   uint64_t next_global_id, last_global_id;
    bool is_virtio;
    agx_device_ops_t ops;
@@ -160,11 +156,6 @@ struct agx_device {
    /* Simplified device selection */
    enum agx_chip chip;
 
-   struct {
-      uint64_t num;
-      uint64_t den;
-   } timestamp_to_ns;
-
    struct {
       uint64_t num;
       uint64_t den;
@@ -185,7 +176,7 @@ agx_bo_map(struct agx_bo *bo)
 static inline bool
 agx_has_soft_fault(struct agx_device *dev)
 {
-   return (dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS) &&
+   return (dev->params.features & DRM_ASAHI_FEATURE_SOFT_FAULTS) &&
          !(dev->debug & AGX_DBG_NOSOFT);
 }
@@ -208,10 +199,8 @@ agx_lookup_bo(struct agx_device *dev, uint32_t handle)
    return util_sparse_array_get(&dev->bo_map, handle);
 }
 
-uint64_t agx_get_global_id(struct agx_device *dev);
-uint32_t agx_create_command_queue(struct agx_device *dev, uint32_t caps,
-                                  uint32_t priority);
+uint32_t agx_create_command_queue(struct agx_device *dev,
+                                  enum drm_asahi_priority priority);
 int agx_destroy_command_queue(struct agx_device *dev, uint32_t queue_id);
 
 int agx_import_sync_file(struct agx_device *dev, struct agx_bo *bo, int fd);
@@ -221,12 +210,6 @@ void agx_debug_fault(struct agx_device *dev, uint64_t addr);
 
 uint64_t agx_get_gpu_timestamp(struct agx_device *dev);
 
-static inline uint64_t
-agx_gpu_time_to_ns(struct agx_device *dev, uint64_t gpu_time)
-{
-   return (gpu_time * dev->timestamp_to_ns.num) / dev->timestamp_to_ns.den;
-}
-
 static inline uint64_t
 agx_gpu_timestamp_to_ns(struct agx_device *dev, uint64_t gpu_timestamp)
 {
@@ -245,8 +228,14 @@ struct agx_va *agx_va_alloc(struct agx_device *dev, uint64_t size_B,
                             uint64_t fixed_va);
 void agx_va_free(struct agx_device *dev, struct agx_va *va, bool unbind);
 
-static inline bool
-agx_supports_timestamps(const struct agx_device *dev)
+static inline struct drm_asahi_cmd_header
+agx_cmd_header(bool compute, uint16_t barrier_vdm, uint16_t barrier_cdm)
 {
-   return (dev->params.feat_compat & DRM_ASAHI_FEAT_USER_TIMESTAMPS);
+   return (struct drm_asahi_cmd_header){
+      .cmd_type = compute ? DRM_ASAHI_CMD_COMPUTE : DRM_ASAHI_CMD_RENDER,
+      .size = compute ? sizeof(struct drm_asahi_cmd_compute)
+                      : sizeof(struct drm_asahi_cmd_render),
+      .vdm_barrier = barrier_vdm,
+      .cdm_barrier = barrier_cdm,
+   };
 }
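The new agx_cmd_header() helper fills the per-command header that prefixes each command in the flat cmdbuf consumed by DRM_ASAHI_SUBMIT. A hedged sketch of how a caller might use it when packing a compute command (buffer management, barrier values and the function name are assumptions; assumes <string.h> and <stdint.h>):

/* Hedged sketch: write a header followed by the compute command payload into
 * a caller-provided buffer. Real callers derive the barrier fields per
 * subqueue; zero is used here purely for illustration. */
static void
example_pack_compute_cmd(uint8_t *out, const struct drm_asahi_cmd_compute *cmd)
{
   struct drm_asahi_cmd_header hdr = agx_cmd_header(true /* compute */, 0, 0);

   memcpy(out, &hdr, sizeof(hdr));
   memcpy(out + sizeof(hdr), cmd, hdr.size); /* hdr.size == sizeof(*cmd) here */
}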
@@ -9,7 +9,6 @@
 #include <sys/mman.h>
 
 #include "drm-uapi/virtgpu_drm.h"
-#include "unstable_asahi_drm.h"
 
 #include "vdrm.h"
@@ -67,14 +66,14 @@ agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align,
    };
 
    if (flags & AGX_BO_WRITEBACK)
-      req.flags |= ASAHI_GEM_WRITEBACK;
+      req.flags |= DRM_ASAHI_GEM_WRITEBACK;
 
    uint32_t blob_flags =
       VIRTGPU_BLOB_FLAG_USE_MAPPABLE | VIRTGPU_BLOB_FLAG_USE_SHAREABLE;
 
-   req.bind_flags = ASAHI_BIND_READ;
+   req.bind_flags = DRM_ASAHI_BIND_READ;
    if (!(flags & AGX_BO_READONLY)) {
-      req.bind_flags |= ASAHI_BIND_WRITE;
+      req.bind_flags |= DRM_ASAHI_BIND_WRITE;
    }
 
    uint32_t blob_id = p_atomic_inc_return(&dev->next_blob_id);
@@ -86,7 +85,6 @@ agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align,
       return NULL;
    }
 
-   /* Note: optional, can zero out for not mapping for sparse */
    req.addr = va->addr;
    req.blob_id = blob_id;
    req.vm_id = dev->vm_id;
@@ -111,55 +109,46 @@ agx_virtio_bo_alloc(struct agx_device *dev, size_t size, size_t align,
    bo->flags = flags;
    bo->handle = handle;
    bo->prime_fd = -1;
-   bo->blob_id = blob_id;
    bo->va = va;
-   bo->vbo_res_id = vdrm_handle_to_res_id(dev->vdrm, handle);
+   bo->uapi_handle = vdrm_handle_to_res_id(dev->vdrm, handle);
 
    return bo;
 }
 
 static int
-agx_virtio_bo_bind(struct agx_device *dev, struct agx_bo *bo, uint64_t addr,
-                   size_t size_B, uint64_t offset_B, uint32_t flags,
-                   bool unbind)
+agx_virtio_bo_bind(struct agx_device *dev, struct drm_asahi_gem_bind_op *ops,
+                   uint32_t count)
 {
-   struct asahi_ccmd_gem_bind_req req = {
-      .hdr.cmd = ASAHI_CCMD_GEM_BIND,
-      .hdr.len = sizeof(struct asahi_ccmd_gem_bind_req),
-      .bind = {
-         .op = unbind ? ASAHI_BIND_OP_UNBIND : ASAHI_BIND_OP_BIND,
-         .flags = flags,
-         .vm_id = dev->vm_id,
-         .handle = bo ? bo->vbo_res_id : 0,
-         .offset = offset_B,
-         .range = size_B,
-         .addr = addr,
-      }};
-
-   int ret = vdrm_send_req(dev->vdrm, &req.hdr, false);
+   size_t payload_size = sizeof(*ops) * count;
+   size_t req_len = sizeof(struct asahi_ccmd_vm_bind_req) + payload_size;
+   struct asahi_ccmd_vm_bind_req *req = calloc(1, req_len);
+
+   *req = (struct asahi_ccmd_vm_bind_req){
+      .hdr.cmd = ASAHI_CCMD_VM_BIND,
+      .hdr.len = sizeof(struct asahi_ccmd_vm_bind_req),
+      .vm_id = dev->vm_id,
+      .stride = sizeof(*ops),
+      .count = count,
+   };
+
+   memcpy(req->payload, ops, payload_size);
+
+   int ret = vdrm_send_req(dev->vdrm, &req->hdr, false);
    if (ret) {
-      fprintf(stderr, "ASAHI_CCMD_GEM_BIND failed: %d (handle=%d)\n", ret,
-              bo ? bo->handle : 0);
+      fprintf(stderr, "ASAHI_CCMD_GEM_BIND failed: %d\n", ret);
    }
 
    return ret;
 }
 
 static int
-agx_virtio_bo_bind_object(struct agx_device *dev, struct agx_bo *bo,
-                          uint32_t *object_handle, size_t size_B,
-                          uint64_t offset_B, uint32_t flags)
+agx_virtio_bo_bind_object(struct agx_device *dev,
+                          struct drm_asahi_gem_bind_object *bind)
 {
    struct asahi_ccmd_gem_bind_object_req req = {
       .hdr.cmd = ASAHI_CCMD_GEM_BIND_OBJECT,
       .hdr.len = sizeof(struct asahi_ccmd_gem_bind_object_req),
-      .bind = {
-         .op = ASAHI_BIND_OBJECT_OP_BIND,
-         .flags = flags,
-         .vm_id = 0,
-         .handle = bo->vbo_res_id,
-         .offset = offset_B,
-         .range = size_B,
-      }};
+      .bind = *bind,
+   };
 
    struct asahi_ccmd_gem_bind_object_rsp *rsp;
@@ -170,25 +159,23 @@ agx_virtio_bo_bind_object(struct agx_device *dev, struct agx_bo *bo,
    if (ret || rsp->ret) {
       fprintf(stderr,
               "ASAHI_CCMD_GEM_BIND_OBJECT bind failed: %d:%d (handle=%d)\n",
-              ret, rsp->ret, bo->handle);
+              ret, rsp->ret, bind->handle);
    }
 
    if (!rsp->ret)
-      *object_handle = rsp->object_handle;
+      bind->object_handle = rsp->object_handle;
 
    return rsp->ret;
 }
 
 static int
-agx_virtio_bo_unbind_object(struct agx_device *dev, uint32_t object_handle,
-                            uint32_t flags)
+agx_virtio_bo_unbind_object(struct agx_device *dev, uint32_t object_handle)
 {
    struct asahi_ccmd_gem_bind_object_req req = {
       .hdr.cmd = ASAHI_CCMD_GEM_BIND_OBJECT,
       .hdr.len = sizeof(struct asahi_ccmd_gem_bind_object_req),
       .bind = {
-         .op = ASAHI_BIND_OBJECT_OP_UNBIND,
-         .flags = flags,
+         .op = DRM_ASAHI_BIND_OBJECT_OP_UNBIND,
         .object_handle = object_handle,
      }};
 
@@ -228,89 +215,24 @@ agx_virtio_get_params(struct agx_device *dev, void *buf, size_t size)
                              sizeof(struct asahi_ccmd_get_params_rsp) + size);
 
    int ret = vdrm_send_req(vdrm, &req.hdr, true);
-   if (ret)
-      goto out;
-
-   if (rsp->virt_uabi_version != ASAHI_PROTO_UNSTABLE_UABI_VERSION) {
-      fprintf(stderr, "Virt UABI mismatch: Host %d, Mesa %d\n",
-              rsp->virt_uabi_version, ASAHI_PROTO_UNSTABLE_UABI_VERSION);
-      return -1;
-   }
+   if (!ret)
+      return ret;
 
    ret = rsp->ret;
-   if (!ret) {
-      memcpy(buf, &rsp->payload, size);
-      return size;
-   }
-
-out:
-   return ret;
-}
-
-static void
-agx_virtio_serialize_attachments(char **ptr, uint64_t attachments,
-                                 uint32_t count)
-{
-   if (!count)
-      return;
-
-   size_t attachments_size = sizeof(struct drm_asahi_attachment) * count;
-   memcpy(*ptr, (char *)(uintptr_t)attachments, attachments_size);
-   *ptr += attachments_size;
+   if (ret)
+      return ret;
+
+   memcpy(buf, &rsp->payload, size);
+   return size;
 }
 
 static int
 agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
                   struct agx_submit_virt *virt)
 {
-   struct drm_asahi_command *commands =
-      (struct drm_asahi_command *)(uintptr_t)submit->commands;
-   struct drm_asahi_sync *in_syncs =
-      (struct drm_asahi_sync *)(uintptr_t)submit->in_syncs;
-   struct drm_asahi_sync *out_syncs =
-      (struct drm_asahi_sync *)(uintptr_t)submit->out_syncs;
-   size_t req_len = sizeof(struct asahi_ccmd_submit_req);
-
-   for (int i = 0; i < submit->command_count; i++) {
-      switch (commands[i].cmd_type) {
-      case DRM_ASAHI_CMD_COMPUTE: {
-         struct drm_asahi_cmd_compute *compute =
-            (struct drm_asahi_cmd_compute *)(uintptr_t)commands[i].cmd_buffer;
-         req_len += sizeof(struct drm_asahi_command) +
-                    sizeof(struct drm_asahi_cmd_compute);
-         req_len +=
-            compute->attachment_count * sizeof(struct drm_asahi_attachment);
-
-         if (compute->extensions) {
-            assert(*(uint32_t *)(uintptr_t)compute->extensions ==
-                   ASAHI_COMPUTE_EXT_TIMESTAMPS);
-            req_len += sizeof(struct drm_asahi_cmd_compute_user_timestamps);
-         }
-         break;
-      }
-
-      case DRM_ASAHI_CMD_RENDER: {
-         struct drm_asahi_cmd_render *render =
-            (struct drm_asahi_cmd_render *)(uintptr_t)commands[i].cmd_buffer;
-         req_len += sizeof(struct drm_asahi_command) +
-                    sizeof(struct drm_asahi_cmd_render);
-         req_len += render->fragment_attachment_count *
-                    sizeof(struct drm_asahi_attachment);
-         req_len += render->vertex_attachment_count *
-                    sizeof(struct drm_asahi_attachment);
-
-         if (render->extensions) {
-            assert(*(uint32_t *)(uintptr_t)render->extensions ==
-                   ASAHI_RENDER_EXT_TIMESTAMPS);
-            req_len += sizeof(struct drm_asahi_cmd_render_user_timestamps);
-         }
-         break;
-      }
-
-      default:
-         return EINVAL;
-      }
-   }
+   struct drm_asahi_sync *syncs =
+      (struct drm_asahi_sync *)(uintptr_t)submit->syncs;
+   size_t req_len = sizeof(struct asahi_ccmd_submit_req) + submit->cmdbuf_size;
 
    size_t extres_size =
       sizeof(struct asahi_ccmd_submit_res) * virt->extres_count;
@@ -320,55 +242,13 @@ agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
       (struct asahi_ccmd_submit_req *)calloc(1, req_len);
 
    req->queue_id = submit->queue_id;
-   req->result_res_id = virt->vbo_res_id;
-   req->command_count = submit->command_count;
    req->extres_count = virt->extres_count;
+   req->cmdbuf_size = submit->cmdbuf_size;
 
    char *ptr = (char *)&req->payload;
 
-   for (int i = 0; i < submit->command_count; i++) {
-      memcpy(ptr, &commands[i], sizeof(struct drm_asahi_command));
-      ptr += sizeof(struct drm_asahi_command);
-
-      memcpy(ptr, (char *)(uintptr_t)commands[i].cmd_buffer,
-             commands[i].cmd_buffer_size);
-      ptr += commands[i].cmd_buffer_size;
-
-      switch (commands[i].cmd_type) {
-      case DRM_ASAHI_CMD_RENDER: {
-         struct drm_asahi_cmd_render *render =
-            (struct drm_asahi_cmd_render *)(uintptr_t)commands[i].cmd_buffer;
-         agx_virtio_serialize_attachments(&ptr, render->vertex_attachments,
-                                          render->vertex_attachment_count);
-         agx_virtio_serialize_attachments(&ptr, render->fragment_attachments,
-                                          render->fragment_attachment_count);
-         if (render->extensions) {
-            struct drm_asahi_cmd_render_user_timestamps *ext =
-               (struct drm_asahi_cmd_render_user_timestamps *)(uintptr_t)
-                  render->extensions;
-            assert(!ext->next);
-            memcpy(ptr, (void *)ext, sizeof(*ext));
-            ptr += sizeof(*ext);
-         }
-         break;
-      }
-      case DRM_ASAHI_CMD_COMPUTE: {
-         struct drm_asahi_cmd_compute *compute =
-            (struct drm_asahi_cmd_compute *)(uintptr_t)commands[i].cmd_buffer;
-         agx_virtio_serialize_attachments(&ptr, compute->attachments,
-                                          compute->attachment_count);
-         if (compute->extensions) {
-            struct drm_asahi_cmd_compute_user_timestamps *ext =
-               (struct drm_asahi_cmd_compute_user_timestamps *)(uintptr_t)
-                  compute->extensions;
-            assert(!ext->next);
-            memcpy(ptr, (void *)ext, sizeof(*ext));
-            ptr += sizeof(*ext);
-         }
-         break;
-      }
-      }
-   }
+   memcpy(ptr, (void *)(uintptr_t)submit->cmdbuf, req->cmdbuf_size);
+   ptr += req->cmdbuf_size;
 
    memcpy(ptr, virt->extres, extres_size);
    ptr += extres_size;
@@ -376,18 +256,12 @@ agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
    req->hdr.cmd = ASAHI_CCMD_SUBMIT;
    req->hdr.len = req_len;
 
-   struct drm_virtgpu_execbuffer_syncobj *vdrm_in_syncs = calloc(
-      submit->in_sync_count, sizeof(struct drm_virtgpu_execbuffer_syncobj));
-   for (int i = 0; i < submit->in_sync_count; i++) {
-      vdrm_in_syncs[i].handle = in_syncs[i].handle;
-      vdrm_in_syncs[i].point = in_syncs[i].timeline_value;
-   }
-
-   struct drm_virtgpu_execbuffer_syncobj *vdrm_out_syncs = calloc(
-      submit->out_sync_count, sizeof(struct drm_virtgpu_execbuffer_syncobj));
-   for (int i = 0; i < submit->out_sync_count; i++) {
-      vdrm_out_syncs[i].handle = out_syncs[i].handle;
-      vdrm_out_syncs[i].point = out_syncs[i].timeline_value;
+   uint32_t total_syncs = submit->in_sync_count + submit->out_sync_count;
+   struct drm_virtgpu_execbuffer_syncobj *vdrm_syncs =
+      calloc(total_syncs, sizeof(struct drm_virtgpu_execbuffer_syncobj));
+   for (int i = 0; i < total_syncs; i++) {
+      vdrm_syncs[i].handle = syncs[i].handle;
+      vdrm_syncs[i].point = syncs[i].timeline_value;
    }
 
    struct vdrm_execbuf_params p = {
@@ -395,15 +269,14 @@ agx_virtio_submit(struct agx_device *dev, struct drm_asahi_submit *submit,
       .ring_idx = 1,
       .req = &req->hdr,
       .num_in_syncobjs = submit->in_sync_count,
-      .in_syncobjs = vdrm_in_syncs,
+      .in_syncobjs = vdrm_syncs,
       .num_out_syncobjs = submit->out_sync_count,
-      .out_syncobjs = vdrm_out_syncs,
+      .out_syncobjs = vdrm_syncs + submit->in_sync_count,
    };
 
    int ret = vdrm_execbuf(dev->vdrm, &p);
 
-   free(vdrm_out_syncs);
-   free(vdrm_in_syncs);
+   free(vdrm_syncs);
    free(req);
    return ret;
 }
@@ -56,7 +56,7 @@ agx_va_free(struct agx_device *dev, struct agx_va *va, bool unbind)
       return;
 
    if (unbind) {
-      dev->ops.bo_bind(dev, NULL, va->addr, va->size_B, 0, 0, true);
+      agx_bo_bind(dev, NULL, va->addr, va->size_B, 0, DRM_ASAHI_BIND_UNBIND);
    }
 
    struct util_vma_heap *heap = agx_vma_heap(dev, va->flags);
@@ -7,7 +7,7 @@
 #ifndef ASAHI_PROTO_H_
 #define ASAHI_PROTO_H_
 
-#define ASAHI_PROTO_UNSTABLE_UABI_VERSION 1
+#include "drm-uapi/asahi_drm.h"
 
 /**
  * Defines the layout of shmem buffer used for host->guest communication.
@@ -37,7 +37,7 @@ enum asahi_ccmd {
    ASAHI_CCMD_IOCTL_SIMPLE,
    ASAHI_CCMD_GET_PARAMS,
    ASAHI_CCMD_GEM_NEW,
-   ASAHI_CCMD_GEM_BIND,
+   ASAHI_CCMD_VM_BIND,
    ASAHI_CCMD_SUBMIT,
    ASAHI_CCMD_GEM_BIND_OBJECT,
 };
@@ -93,7 +93,6 @@ DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_get_params_req)
 struct asahi_ccmd_get_params_rsp {
    struct vdrm_ccmd_rsp hdr;
    int32_t ret;
-   uint32_t virt_uabi_version;
    uint8_t payload[];
 };
@@ -108,11 +107,14 @@ struct asahi_ccmd_gem_new_req {
 };
 DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_gem_new_req)
 
-struct asahi_ccmd_gem_bind_req {
+struct asahi_ccmd_vm_bind_req {
    struct vdrm_ccmd_req hdr;
-   struct drm_asahi_gem_bind bind;
+   uint32_t vm_id;
+   uint32_t stride;
+   uint32_t count;
+   uint8_t payload[];
 };
-DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_gem_bind_req)
+DEFINE_CAST(vdrm_ccmd_req, asahi_ccmd_vm_bind_req)
 
 struct asahi_ccmd_gem_bind_object_req {
    struct vdrm_ccmd_req hdr;
@@ -138,8 +140,7 @@ struct asahi_ccmd_submit_req {
    struct vdrm_ccmd_req hdr;
    uint32_t flags;
    uint32_t queue_id;
-   uint32_t result_res_id;
-   uint32_t command_count;
+   uint32_t cmdbuf_size;
    uint32_t extres_count;
 
    uint8_t payload[];
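With result_res_id and command_count gone, the guest-side submit request now carries the kernel-format command buffer verbatim, followed by the external-resource table. A hedged sketch of how the payload is sized and filled (mirroring agx_virtio_submit earlier in this diff; error handling and the exact req_len bookkeeping are simplified, the function name is illustrative, and <stdlib.h>/<string.h> are assumed):

/* Hedged sketch: payload layout of asahi_ccmd_submit_req after this change:
 *   [0 .. cmdbuf_size)             kernel-format cmdbuf, copied verbatim
 *   [cmdbuf_size .. +extres_size)  asahi_ccmd_submit_res entries
 */
static struct asahi_ccmd_submit_req *
example_build_submit_req(const struct drm_asahi_submit *submit,
                         const struct agx_submit_virt *virt)
{
   size_t extres_size = sizeof(struct asahi_ccmd_submit_res) * virt->extres_count;
   size_t req_len =
      sizeof(struct asahi_ccmd_submit_req) + submit->cmdbuf_size + extres_size;

   struct asahi_ccmd_submit_req *req = calloc(1, req_len);
   req->hdr.cmd = ASAHI_CCMD_SUBMIT;
   req->hdr.len = req_len;
   req->queue_id = submit->queue_id;
   req->cmdbuf_size = submit->cmdbuf_size;
   req->extres_count = virt->extres_count;

   char *ptr = (char *)&req->payload;
   memcpy(ptr, (void *)(uintptr_t)submit->cmdbuf, req->cmdbuf_size);
   ptr += req->cmdbuf_size;
   memcpy(ptr, virt->extres, extres_size);

   return req;
}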
@@ -18,7 +18,6 @@
 
 #include "util/u_hexdump.h"
 #include "decode.h"
-#include "unstable_asahi_drm.h"
 
 struct libagxdecode_config lib_config;
 
@@ -801,106 +800,119 @@ agxdecode_helper(struct agxdecode_ctx *ctx, const char *prefix, uint64_t helper)
    }
 }
 
-void
+static void
 agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx,
                          struct drm_asahi_params_global *params,
                          struct drm_asahi_cmd_render *c, bool verbose)
 {
-   agxdecode_dump_file_open();
-
-   DUMP_FIELD(c, "%llx", flags);
-   DUMP_FIELD(c, "0x%llx", encoder_ptr);
-   agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_vdm, verbose,
-                      params, NULL);
-   DUMP_FIELD(c, "0x%x", encoder_id);
-   DUMP_FIELD(c, "0x%x", cmd_ta_id);
-   DUMP_FIELD(c, "0x%x", cmd_3d_id);
+   DUMP_FIELD(c, "%x", flags);
+   DUMP_FIELD(c, "0x%llx", vdm_ctrl_stream_base);
+   agxdecode_stateful(ctx, c->vdm_ctrl_stream_base, "Encoder", agxdecode_vdm,
+                      verbose, params, NULL);
    DUMP_FIELD(c, "0x%x", ppp_ctrl);
    DUMP_FIELD(c, "0x%llx", ppp_multisamplectl);
    DUMP_CL(ZLS_CONTROL, &c->zls_ctrl, "ZLS Control");
-   DUMP_FIELD(c, "0x%llx", depth_buffer_load);
-   DUMP_FIELD(c, "0x%llx", depth_buffer_store);
-   DUMP_FIELD(c, "0x%llx", depth_buffer_partial);
-   DUMP_FIELD(c, "0x%llx", stencil_buffer_load);
-   DUMP_FIELD(c, "0x%llx", stencil_buffer_store);
-   DUMP_FIELD(c, "0x%llx", stencil_buffer_partial);
-   DUMP_FIELD(c, "0x%llx", scissor_array);
-   DUMP_FIELD(c, "0x%llx", depth_bias_array);
-   DUMP_FIELD(c, "%d", fb_width);
-   DUMP_FIELD(c, "%d", fb_height);
+   DUMP_FIELD(c, "0x%llx", depth.base);
+   DUMP_FIELD(c, "0x%llx", depth.comp_base);
+   DUMP_FIELD(c, "%u", depth.stride);
+   DUMP_FIELD(c, "%u", depth.comp_stride);
+   DUMP_FIELD(c, "0x%llx", stencil.base);
+   DUMP_FIELD(c, "0x%llx", stencil.comp_base);
+   DUMP_FIELD(c, "%u", stencil.stride);
+   DUMP_FIELD(c, "%u", stencil.comp_stride);
+   DUMP_FIELD(c, "0x%llx", isp_scissor_base);
+   DUMP_FIELD(c, "0x%llx", isp_dbias_base);
+   DUMP_FIELD(c, "%d", width_px);
+   DUMP_FIELD(c, "%d", height_px);
    DUMP_FIELD(c, "%d", layers);
    DUMP_FIELD(c, "%d", samples);
-   DUMP_FIELD(c, "%d", sample_size);
-   DUMP_FIELD(c, "%d", tib_blocks);
-   DUMP_FIELD(c, "%d", utile_width);
-   DUMP_FIELD(c, "%d", utile_height);
-   DUMP_FIELD(c, "0x%x", load_pipeline);
-   DUMP_FIELD(c, "0x%x", load_pipeline_bind);
-   agxdecode_stateful(ctx, decode_usc(ctx, c->load_pipeline & ~0x7),
-                      "Load pipeline", agxdecode_usc, verbose, params, NULL);
-   DUMP_FIELD(c, "0x%x", store_pipeline);
-   DUMP_FIELD(c, "0x%x", store_pipeline_bind);
-   agxdecode_stateful(ctx, decode_usc(ctx, c->store_pipeline & ~0x7),
-                      "Store pipeline", agxdecode_usc, verbose, params, NULL);
-   DUMP_FIELD(c, "0x%x", partial_reload_pipeline);
-   DUMP_FIELD(c, "0x%x", partial_reload_pipeline_bind);
-   agxdecode_stateful(ctx, decode_usc(ctx, c->partial_reload_pipeline & ~0x7),
+   DUMP_FIELD(c, "%d", sample_size_B);
+   DUMP_FIELD(c, "%d", utile_width_px);
+   DUMP_FIELD(c, "%d", utile_height_px);
+   DUMP_FIELD(c, "0x%x", bg.usc);
+   DUMP_FIELD(c, "0x%x", bg.rsrc_spec);
+   agxdecode_stateful(ctx, decode_usc(ctx, c->bg.usc & ~0x7), "Load pipeline",
+                      agxdecode_usc, verbose, params, NULL);
+   DUMP_FIELD(c, "0x%x", eot.usc);
+   DUMP_FIELD(c, "0x%x", eot.rsrc_spec);
+   agxdecode_stateful(ctx, decode_usc(ctx, c->eot.usc & ~0x7), "Store pipeline",
+                      agxdecode_usc, verbose, params, NULL);
+   DUMP_FIELD(c, "0x%x", partial_bg.usc);
+   DUMP_FIELD(c, "0x%x", partial_bg.rsrc_spec);
+   agxdecode_stateful(ctx, decode_usc(ctx, c->partial_bg.usc & ~0x7),
                       "Partial reload pipeline", agxdecode_usc, verbose, params,
                       NULL);
-   DUMP_FIELD(c, "0x%x", partial_store_pipeline);
-   DUMP_FIELD(c, "0x%x", partial_store_pipeline_bind);
-   agxdecode_stateful(ctx, decode_usc(ctx, c->partial_store_pipeline & ~0x7),
+   DUMP_FIELD(c, "0x%x", partial_eot.usc);
+   DUMP_FIELD(c, "0x%x", partial_eot.rsrc_spec);
+   agxdecode_stateful(ctx, decode_usc(ctx, c->partial_eot.usc & ~0x7),
                       "Partial store pipeline", agxdecode_usc, verbose, params,
                       NULL);
 
-   DUMP_FIELD(c, "0x%x", depth_dimensions);
+   DUMP_FIELD(c, "0x%x", isp_zls_pixels);
    DUMP_FIELD(c, "0x%x", isp_bgobjdepth);
    DUMP_FIELD(c, "0x%x", isp_bgobjvals);
 
-   agxdecode_sampler_heap(ctx, c->vertex_sampler_array,
-                          c->vertex_sampler_count);
-
-   /* Linux driver doesn't use this, at least for now */
-   assert(c->fragment_sampler_array == c->vertex_sampler_array);
-   assert(c->fragment_sampler_count == c->vertex_sampler_count);
-
-   DUMP_FIELD(c, "%d", vertex_attachment_count);
-   struct drm_asahi_attachment *vertex_attachments =
-      (void *)(uintptr_t)c->vertex_attachments;
-   for (unsigned i = 0; i < c->vertex_attachment_count; i++) {
-      DUMP_FIELD((&vertex_attachments[i]), "0x%x", order);
-      DUMP_FIELD((&vertex_attachments[i]), "0x%llx", size);
-      DUMP_FIELD((&vertex_attachments[i]), "0x%llx", pointer);
-   }
-   DUMP_FIELD(c, "%d", fragment_attachment_count);
-   struct drm_asahi_attachment *fragment_attachments =
-      (void *)(uintptr_t)c->fragment_attachments;
-   for (unsigned i = 0; i < c->fragment_attachment_count; i++) {
-      DUMP_FIELD((&fragment_attachments[i]), "0x%x", order);
-      DUMP_FIELD((&fragment_attachments[i]), "0x%llx", size);
-      DUMP_FIELD((&fragment_attachments[i]), "0x%llx", pointer);
-   }
-
-   agxdecode_helper(ctx, "Vertex", c->vertex_helper_program);
-   agxdecode_helper(ctx, "Fragment", c->fragment_helper_program);
+   agxdecode_sampler_heap(ctx, c->sampler_heap, c->sampler_count);
+
+   agxdecode_helper(ctx, "Vertex", c->vertex_helper.binary);
+   agxdecode_helper(ctx, "Fragment", c->fragment_helper.binary);
 }
|
||||||
void
|
static void
|
||||||
agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx,
|
agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx,
|
||||||
struct drm_asahi_params_global *params,
|
struct drm_asahi_params_global *params,
|
||||||
struct drm_asahi_cmd_compute *c, bool verbose)
|
struct drm_asahi_cmd_compute *c, bool verbose)
|
||||||
|
{
|
||||||
|
DUMP_FIELD(c, "%x", flags);
|
||||||
|
DUMP_FIELD(c, "0x%llx", cdm_ctrl_stream_base);
|
||||||
|
agxdecode_stateful(ctx, c->cdm_ctrl_stream_base, "Encoder", agxdecode_cdm,
|
||||||
|
verbose, params, NULL);
|
||||||
|
|
||||||
|
agxdecode_sampler_heap(ctx, c->sampler_heap, c->sampler_count);
|
||||||
|
agxdecode_helper(ctx, "Compute", c->helper.binary);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
agxdecode_drm_attachments(const char *name, struct drm_asahi_attachment *atts,
|
||||||
|
size_t size)
|
||||||
|
{
|
||||||
|
fprintf(agxdecode_dump_stream, "%s attachments:\n", name);
|
||||||
|
unsigned count = size / sizeof(struct drm_asahi_attachment);
|
||||||
|
for (unsigned i = 0; i < count; i++) {
|
||||||
|
DUMP_FIELD((&atts[i]), "0x%llx", size);
|
||||||
|
DUMP_FIELD((&atts[i]), "0x%llx", pointer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
agxdecode_drm_cmdbuf(struct agxdecode_ctx *ctx,
|
||||||
|
struct drm_asahi_params_global *params,
|
||||||
|
struct util_dynarray *cmdbuf, bool verbose)
|
||||||
{
|
{
|
||||||
agxdecode_dump_file_open();
|
agxdecode_dump_file_open();
|
||||||
|
|
||||||
DUMP_FIELD(c, "%llx", flags);
|
for (unsigned offs = 0; offs < cmdbuf->size;) {
|
||||||
DUMP_FIELD(c, "0x%llx", encoder_ptr);
|
struct drm_asahi_cmd_header *header =
|
||||||
agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_cdm, verbose,
|
(void *)((uint8_t *)cmdbuf->data) + offs;
|
||||||
params, NULL);
|
offs += sizeof(*header);
|
||||||
DUMP_FIELD(c, "0x%x", encoder_id);
|
void *data = (void *)((uint8_t *)cmdbuf->data) + offs;
|
||||||
DUMP_FIELD(c, "0x%x", cmd_id);
|
|
||||||
|
|
||||||
agxdecode_sampler_heap(ctx, c->sampler_array, c->sampler_count);
|
if (header->cmd_type == DRM_ASAHI_CMD_RENDER) {
|
||||||
agxdecode_helper(ctx, "Compute", c->helper_program);
|
agxdecode_drm_cmd_render(ctx, params, data, verbose);
|
||||||
|
} else if (header->cmd_type == DRM_ASAHI_CMD_COMPUTE) {
|
||||||
|
agxdecode_drm_cmd_compute(ctx, params, data, verbose);
|
||||||
|
} else if (header->cmd_type == DRM_ASAHI_SET_VERTEX_ATTACHMENTS) {
|
||||||
|
agxdecode_drm_attachments("Vertex", data, header->size);
|
||||||
|
} else if (header->cmd_type == DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS) {
|
||||||
|
agxdecode_drm_attachments("Fragment", data, header->size);
|
||||||
|
} else if (header->cmd_type == DRM_ASAHI_SET_COMPUTE_ATTACHMENTS) {
|
||||||
|
agxdecode_drm_attachments("Compute", data, header->size);
|
||||||
|
} else {
|
||||||
|
unreachable("Invalid command type");
|
||||||
|
}
|
||||||
|
|
||||||
|
offs += header->size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
|
|
||||||
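For readers following the uAPI change, here is a minimal sketch (not part of the commit) of how a packed command buffer of drm_asahi_cmd_header records is walked, mirroring the loop in agxdecode_drm_cmdbuf above. The helper name count_renders is hypothetical; the header fields are the ones used in the diff.

#include <stdint.h>
#include <stddef.h>
#include "drm-uapi/asahi_drm.h"

static unsigned
count_renders(const uint8_t *data, size_t size)
{
   unsigned n = 0;
   for (size_t offs = 0; offs < size;) {
      const struct drm_asahi_cmd_header *hdr = (const void *)(data + offs);
      if (hdr->cmd_type == DRM_ASAHI_CMD_RENDER)
         n++;
      /* Each record is a header immediately followed by hdr->size payload
       * bytes, so the next header starts right after the payload. */
      offs += sizeof(*hdr) + hdr->size;
   }
   return n;
}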
@@ -10,7 +10,7 @@
 #include <sys/types.h>
 #include "agx_bo.h"
 
-#include "unstable_asahi_drm.h"
+#include "drm-uapi/asahi_drm.h"
 
 struct agxdecode_ctx;
 
@@ -28,15 +28,11 @@ void agxdecode_cmdstream(struct agxdecode_ctx *ctx, unsigned cmdbuf_index,
 void agxdecode_image_heap(struct agxdecode_ctx *ctx, uint64_t heap,
                           unsigned nr_entries);
 
-void agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx,
-                              struct drm_asahi_params_global *params,
-                              struct drm_asahi_cmd_render *cmdbuf,
-                              bool verbose);
+struct util_dynarray;
 
-void agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx,
-                               struct drm_asahi_params_global *params,
-                               struct drm_asahi_cmd_compute *cmdbuf,
-                               bool verbose);
+void agxdecode_drm_cmdbuf(struct agxdecode_ctx *ctx,
+                          struct drm_asahi_params_global *params,
+                          struct util_dynarray *cmdbuf, bool verbose);
 
 void agxdecode_dump_file_open(void);
@@ -81,17 +81,9 @@ VkResult
 hk_bind_scratch(struct hk_device *dev, struct agx_va *va, unsigned offset_B,
                 size_t size_B)
 {
-   VkResult result = VK_SUCCESS;
-
-   for (unsigned i = 0; i < size_B; i += AIL_PAGESIZE) {
-      result = dev->dev.ops.bo_bind(&dev->dev, dev->sparse.write,
-                                    va->addr + offset_B + i, AIL_PAGESIZE, 0,
-                                    ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
-      if (result != VK_SUCCESS)
-         return result;
-   }
-
-   return result;
+   return agx_bo_bind(
+      &dev->dev, dev->sparse.write, va->addr + offset_B, size_B, 0,
+      DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE | DRM_ASAHI_BIND_SINGLE_PAGE);
 }
 
 VKAPI_ATTR VkResult VKAPI_CALL
 
@@ -253,11 +245,9 @@ hk_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount,
       if (buffer->va) {
          VK_FROM_HANDLE(hk_device, dev, device);
          size_t size = MIN2(mem->bo->size, buffer->va->size_B);
-         int ret =
-            dev->dev.ops.bo_bind(&dev->dev, mem->bo, buffer->vk.device_address,
-                                 size, pBindInfos[i].memoryOffset,
-                                 ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
+         int ret = agx_bo_bind(&dev->dev, mem->bo, buffer->vk.device_address,
+                               size, pBindInfos[i].memoryOffset,
+                               DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE);
 
          if (ret)
             return VK_ERROR_UNKNOWN;
      } else {
@@ -151,7 +151,6 @@ struct hk_render_registers {
    uint32_t isp_bgobjdepth;
    uint32_t isp_bgobjvals;
    struct agx_zls_control_packed zls_control, zls_control_partial;
-   uint32_t iogpu_unk_214;
    uint32_t depth_dimensions;
    bool process_empty_tiles;
    enum u_tristate dbias_is_int;
@@ -680,8 +680,6 @@ hk_CmdBeginRendering(VkCommandBuffer commandBuffer,
    const VkRenderingAttachmentInfo *attach_s =
       pRenderingInfo->pStencilAttachment;
 
-   render->cr.iogpu_unk_214 = 0xc000;
-
    struct ail_layout *z_layout = NULL, *s_layout = NULL;
 
    if (attach_z != NULL && attach_z != VK_NULL_HANDLE && attach_z->imageView) {
@@ -285,7 +285,7 @@ static VkResult
 hk_get_timestamp(struct vk_device *device, uint64_t *timestamp)
 {
    struct hk_device *dev = container_of(device, struct hk_device, vk);
-   *timestamp = agx_gpu_time_to_ns(&dev->dev, agx_get_gpu_timestamp(&dev->dev));
+   *timestamp = agx_get_gpu_timestamp(&dev->dev);
    return VK_SUCCESS;
 }
@@ -52,7 +52,7 @@ hk_memory_type_flags(const VkMemoryType *type,
 static void
 hk_add_ext_bo_locked(struct hk_device *dev, struct agx_bo *bo)
 {
-   uint32_t id = bo->vbo_res_id;
+   uint32_t id = bo->uapi_handle;
 
    unsigned count = util_dynarray_num_elements(&dev->external_bos.list,
                                                struct asahi_ccmd_submit_res);
 
@@ -89,7 +89,7 @@ hk_add_ext_bo(struct hk_device *dev, struct agx_bo *bo)
 static void
 hk_remove_ext_bo_locked(struct hk_device *dev, struct agx_bo *bo)
 {
-   uint32_t id = bo->vbo_res_id;
+   uint32_t id = bo->uapi_handle;
    unsigned count = util_dynarray_num_elements(&dev->external_bos.list,
                                                struct asahi_ccmd_submit_res);
@@ -23,7 +23,6 @@
 #include "util/simple_mtx.h"
 #include "vulkan/vulkan_core.h"
 #include "vulkan/wsi/wsi_common.h"
-#include "unstable_asahi_drm.h"
 #include "vk_drm_syncobj.h"
 #include "vk_shader_module.h"
@@ -274,13 +273,10 @@ hk_get_device_features(
       .sparseResidencyAliased = true,
       .sparseResidencyImage2D = true,
 
-      /* We depend on soft fault to implement sparse residency on buffers with
-       * the appropriate semantics. Lifting this requirement would be possible
-       * but challenging, given the requirements imposed by
-       * sparseResidencyNonResidentStrict.
+      /* TODO: We need to implement sparse buffer without soft fault to avoid
+       * tying our hands later.
        */
-      .sparseResidencyBuffer =
-         (dev->params.feat_compat & DRM_ASAHI_FEAT_SOFT_FAULTS),
+      .sparseResidencyBuffer = false,
 
       /* This needs investigation. */
       .sparseResidencyImage3D = false,
@@ -743,7 +739,7 @@ hk_get_device_properties(const struct agx_device *dev,
       .sampledImageStencilSampleCounts = sample_counts,
       .storageImageSampleCounts = sample_counts,
       .maxSampleMaskWords = 1,
-      .timestampComputeAndGraphics = agx_supports_timestamps(dev),
+      .timestampComputeAndGraphics = true,
       /* FIXME: Is timestamp period actually 1? */
       .timestampPeriod = 1.0f,
       .maxClipDistances = 8,
@@ -1141,9 +1137,6 @@ hk_create_drm_physical_device(struct vk_instance *_instance,
    struct hk_instance *instance = (struct hk_instance *)_instance;
    VkResult result;
 
-   /* Blanket refusal to probe due to unstable UAPI. */
-   return VK_ERROR_INCOMPATIBLE_DRIVER;
-
    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
        drm_device->bustype != DRM_BUS_PLATFORM)
       return VK_ERROR_INCOMPATIBLE_DRIVER;
@@ -1433,8 +1426,7 @@ hk_GetPhysicalDeviceQueueFamilyProperties2(
    {
       p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
       p->queueFamilyProperties.queueCount = queue_family->queue_count;
-      p->queueFamilyProperties.timestampValidBits =
-         agx_supports_timestamps(&pdev->dev) ? 64 : 0;
+      p->queueFamilyProperties.timestampValidBits = 64;
       p->queueFamilyProperties.minImageTransferGranularity =
          (VkExtent3D){1, 1, 1};
@@ -24,6 +24,7 @@
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
 
+#include "drm-uapi/asahi_drm.h"
 #include "util/os_time.h"
 #include "util/u_dynarray.h"
 #include "vulkan/vulkan_core.h"
@@ -88,10 +89,6 @@ hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
    bool timestamp = pCreateInfo->queryType == VK_QUERY_TYPE_TIMESTAMP;
    unsigned occlusion_queries = occlusion ? pCreateInfo->queryCount : 0;
 
-   /* Workaround for DXVK on old kernels */
-   if (!agx_supports_timestamps(&dev->dev))
-      timestamp = false;
-
    pool =
       vk_query_pool_create(&dev->vk, pCreateInfo, pAllocator, sizeof(*pool));
    if (!pool)
 
@@ -131,10 +128,7 @@ hk_CreateQueryPool(VkDevice device, const VkQueryPoolCreateInfo *pCreateInfo,
     * them.
     */
    if (timestamp) {
-      int ret = dev->dev.ops.bo_bind_object(
-         &dev->dev, pool->bo, &pool->handle, pool->bo->size, 0,
-         ASAHI_BIND_OBJECT_USAGE_TIMESTAMPS);
+      int ret = agx_bind_timestamps(&dev->dev, pool->bo, &pool->handle);
 
       if (ret) {
          hk_DestroyQueryPool(device, hk_query_pool_to_handle(pool),
                              pAllocator);
@@ -186,7 +180,7 @@ hk_DestroyQueryPool(VkDevice device, VkQueryPool queryPool,
    }
 
    if (pool->handle)
-      dev->dev.ops.bo_unbind_object(&dev->dev, pool->handle, 0);
+      dev->dev.ops.bo_unbind_object(&dev->dev, pool->handle);
 
    agx_bo_unreference(&dev->dev, pool->bo);
    vk_query_pool_destroy(&dev->vk, pAllocator, &pool->vk);
@@ -390,12 +384,7 @@ hk_CmdWriteTimestamp2(VkCommandBuffer commandBuffer,
    VK_FROM_HANDLE(hk_query_pool, pool, queryPool);
    struct hk_device *dev = hk_cmd_buffer_device(cmd);
 
-   /* Workaround for DXVK on old kernels */
-   if (!agx_supports_timestamps(&dev->dev))
-      return;
-
    uint64_t report_addr = hk_query_report_addr(dev, pool, query);
 
    bool after_gfx = cmd->current_cs.gfx != NULL;
 
    /* When writing timestamps for compute, we split the control stream at each
@@ -26,9 +26,9 @@
 #include "hk_physical_device.h"
 
 #include <xf86drm.h>
-#include "asahi/lib/unstable_asahi_drm.h"
 #include "util/list.h"
 #include "util/macros.h"
+#include "util/u_dynarray.h"
 #include "vulkan/vulkan_core.h"
 
 #include "hk_private.h"
@@ -78,67 +78,39 @@ queue_submit_empty(struct hk_device *dev, struct hk_queue *queue,
 
 static void
 asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs,
-                       struct drm_asahi_cmd_compute *cmd,
-                       struct drm_asahi_cmd_compute_user_timestamps *timestamps)
+                       struct drm_asahi_cmd_compute *cmd)
 {
    size_t len = cs->stream_linked ? 65536 /* XXX */ : (cs->current - cs->start);
 
    *cmd = (struct drm_asahi_cmd_compute){
-      .encoder_ptr = cs->addr,
-      .encoder_end = cs->addr + len,
+      .cdm_ctrl_stream_base = cs->addr,
+      .cdm_ctrl_stream_end = cs->addr + len,
 
-      .sampler_array = dev->samplers.table.bo->va->addr,
+      .sampler_heap = dev->samplers.table.bo->va->addr,
       .sampler_count = dev->samplers.table.alloc,
-      .sampler_max = dev->samplers.table.alloc + 1,
 
-      .usc_base = dev->dev.shader_base,
-
-      .encoder_id = agx_get_global_id(&dev->dev),
-      .cmd_id = agx_get_global_id(&dev->dev),
-      .unk_mask = 0xffffffff,
+      .ts.end.handle = cs->timestamp.end.handle,
+      .ts.end.offset = cs->timestamp.end.offset_B,
    };
 
-   if (cs->timestamp.end.handle) {
-      assert(agx_supports_timestamps(&dev->dev));
-
-      *timestamps = (struct drm_asahi_cmd_compute_user_timestamps){
-         .type = ASAHI_COMPUTE_EXT_TIMESTAMPS,
-         .end_handle = cs->timestamp.end.handle,
-         .end_offset = cs->timestamp.end.offset_B,
-      };
-
-      cmd->extensions = (uint64_t)(uintptr_t)timestamps;
-   }
-
    if (cs->scratch.cs.main || cs->scratch.cs.preamble) {
-      cmd->helper_arg = dev->scratch.cs.buf->va->addr;
-      cmd->helper_cfg = cs->scratch.cs.preamble ? (1 << 16) : 0;
-      cmd->helper_program = agx_helper_program(&dev->bg_eot);
+      cmd->helper.data = dev->scratch.cs.buf->va->addr;
+      cmd->helper.cfg = cs->scratch.cs.preamble ? (1 << 16) : 0;
+      cmd->helper.binary = agx_helper_program(&dev->bg_eot);
    }
 }
 
 static void
 asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
-                       struct drm_asahi_cmd_render *c,
-                       struct drm_asahi_cmd_render_user_timestamps *timestamps)
+                       struct drm_asahi_cmd_render *c)
 {
-   unsigned cmd_ta_id = agx_get_global_id(&dev->dev);
-   unsigned cmd_3d_id = agx_get_global_id(&dev->dev);
-   unsigned encoder_id = agx_get_global_id(&dev->dev);
-
    memset(c, 0, sizeof(*c));
 
-   c->encoder_ptr = cs->addr;
-   c->encoder_id = encoder_id;
-   c->cmd_3d_id = cmd_3d_id;
-   c->cmd_ta_id = cmd_ta_id;
+   c->vdm_ctrl_stream_base = cs->addr;
    c->ppp_ctrl = 0x202;
 
-   c->fragment_usc_base = dev->dev.shader_base;
-   c->vertex_usc_base = c->fragment_usc_base;
-
-   c->fb_width = cs->cr.width;
-   c->fb_height = cs->cr.height;
+   c->width_px = cs->cr.width;
+   c->height_px = cs->cr.height;
 
    c->isp_bgobjdepth = cs->cr.isp_bgobjdepth;
    c->isp_bgobjvals = cs->cr.isp_bgobjvals;
 
@@ -146,65 +118,30 @@ asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
    static_assert(sizeof(c->zls_ctrl) == sizeof(cs->cr.zls_control));
    memcpy(&c->zls_ctrl, &cs->cr.zls_control, sizeof(cs->cr.zls_control));
 
-   c->depth_dimensions =
-      (cs->cr.zls_width - 1) | ((cs->cr.zls_height - 1) << 15);
+   agx_pack(&c->isp_zls_pixels, CR_ISP_ZLS_PIXELS, cfg) {
+      cfg.x = cs->cr.zls_width;
+      cfg.y = cs->cr.zls_height;
+   }
 
-   c->depth_buffer_load = cs->cr.depth.buffer;
-   c->depth_buffer_store = cs->cr.depth.buffer;
-   c->depth_buffer_partial = cs->cr.depth.buffer;
-
-   c->depth_buffer_load_stride = cs->cr.depth.stride;
-   c->depth_buffer_store_stride = cs->cr.depth.stride;
-   c->depth_buffer_partial_stride = cs->cr.depth.stride;
-
-   c->depth_meta_buffer_load = cs->cr.depth.meta;
-   c->depth_meta_buffer_store = cs->cr.depth.meta;
-   c->depth_meta_buffer_partial = cs->cr.depth.meta;
-
-   c->depth_meta_buffer_load_stride = cs->cr.depth.stride;
-   c->depth_meta_buffer_store_stride = cs->cr.depth.meta_stride;
-   c->depth_meta_buffer_partial_stride = cs->cr.depth.meta_stride;
-
-   c->stencil_buffer_load = cs->cr.stencil.buffer;
-   c->stencil_buffer_store = cs->cr.stencil.buffer;
-   c->stencil_buffer_partial = cs->cr.stencil.buffer;
-
-   c->stencil_buffer_load_stride = cs->cr.stencil.stride;
-   c->stencil_buffer_store_stride = cs->cr.stencil.stride;
-   c->stencil_buffer_partial_stride = cs->cr.stencil.stride;
-
-   c->stencil_meta_buffer_load = cs->cr.stencil.meta;
-   c->stencil_meta_buffer_store = cs->cr.stencil.meta;
-   c->stencil_meta_buffer_partial = cs->cr.stencil.meta;
-
-   c->stencil_meta_buffer_load_stride = cs->cr.stencil.stride;
-   c->stencil_meta_buffer_store_stride = cs->cr.stencil.meta_stride;
-   c->stencil_meta_buffer_partial_stride = cs->cr.stencil.meta_stride;
-
-   c->iogpu_unk_214 = cs->cr.iogpu_unk_214;
+   c->depth.base = cs->cr.depth.buffer;
+   c->depth.stride = cs->cr.depth.stride;
+   c->depth.comp_base = cs->cr.depth.meta;
+   c->depth.comp_stride = cs->cr.depth.meta_stride;
+   c->stencil.base = cs->cr.stencil.buffer;
+   c->stencil.stride = cs->cr.stencil.stride;
+   c->stencil.comp_base = cs->cr.stencil.meta;
+   c->stencil.comp_stride = cs->cr.stencil.meta_stride;
 
    if (cs->cr.dbias_is_int == U_TRISTATE_YES) {
-      c->iogpu_unk_214 |= 0x40000;
+      c->flags |= DRM_ASAHI_RENDER_DBIAS_IS_INT;
    }
 
    if (dev->dev.debug & AGX_DBG_NOCLUSTER) {
-      c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING;
-   } else {
-      /* XXX: We don't know what this does exactly, and the name is
-       * surely wrong. But it fixes dEQP-VK.memory.pipeline_barrier.* tests on
-       * G14C when clustering is enabled...
-       */
-      c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES;
+      c->flags |= DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING;
    }
 
-#if 0
-   /* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */
-   if (tib->nr_samples > 1 && framebuffer->zsbuf)
-      c->flags |= ASAHI_RENDER_MSAA_ZS;
-#endif
-
-   c->utile_width = cs->tib.tile_size.width;
-   c->utile_height = cs->tib.tile_size.height;
+   c->utile_width_px = cs->tib.tile_size.width;
+   c->utile_height_px = cs->tib.tile_size.height;
 
    /* Can be 0 for attachmentless rendering with no draws */
    c->samples = MAX2(cs->tib.nr_samples, 1);
 
@@ -217,75 +154,62 @@ asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
     *
     * XXX: Hack for vkd3d-proton.
     */
-   if (c->layers == 2048 && c->fb_width == 16384 && c->fb_height == 16384) {
+   if (c->layers == 2048 && c->width_px == 16384 && c->height_px == 16384) {
       mesa_log(MESA_LOG_WARN, MESA_LOG_TAG, "Clamping massive framebuffer");
       c->layers = 32;
    }
 
    c->ppp_multisamplectl = cs->ppp_multisamplectl;
-   c->sample_size = cs->tib.sample_size_B;
-   c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(&cs->tib), 2048) / 2048;
+   c->sample_size_B = cs->tib.sample_size_B;
 
    float tan_60 = 1.732051f;
-   c->merge_upper_x = fui(tan_60 / cs->cr.width);
-   c->merge_upper_y = fui(tan_60 / cs->cr.height);
+   c->isp_merge_upper_x = fui(tan_60 / cs->cr.width);
+   c->isp_merge_upper_y = fui(tan_60 / cs->cr.height);
 
-   c->load_pipeline = cs->cr.bg.main.usc | 4;
-   c->store_pipeline = cs->cr.eot.main.usc | 4;
-   c->partial_reload_pipeline = cs->cr.bg.partial.usc | 4;
-   c->partial_store_pipeline = cs->cr.eot.partial.usc | 4;
+   c->bg.usc = cs->cr.bg.main.usc | 4;
+   c->eot.usc = cs->cr.eot.main.usc | 4;
+   c->partial_bg.usc = cs->cr.bg.partial.usc | 4;
+   c->partial_eot.usc = cs->cr.eot.partial.usc | 4;
 
-   memcpy(&c->load_pipeline_bind, &cs->cr.bg.main.counts,
+   memcpy(&c->bg.rsrc_spec, &cs->cr.bg.main.counts,
           sizeof(struct agx_counts_packed));
 
-   memcpy(&c->store_pipeline_bind, &cs->cr.eot.main.counts,
+   memcpy(&c->eot.rsrc_spec, &cs->cr.eot.main.counts,
           sizeof(struct agx_counts_packed));
 
-   memcpy(&c->partial_reload_pipeline_bind, &cs->cr.bg.partial.counts,
+   memcpy(&c->partial_bg.rsrc_spec, &cs->cr.bg.partial.counts,
          sizeof(struct agx_counts_packed));
 
-   memcpy(&c->partial_store_pipeline_bind, &cs->cr.eot.partial.counts,
+   memcpy(&c->partial_eot.rsrc_spec, &cs->cr.eot.partial.counts,
          sizeof(struct agx_counts_packed));
 
-   c->scissor_array = cs->uploaded_scissor;
-   c->depth_bias_array = cs->uploaded_zbias;
+   c->isp_scissor_base = cs->uploaded_scissor;
+   c->isp_dbias_base = cs->uploaded_zbias;
 
-   c->vertex_sampler_array = dev->samplers.table.bo->va->addr;
-   c->vertex_sampler_count = dev->samplers.table.alloc;
-   c->vertex_sampler_max = dev->samplers.table.alloc + 1;
+   c->sampler_heap = dev->samplers.table.bo->va->addr;
+   c->sampler_count = dev->samplers.table.alloc;
 
-   c->fragment_sampler_array = c->vertex_sampler_array;
-   c->fragment_sampler_count = c->vertex_sampler_count;
-   c->fragment_sampler_max = c->vertex_sampler_max;
-
-   c->visibility_result_buffer = dev->occlusion_queries.bo->va->addr;
+   c->isp_oclqry_base = dev->occlusion_queries.bo->va->addr;
 
    if (cs->cr.process_empty_tiles)
-      c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES;
+      c->flags |= DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES;
 
    if (cs->scratch.vs.main || cs->scratch.vs.preamble) {
-      c->flags |= ASAHI_RENDER_VERTEX_SPILLS;
-      c->vertex_helper_arg = dev->scratch.vs.buf->va->addr;
-      c->vertex_helper_cfg = cs->scratch.vs.preamble ? (1 << 16) : 0;
-      c->vertex_helper_program = agx_helper_program(&dev->bg_eot);
+      c->flags |= DRM_ASAHI_RENDER_VERTEX_SCRATCH;
+      c->vertex_helper.data = dev->scratch.vs.buf->va->addr;
+      c->vertex_helper.cfg = cs->scratch.vs.preamble ? (1 << 16) : 0;
+      c->vertex_helper.binary = agx_helper_program(&dev->bg_eot);
    }
 
    if (cs->scratch.fs.main || cs->scratch.fs.preamble) {
-      c->fragment_helper_arg = dev->scratch.fs.buf->va->addr;
-      c->fragment_helper_cfg = cs->scratch.fs.preamble ? (1 << 16) : 0;
-      c->fragment_helper_program = agx_helper_program(&dev->bg_eot);
+      c->fragment_helper.data = dev->scratch.fs.buf->va->addr;
+      c->fragment_helper.cfg = cs->scratch.fs.preamble ? (1 << 16) : 0;
+      c->fragment_helper.binary = agx_helper_program(&dev->bg_eot);
    }
 
    if (cs->timestamp.end.handle) {
-      assert(agx_supports_timestamps(&dev->dev));
-
-      c->extensions = (uint64_t)(uintptr_t)timestamps;
-
-      *timestamps = (struct drm_asahi_cmd_render_user_timestamps){
-         .type = ASAHI_RENDER_EXT_TIMESTAMPS,
-         .frg_end_handle = cs->timestamp.end.handle,
-         .frg_end_offset = cs->timestamp.end.offset_B,
-      };
+      c->ts_frag.end.handle = cs->timestamp.end.handle;
+      c->ts_frag.end.offset = cs->timestamp.end.offset_B;
    }
 }
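A side note for readers: the agx_pack() of CR_ISP_ZLS_PIXELS above replaces the hand-rolled encoding that the removed code used. A minimal sketch of the equivalent arithmetic, taken from the old formula shown in this hunk (assuming the packed struct really is two 15-bit "minus one" fields at bits 0 and 15):

#include <stdint.h>

static inline uint32_t
isp_zls_pixels(uint32_t width_px, uint32_t height_px)
{
   /* X in bits 0..14, Y in bits 15..29, both stored as value - 1 */
   return (width_px - 1) | ((height_px - 1) << 15);
}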
@@ -314,11 +238,6 @@ union drm_asahi_cmd {
    struct drm_asahi_cmd_render render;
 };
 
-union drm_asahi_user_timestamps {
-   struct drm_asahi_cmd_compute_user_timestamps compute;
-   struct drm_asahi_cmd_render_user_timestamps render;
-};
-
 /* XXX: Batching multiple commands per submission is causing rare (7ppm) flakes
  * on the CTS once lossless compression is enabled. This needs to be
  * investigated before we can reenable this mechanism. We are likely missing a
 
@@ -333,11 +252,7 @@ max_commands_per_submit(struct hk_device *dev)
 static VkResult
 queue_submit_single(struct hk_device *dev, struct drm_asahi_submit *submit)
 {
-   /* Currently we don't use the result buffer or implicit sync */
-   struct agx_submit_virt virt = {
-      .vbo_res_id = 0,
-      .extres_count = 0,
-   };
+   struct agx_submit_virt virt = {0};
 
    if (dev->dev.is_virtio) {
       u_rwlock_rdlock(&dev->external_bos.lock);
 
@@ -367,14 +282,19 @@ queue_submit_single(struct hk_device *dev, struct drm_asahi_submit *submit)
  * bounds.
  */
 static VkResult
-queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit)
+queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit,
+                    unsigned command_count)
 {
-   struct drm_asahi_command *cmds = (void *)(uintptr_t)submit->commands;
-   unsigned commands_remaining = submit->command_count;
-   unsigned submitted[DRM_ASAHI_SUBQUEUE_COUNT] = {0};
+   uint8_t *cmdbuf = (uint8_t *)(uintptr_t)submit->cmdbuf;
+   uint32_t offs = 0;
+   unsigned submitted_vdm = 0, submitted_cdm = 0;
+   unsigned commands_remaining = command_count;
+
+   uint64_t out_syncs =
+      submit->syncs + sizeof(struct drm_asahi_sync) * submit->in_sync_count;
 
    while (commands_remaining) {
-      bool first = commands_remaining == submit->command_count;
+      bool first = commands_remaining == command_count;
       bool last = commands_remaining <= max_commands_per_submit(dev);
 
       unsigned count = MIN2(commands_remaining, max_commands_per_submit(dev));
 
@@ -383,13 +303,27 @@ queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit)
       assert(!last || commands_remaining == 0);
       assert(count > 0);
 
+      unsigned base_offs = offs;
+      unsigned cdm_count = 0, vdm_count = 0;
+
       /* We need to fix up the barriers since barriers are ioctl-relative */
       for (unsigned i = 0; i < count; ++i) {
-         for (unsigned q = 0; q < DRM_ASAHI_SUBQUEUE_COUNT; ++q) {
-            if (cmds[i].barriers[q] != DRM_ASAHI_BARRIER_NONE) {
-               assert(cmds[i].barriers[q] >= submitted[q]);
-               cmds[i].barriers[q] -= submitted[q];
-            }
+         struct drm_asahi_cmd_header *cmd = (void *)(cmdbuf + offs);
+         offs += sizeof(*cmd) + cmd->size;
+
+         if (cmd->cmd_type == DRM_ASAHI_CMD_RENDER)
+            vdm_count++;
+         else if (cmd->cmd_type == DRM_ASAHI_CMD_COMPUTE)
+            cdm_count++;
+
+         if (cmd->vdm_barrier != DRM_ASAHI_BARRIER_NONE) {
+            assert(cmd->vdm_barrier >= submitted_vdm);
+            cmd->vdm_barrier -= submitted_vdm;
+         }
+
+         if (cmd->cdm_barrier != DRM_ASAHI_BARRIER_NONE) {
+            assert(cmd->cdm_barrier >= submitted_cdm);
+            cmd->cdm_barrier -= submitted_cdm;
          }
       }
 
@@ -399,38 +333,35 @@ queue_submit_looped(struct hk_device *dev, struct drm_asahi_submit *submit)
        * TODO: there might be a more performant way to do this.
        */
       if (last && !first) {
-         for (unsigned q = 0; q < DRM_ASAHI_SUBQUEUE_COUNT; ++q) {
-            if (cmds[0].barriers[q] == DRM_ASAHI_BARRIER_NONE)
-               cmds[0].barriers[q] = 0;
-         }
+         struct drm_asahi_cmd_header *cmd = (void *)(cmdbuf + base_offs);
+
+         if (cmd->vdm_barrier == DRM_ASAHI_BARRIER_NONE)
+            cmd->vdm_barrier = 0;
+
+         if (cmd->cdm_barrier == DRM_ASAHI_BARRIER_NONE)
+            cmd->cdm_barrier = 0;
       }
 
+      bool has_in_syncs = first;
+      bool has_out_syncs = last;
+
       struct drm_asahi_submit submit_ioctl = {
          .flags = submit->flags,
          .queue_id = submit->queue_id,
-         .result_handle = submit->result_handle,
-         .commands = (uint64_t)(uintptr_t)(cmds),
-         .command_count = count,
-         .in_syncs = first ? submit->in_syncs : 0,
-         .in_sync_count = first ? submit->in_sync_count : 0,
-         .out_syncs = last ? submit->out_syncs : 0,
-         .out_sync_count = last ? submit->out_sync_count : 0,
+         .cmdbuf = submit->cmdbuf + base_offs,
+         .cmdbuf_size = offs - base_offs,
+         .syncs = has_in_syncs ? submit->syncs : out_syncs,
+         .in_sync_count = has_in_syncs ? submit->in_sync_count : 0,
+         .out_sync_count = has_out_syncs ? submit->out_sync_count : 0,
       };
 
       VkResult result = queue_submit_single(dev, &submit_ioctl);
       if (result != VK_SUCCESS)
         return result;
 
-      for (unsigned i = 0; i < count; ++i) {
-         if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE)
-            submitted[DRM_ASAHI_SUBQUEUE_COMPUTE]++;
-         else if (cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER)
-            submitted[DRM_ASAHI_SUBQUEUE_RENDER]++;
-         else
-            unreachable("unknown subqueue");
-      }
-
-      cmds += count;
+      submitted_cdm += cdm_count;
+      submitted_vdm += vdm_count;
    }
 
    return VK_SUCCESS;
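The barrier fixup above can be hard to follow at a glance, so here is a small sketch of the arithmetic (not driver code; the helper name is illustrative): barriers count previously submitted commands per subqueue and are relative to the current SUBMIT ioctl, so each chunk of a split submission subtracts the number of VDM or CDM commands already flushed in earlier ioctls.

#include <assert.h>
#include <stdint.h>
#include "drm-uapi/asahi_drm.h"

static void
rebase_barrier(uint32_t *barrier, unsigned already_submitted)
{
   /* DRM_ASAHI_BARRIER_NONE means "no dependency", so it must not be
    * rebased; every other value is an index into earlier commands. */
   if (*barrier != DRM_ASAHI_BARRIER_NONE) {
      assert(*barrier >= already_submitted);
      *barrier -= already_submitted;
   }
}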
@@ -449,18 +380,24 @@ struct hk_bind_builder {
    VkDeviceSize size;
    VkDeviceSize memoryOffset;
    VkResult result;
+
+   /* Array of drm_asahi_gem_bind_op's */
+   struct util_dynarray binds;
 };
 
 static inline struct hk_bind_builder
 hk_bind_builder(struct hk_device *dev, struct vk_object_base *obj_base,
                 struct agx_va *va, struct hk_image *image)
 {
-   return (struct hk_bind_builder){
+   struct hk_bind_builder b = {
      .dev = dev,
      .obj_base = obj_base,
      .va = va,
      .image = image,
   };
+
+   util_dynarray_init(&b.binds, NULL);
+   return b;
 }
 
 static VkResult
 
@@ -523,13 +460,43 @@ hk_flush_bind(b)
    /* When the app wants to unbind, replace the bound pages with scratch pages
     * so we don't leave a gap.
     */
+   struct drm_asahi_gem_bind_op op;
    if (!b->mem) {
-      return hk_bind_scratch(b->dev, b->va, b->resourceOffset, b->size);
+      op = (struct drm_asahi_gem_bind_op){
+         .handle = b->dev->sparse.write->uapi_handle,
+         .flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE |
+                  DRM_ASAHI_BIND_SINGLE_PAGE,
+         .addr = b->va->addr + b->resourceOffset,
+         .range = b->size,
+      };
   } else {
-      return b->dev->dev.ops.bo_bind(&b->dev->dev, b->mem->bo, va_addr, b->size,
-                                     b->memoryOffset,
-                                     ASAHI_BIND_READ | ASAHI_BIND_WRITE, false);
+      op = (struct drm_asahi_gem_bind_op){
+         .handle = b->mem->bo->uapi_handle,
+         .flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE,
+         .addr = va_addr,
+         .offset = b->memoryOffset,
+         .range = b->size,
+      };
   }
+
+   util_dynarray_append(&b->binds, struct drm_asahi_gem_bind_op, op);
+   return VK_SUCCESS;
+}
+
+static int
+hk_bind_builder_finish(struct hk_bind_builder *b)
+{
+   hk_flush_bind(b);
+
+   /* Submit everything to the kernel at once */
+   if (b->binds.size > 0) {
+      b->dev->dev.ops.bo_bind(
+         &b->dev->dev, b->binds.data,
+         util_dynarray_num_elements(&b->binds, struct drm_asahi_gem_bind_op));
+   }
+
+   util_dynarray_fini(&b->binds);
+   return b->result;
 }
 
 static void
 
@@ -577,7 +544,7 @@ hk_sparse_buffer_bind_memory(struct hk_device *device,
                    bind->pBinds[i].size, bind->pBinds[i].memoryOffset);
    }
 
-   return hk_flush_bind(&b);
+   return hk_bind_builder_finish(&b);
 }
 
 static VkResult
 
@@ -623,7 +590,7 @@ hk_sparse_image_opaque_bind_memory(
       }
    }
 
-   return hk_flush_bind(&b);
+   return hk_bind_builder_finish(&b);
 }
 
 static void
 
@@ -714,7 +681,7 @@ hk_sparse_image_bind_memory(struct hk_device *device,
       }
    }
 
-   return hk_flush_bind(&b);
+   return hk_bind_builder_finish(&b);
 }
 
 static VkResult
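For context, a brief sketch of how the bind builder above batches VM binds under the stable uAPI: per-resource bind operations accumulate in a util_dynarray and reach the kernel in a single bo_bind() call. The function below is illustrative only; the bo_handle, va_addr and size_B parameters are hypothetical and the bo_bind callback signature is assumed to match the one used in the diff.

#include <stdint.h>
#include "util/u_dynarray.h"
#include "drm-uapi/asahi_drm.h"

static void
bind_one_range(struct agx_device *dev, uint32_t bo_handle, uint64_t va_addr,
               uint64_t size_B)
{
   struct util_dynarray binds;
   util_dynarray_init(&binds, NULL);

   struct drm_asahi_gem_bind_op op = {
      .handle = bo_handle,
      .flags = DRM_ASAHI_BIND_READ | DRM_ASAHI_BIND_WRITE,
      .addr = va_addr,
      .range = size_B,
   };
   util_dynarray_append(&binds, struct drm_asahi_gem_bind_op, op);

   /* One ioctl for the whole array, instead of one ioctl per bind */
   dev->ops.bo_bind(dev, binds.data,
                    util_dynarray_num_elements(&binds,
                                               struct drm_asahi_gem_bind_op));
   util_dynarray_fini(&binds);
}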
@@ -778,11 +745,9 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
       return queue_submit_empty(dev, queue, submit);
 
    unsigned wait_count = 0;
-   struct drm_asahi_sync *waits =
-      alloca(submit->wait_count * sizeof(struct drm_asahi_sync));
-   struct drm_asahi_sync *signals =
-      alloca((submit->signal_count + 1) * sizeof(struct drm_asahi_sync));
+   struct drm_asahi_sync *syncs =
+      alloca((submit->wait_count + submit->signal_count + 1) *
+             sizeof(struct drm_asahi_sync));
 
    for (unsigned i = 0; i < submit->wait_count; ++i) {
       /* The kernel rejects the submission if we try to wait on the same
 
@@ -808,36 +773,31 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
         continue;
      }
 
-      asahi_fill_sync(&waits[wait_count++], submit->waits[i].sync,
+      asahi_fill_sync(&syncs[wait_count++], submit->waits[i].sync,
                       submit->waits[i].wait_value);
   }
 
    for (unsigned i = 0; i < submit->signal_count; ++i) {
-      asahi_fill_sync(&signals[i], submit->signals[i].sync,
+      asahi_fill_sync(&syncs[wait_count + i], submit->signals[i].sync,
                       submit->signals[i].signal_value);
   }
 
    /* Signal progress on the queue itself */
-   signals[submit->signal_count] = (struct drm_asahi_sync){
+   syncs[wait_count + submit->signal_count] = (struct drm_asahi_sync){
      .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
      .handle = queue->drm.syncobj,
      .timeline_value = ++queue->drm.timeline_value,
   };
 
    /* Now setup the command structs */
-   struct drm_asahi_command *cmds = malloc(sizeof(*cmds) * command_count);
-   union drm_asahi_cmd *cmds_inner =
-      malloc(sizeof(*cmds_inner) * command_count);
-   union drm_asahi_user_timestamps *ts_inner =
-      malloc(sizeof(*ts_inner) * command_count);
-   if (cmds == NULL || cmds_inner == NULL || ts_inner == NULL) {
-      free(ts_inner);
-      free(cmds_inner);
+   struct util_dynarray payload;
+   util_dynarray_init(&payload, NULL);
+   union drm_asahi_cmd *cmds = malloc(sizeof(*cmds) * command_count);
+   if (cmds == NULL) {
      free(cmds);
      return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
   }
 
-   unsigned cmd_it = 0;
    unsigned nr_vdm = 0, nr_cdm = 0;
 
    for (unsigned i = 0; i < submit->command_buffer_count; ++i) {
 
@@ -845,15 +805,11 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
          (struct hk_cmd_buffer *)submit->command_buffers[i];
 
       list_for_each_entry(struct hk_cs, cs, &cmdbuf->control_streams, node) {
-         assert(cmd_it < command_count);
-
-         struct drm_asahi_command cmd = {
-            .cmd_buffer = (uint64_t)(uintptr_t)&cmds_inner[cmd_it],
-            .result_offset = 0 /* TODO */,
-            .result_size = 0 /* TODO */,
-            /* Barrier on previous command */
-            .barriers = {nr_vdm, nr_cdm},
-         };
+         /* Barrier on previous command */
+         struct drm_asahi_cmd_header header =
+            agx_cmd_header(cs->type == HK_CS_CDM, nr_vdm, nr_cdm);
+
+         util_dynarray_append(&payload, struct drm_asahi_cmd_header, header);
 
          if (cs->type == HK_CS_CDM) {
             perf_debug(
 
@@ -864,17 +820,10 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
            assert(cs->stats.cmds > 0 || cs->stats.flushes > 0 ||
                   cs->timestamp.end.handle);
 
-            cmd.cmd_type = DRM_ASAHI_CMD_COMPUTE;
-            cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute);
+            struct drm_asahi_cmd_compute cmd;
+            asahi_fill_cdm_command(dev, cs, &cmd);
+            util_dynarray_append(&payload, struct drm_asahi_cmd_compute, cmd);
            nr_cdm++;
-
-            asahi_fill_cdm_command(dev, cs, &cmds_inner[cmd_it].compute,
-                                   &ts_inner[cmd_it].compute);
-
-            /* Work around for shipping 6.11.8 kernels, remove when we bump uapi
-             */
-            if (!agx_supports_timestamps(&dev->dev))
-               cmd.cmd_buffer_size -= 8;
         } else {
            assert(cs->type == HK_CS_VDM);
            perf_debug(cmdbuf, "%u: Submitting VDM with %u API draws, %u draws",
 
@@ -882,31 +831,17 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
            assert(cs->stats.cmds > 0 || cs->cr.process_empty_tiles ||
                   cs->timestamp.end.handle);
 
-            cmd.cmd_type = DRM_ASAHI_CMD_RENDER;
-            cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_render);
+            struct drm_asahi_cmd_render cmd;
+            asahi_fill_vdm_command(dev, cs, &cmd);
+            util_dynarray_append(&payload, struct drm_asahi_cmd_render, cmd);
            nr_vdm++;
-
-            asahi_fill_vdm_command(dev, cs, &cmds_inner[cmd_it].render,
-                                   &ts_inner[cmd_it].render);
         }
-
-         cmds[cmd_it++] = cmd;
      }
   }
 
-   assert(cmd_it == command_count);
-
    if (dev->dev.debug & AGX_DBG_TRACE) {
-      for (unsigned i = 0; i < command_count; ++i) {
-         if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE) {
-            agxdecode_drm_cmd_compute(dev->dev.agxdecode, &dev->dev.params,
-                                      &cmds_inner[i].compute, true);
-         } else {
-            assert(cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER);
-            agxdecode_drm_cmd_render(dev->dev.agxdecode, &dev->dev.params,
-                                     &cmds_inner[i].render, true);
-         }
-      }
+      agxdecode_drm_cmdbuf(dev->dev.agxdecode, &dev->dev.params, &payload,
+                           true);
 
       agxdecode_image_heap(dev->dev.agxdecode, dev->images.bo->va->addr,
                            dev->images.alloc);
 
@@ -917,25 +852,20 @@ queue_submit(struct hk_device *dev, struct hk_queue *queue,
    struct drm_asahi_submit submit_ioctl = {
      .flags = 0,
      .queue_id = queue->drm.id,
-      .result_handle = 0 /* TODO */,
      .in_sync_count = wait_count,
      .out_sync_count = submit->signal_count + 1,
-      .command_count = command_count,
-      .in_syncs = (uint64_t)(uintptr_t)(waits),
-      .out_syncs = (uint64_t)(uintptr_t)(signals),
-      .commands = (uint64_t)(uintptr_t)(cmds),
+      .cmdbuf_size = payload.size,
+      .syncs = (uint64_t)(uintptr_t)(syncs),
+      .cmdbuf = (uint64_t)(uintptr_t)(payload.data),
   };
 
    VkResult result;
    if (command_count <= max_commands_per_submit(dev))
      result = queue_submit_single(dev, &submit_ioctl);
    else
-      result = queue_submit_looped(dev, &submit_ioctl);
+      result = queue_submit_looped(dev, &submit_ioctl, command_count);
 
-   free(ts_inner);
-   free(cmds_inner);
-   free(cmds);
-
+   util_dynarray_fini(&payload);
    return result;
 }
 
@@ -970,18 +900,25 @@ hk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
    return result;
 }
 
-static uint32_t
+static enum drm_asahi_priority
 translate_priority(VkQueueGlobalPriorityKHR prio)
 {
-   /* clang-format off */
    switch (prio) {
-   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR: return 0;
-   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR: return 1;
-   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR: return 2;
-   case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR: return 3;
-   default: unreachable("Invalid VkQueueGlobalPriorityKHR");
+   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR:
+      return DRM_ASAHI_PRIORITY_REALTIME;
+
+   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR:
+      return DRM_ASAHI_PRIORITY_HIGH;
+
+   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR:
+      return DRM_ASAHI_PRIORITY_MEDIUM;
+
+   case VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR:
+      return DRM_ASAHI_PRIORITY_LOW;
+
+   default:
+      unreachable("Invalid VkQueueGlobalPriorityKHR");
   }
-   /* clang-format on */
 }
 
 VkResult
 
@@ -1001,17 +938,21 @@ hk_queue_init(struct hk_device *dev, struct hk_queue *queue,
       priority_info ? priority_info->globalPriority
                     : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
 
+   /* TODO: Lift when kernel side is ready and we can handle failures in
+    * create_command_queue.
+    */
+   enum drm_asahi_priority drm_priority = translate_priority(priority);
+   if (drm_priority >= DRM_ASAHI_PRIORITY_HIGH) {
+      return VK_ERROR_NOT_PERMITTED_EXT;
+   }
+
    result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family);
    if (result != VK_SUCCESS)
      return result;
 
    queue->vk.driver_submit = hk_queue_submit;
 
-   queue->drm.id = agx_create_command_queue(&dev->dev,
-                                            DRM_ASAHI_QUEUE_CAP_RENDER |
-                                               DRM_ASAHI_QUEUE_CAP_BLIT |
-                                               DRM_ASAHI_QUEUE_CAP_COMPUTE,
-                                            translate_priority(priority));
+   queue->drm.id = agx_create_command_queue(&dev->dev, drm_priority);
 
    if (drmSyncobjCreate(dev->dev.fd, 0, &queue->drm.syncobj)) {
      mesa_loge("drmSyncobjCreate() failed %d\n", errno);
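A short illustrative fragment (not part of the commit) of the single sync array used by the new submit ioctl, as built in queue_submit above: waits come first, then signals, with the queue's own timeline syncobj appended last; wait_count and signal_count here are placeholders for the driver's counts.

struct drm_asahi_sync syncs[wait_count + signal_count + 1];
/* [0, wait_count)                           waited (in) syncs        */
/* [wait_count, wait_count + signal_count)   signalled (out) syncs    */
/* [wait_count + signal_count]               queue progress timeline  */
submit_ioctl.syncs = (uint64_t)(uintptr_t)syncs;
submit_ioctl.in_sync_count = wait_count;
submit_ioctl.out_sync_count = signal_count + 1;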
@@ -59,7 +59,7 @@
 #include "util/u_math.h"
 
 #include <xf86drm.h>
-#include "asahi/lib/unstable_asahi_drm.h"
+#include "drm-uapi/asahi_drm.h"
 #include "drm-uapi/amdgpu_drm.h"
 #include "drm-uapi/i915_drm.h"
 #include "drm-uapi/v3d_drm.h"
 
@@ -10,6 +10,7 @@
 #include "util/bitset.h"
 #include "util/u_dynarray.h"
 #include "util/u_range.h"
+#include "agx_device.h"
 #include "agx_state.h"
 #include "vdrm.h"
|
|
@@ -159,137 +160,14 @@ agx_batch_init(struct agx_context *ctx,
       assert(!ret && batch->syncobj);
    }

-   batch->result_off =
-      (2 * sizeof(union agx_batch_result)) * agx_batch_idx(batch);
-   batch->result =
-      (void *)(((uint8_t *)agx_bo_map(ctx->result_buf)) + batch->result_off);
-   memset(batch->result, 0, sizeof(union agx_batch_result) * 2);
-
    agx_batch_mark_active(batch);
 }

-const char *status_str[] = {
-   [DRM_ASAHI_STATUS_PENDING] = "(pending)",
-   [DRM_ASAHI_STATUS_COMPLETE] = "Complete",
-   [DRM_ASAHI_STATUS_UNKNOWN_ERROR] = "UNKNOWN ERROR",
-   [DRM_ASAHI_STATUS_TIMEOUT] = "TIMEOUT",
-   [DRM_ASAHI_STATUS_FAULT] = "FAULT",
-   [DRM_ASAHI_STATUS_KILLED] = "KILLED",
-   [DRM_ASAHI_STATUS_NO_DEVICE] = "NO DEVICE",
-};
-
-const char *fault_type_str[] = {
-   [DRM_ASAHI_FAULT_NONE] = "(none)",
-   [DRM_ASAHI_FAULT_UNKNOWN] = "Unknown",
-   [DRM_ASAHI_FAULT_UNMAPPED] = "Unmapped",
-   [DRM_ASAHI_FAULT_AF_FAULT] = "AF Fault",
-   [DRM_ASAHI_FAULT_WRITE_ONLY] = "Write Only",
-   [DRM_ASAHI_FAULT_READ_ONLY] = "Read Only",
-   [DRM_ASAHI_FAULT_NO_ACCESS] = "No Access",
-};
-
-const char *low_unit_str[16] = {
-   "DCMP", "UL1C", "CMP", "GSL1", "IAP", "VCE", "TE", "RAS",
-   "VDM", "PPP", "IPF", "IPF_CPF", "VF", "VF_CPF", "ZLS", "UNK",
-};
-
-const char *mid_unit_str[16] = {
-   "UNK", "dPM", "dCDM_KS0", "dCDM_KS1", "dCDM_KS2", "dIPP",
-   "dIPP_CS", "dVDM_CSD", "dVDM_SSD", "dVDM_ILF", "dVDM_ILD", "dRDE0",
-   "dRDE1", "FC", "GSL2", "UNK",
-};
-
-const char *high_unit_str[16] = {
-   "gPM_SP", "gVDM_CSD_SP", "gVDM_SSD_SP", "gVDM_ILF_SP",
-   "gVDM_TFP_SP", "gVDM_MMB_SP", "gCDM_CS_KS0_SP", "gCDM_CS_KS1_SP",
-   "gCDM_CS_KS2_SP", "gCDM_KS0_SP", "gCDM_KS1_SP", "gCDM_KS2_SP",
-   "gIPP_SP", "gIPP_CS_SP", "gRDE0_SP", "gRDE1_SP",
-};
-
-static void
-agx_print_result(struct agx_device *dev, struct agx_context *ctx,
-                 struct drm_asahi_result_info *info, unsigned batch_idx,
-                 bool is_compute)
+static struct agx_timestamps *
+agx_batch_timestamps(struct agx_batch *batch)
 {
-   if (unlikely(info->status != DRM_ASAHI_STATUS_COMPLETE)) {
-      ctx->any_faults = true;
-   }
-
-   if (likely(info->status == DRM_ASAHI_STATUS_COMPLETE &&
-              !((dev)->debug & AGX_DBG_STATS)))
-      return;
-
-   if (is_compute) {
-      struct drm_asahi_result_compute *r = (void *)info;
-      float time = (r->ts_end - r->ts_start) / dev->params.timer_frequency_hz;
-
-      mesa_logw(
-         "[Batch %d] Compute %s: %.06f\n", batch_idx,
-         info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?",
-         time);
-   } else {
-      struct drm_asahi_result_render *r = (void *)info;
-      float time_vtx = (r->vertex_ts_end - r->vertex_ts_start) /
-                       (float)dev->params.timer_frequency_hz;
-      float time_frag = (r->fragment_ts_end - r->fragment_ts_start) /
-                        (float)dev->params.timer_frequency_hz;
-      mesa_logw(
-         "[Batch %d] Render %s: TVB %9ld/%9ld bytes (%d ovf) %c%c%c | vtx %.06f frag %.06f\n",
-         batch_idx,
-         info->status < ARRAY_SIZE(status_str) ? status_str[info->status] : "?",
-         (long)r->tvb_usage_bytes, (long)r->tvb_size_bytes,
-         (int)r->num_tvb_overflows,
-         r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF ? 'G' : ' ',
-         r->flags & DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN ? 'M' : ' ',
-         r->flags & DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED ? 'O' : ' ',
-         time_vtx, time_frag);
-   }
-
-   if (info->fault_type != DRM_ASAHI_FAULT_NONE) {
-      const char *unit_name;
-      int unit_index;
-
-      switch (info->unit) {
-      case 0x00 ... 0x9f:
-         unit_name = low_unit_str[info->unit & 0xf];
-         unit_index = info->unit >> 4;
-         break;
-      case 0xa0 ... 0xaf:
-         unit_name = mid_unit_str[info->unit & 0xf];
-         unit_index = 0;
-         break;
-      case 0xb0 ... 0xb7:
-         unit_name = "GL2CC_META";
-         unit_index = info->unit & 0x7;
-         break;
-      case 0xb8:
-         unit_name = "GL2CC_MB";
-         unit_index = 0;
-         break;
-      case 0xe0 ... 0xff:
-         unit_name = high_unit_str[info->unit & 0xf];
-         unit_index = (info->unit >> 4) & 1;
-         break;
-      default:
-         unit_name = "UNK";
-         unit_index = 0;
-         break;
-      }
-
-      mesa_logw(
-         "[Batch %d] Fault: %s : Addr 0x%llx %c Unit %02x (%s/%d) SB 0x%02x L%d Extra 0x%x\n",
-         batch_idx,
-         info->fault_type < ARRAY_SIZE(fault_type_str)
-            ? fault_type_str[info->fault_type]
-            : "?",
-         (long long)info->address, info->is_read ? 'r' : 'W', info->unit,
-         unit_name, unit_index, info->sideband, info->level, info->extra);
-
-      agx_debug_fault(dev, info->address);
-   }
-
-   assert(info->status == DRM_ASAHI_STATUS_COMPLETE ||
-          info->status == DRM_ASAHI_STATUS_KILLED);
+   struct agx_timestamps *ts = agx_bo_map(batch->ctx->timestamps);
+   return ts + agx_batch_idx(batch);
 }

 static void
@@ -302,17 +180,25 @@ agx_batch_print_stats(struct agx_device *dev, struct agx_batch *batch)
       abort();
    }

-   if (!batch->result)
+   if (likely(!(dev->debug & AGX_DBG_STATS)))
       return;

+   struct agx_timestamps *ts = agx_batch_timestamps(batch);
+
    if (batch->cdm.bo) {
-      agx_print_result(dev, batch->ctx, &batch->result[0].compute.info,
-                       batch_idx, true);
+      float time = (ts->comp_end - ts->comp_start) /
+                   (float)dev->params.command_timestamp_frequency_hz;
+
+      mesa_logw("[Batch %d] Compute: %.06f\n", batch_idx, time);
    }

    if (batch->vdm.bo) {
-      agx_print_result(dev, batch->ctx, &batch->result[1].render.info,
-                       batch_idx, false);
+      float time_vtx = (ts->vtx_end - ts->vtx_start) /
+                       (float)dev->params.command_timestamp_frequency_hz;
+      float time_frag = (ts->frag_end - ts->frag_start) /
+                        (float)dev->params.command_timestamp_frequency_hz;
+      mesa_logw("[Batch %d] vtx %.06f frag %.06f\n", batch_idx, time_vtx,
+                time_frag);
    }
 }
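The stats path above is plain tick arithmetic: (end − start) divided by the command timestamp frequency gives seconds. A tiny standalone illustration with made-up numbers; the 24 MHz clock rate is an assumption for the example only, the real code reads dev->params.command_timestamp_frequency_hz and the shared timestamp BO.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   uint64_t freq_hz = 24000000;   /* assumed 24 MHz timestamp clock */
   uint64_t comp_start = 1000000;
   uint64_t comp_end = 1120000;   /* 120000 ticks later */

   /* Same formula as agx_batch_print_stats above */
   float time = (comp_end - comp_start) / (float)freq_hz;
   printf("[Batch 0] Compute: %.06f\n", time); /* prints 0.005000 */
   return 0;
}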
@@ -328,15 +214,17 @@ agx_batch_cleanup(struct agx_context *ctx, struct agx_batch *batch, bool reset)
    assert(ctx->batch != batch);

    uint64_t begin_ts = ~0, end_ts = 0;
-   if (batch->result) {
+   if (batch->timestamps.size) {
+      struct agx_timestamps *ts = agx_batch_timestamps(batch);
+
       if (batch->cdm.bo) {
-         begin_ts = MIN2(begin_ts, batch->result[0].compute.ts_start);
-         end_ts = MAX2(end_ts, batch->result[0].compute.ts_end);
+         begin_ts = MIN2(begin_ts, ts->comp_start);
+         end_ts = MAX2(end_ts, ts->comp_end);
       }

       if (batch->vdm.bo) {
-         begin_ts = MIN2(begin_ts, batch->result[1].render.vertex_ts_start);
-         end_ts = MAX2(end_ts, batch->result[1].render.fragment_ts_end);
+         begin_ts = MIN2(begin_ts, ts->vtx_start);
+         end_ts = MAX2(end_ts, ts->frag_end);
       }
    }
@@ -733,6 +621,24 @@ agx_add_sync(struct drm_asahi_sync *syncs, unsigned *count, uint32_t handle)
    };
 }

+#define MAX_ATTACHMENTS 16
+
+struct attachments {
+   struct drm_asahi_attachment list[MAX_ATTACHMENTS];
+   size_t count;
+};
+
+static void
+asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc)
+{
+   assert(att->count < MAX_ATTACHMENTS);
+
+   att->list[att->count++] = (struct drm_asahi_attachment){
+      .size = rsrc->layout.size_B,
+      .pointer = rsrc->bo->va->addr,
+   };
+}
+
 void
 agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
                  struct drm_asahi_cmd_compute *compute,
@@ -741,34 +647,19 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
    struct agx_device *dev = agx_device(ctx->base.screen);
    struct agx_screen *screen = agx_screen(ctx->base.screen);

-   bool feedback = dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_STATS);
-
-#ifndef NDEBUG
-   /* Debug builds always get feedback (for fault checks) */
-   feedback = true;
-#endif
-
-   /* Timer queries use the feedback timestamping */
-   feedback |= (batch->timestamps.size > 0);
-
-   if (!feedback)
-      batch->result = NULL;
-
    /* We allocate the worst-case sync array size since this won't be excessive
    * for most workloads
    */
    unsigned max_syncs = batch->bo_list.bit_count + 2;
    unsigned in_sync_count = 0;
    unsigned shared_bo_count = 0;
-   struct drm_asahi_sync *in_syncs =
-      malloc(max_syncs * sizeof(struct drm_asahi_sync));
+   struct drm_asahi_sync *syncs =
+      malloc((max_syncs * sizeof(struct drm_asahi_sync)) + 2);
    struct agx_bo **shared_bos = malloc(max_syncs * sizeof(struct agx_bo *));

    uint64_t wait_seqid = p_atomic_read(&screen->flush_wait_seqid);

-   struct agx_submit_virt virt = {
-      .vbo_res_id = ctx->result_buf->vbo_res_id,
-   };
+   struct agx_submit_virt virt = {0};

    /* Elide syncing against our own queue */
    if (wait_seqid && wait_seqid == ctx->flush_my_seqid) {
@@ -784,37 +675,6 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
       batch_debug(batch, "Sync point is %" PRIu64, seqid);

-      /* Subtle concurrency note: Since we assign seqids atomically and do
-       * not lock submission across contexts, it is possible for two threads
-       * to submit timeline syncobj updates out of order. As far as I can
-       * tell, this case is handled in the kernel conservatively: it triggers
-       * a fence context bump and effectively "splits" the timeline at the
-       * larger point, causing future lookups for earlier points to return a
-       * later point, waiting more. The signaling code still makes sure all
-       * prior fences have to be signaled before considering a given point
-       * signaled, regardless of order. That's good enough for us.
-       *
-       * (Note: this case breaks drm_syncobj_query_ioctl and for this reason
-       * triggers a DRM_DEBUG message on submission, but we don't use that
-       * so we don't care.)
-       *
-       * This case can be tested by setting seqid = 1 unconditionally here,
-       * causing every single syncobj update to reuse the same timeline point.
-       * Everything still works (but over-synchronizes because this effectively
-       * serializes all submissions once any context flushes once).
-       */
-      struct drm_asahi_sync out_syncs[2] = {
-         {
-            .sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
-            .handle = batch->syncobj,
-         },
-         {
-            .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
-            .handle = screen->flush_syncobj,
-            .timeline_value = seqid,
-         },
-      };
-
    /* This lock protects against a subtle race scenario:
     * - Context 1 submits and registers itself as writer for a BO
     * - Context 2 runs the below loop, and finds the writer syncobj
@@ -861,7 +721,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
          close(in_sync_fd);

          /* Add it to our wait list */
-         agx_add_sync(in_syncs, &in_sync_count, sync_handle);
+         agx_add_sync(syncs, &in_sync_count, sync_handle);

          /* And keep track of the BO for cloning the out_sync */
          shared_bos[shared_bo_count++] = bo;
@@ -879,8 +739,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
             batch, "Waits on inter-context BO @ 0x%" PRIx64 " from queue %u",
             bo->va->addr, queue_id);

-         agx_add_sync(in_syncs, &in_sync_count,
-                      agx_bo_writer_syncobj(writer));
+         agx_add_sync(syncs, &in_sync_count, agx_bo_writer_syncobj(writer));
          shared_bos[shared_bo_count++] = NULL;
       }
    }
@@ -894,7 +753,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
       for (unsigned i = 0; i < virt.extres_count; i++) {
          while (!*p)
            p++; // Skip inter-context slots which are not recorded here
-         virt.extres[i].res_id = (*p)->vbo_res_id;
+         virt.extres[i].res_id = (*p)->uapi_handle;
          virt.extres[i].flags = ASAHI_EXTRES_READ | ASAHI_EXTRES_WRITE;
          p++;
       }
@@ -910,63 +769,109 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
    }

    /* Add an explicit fence from gallium, if any */
-   agx_add_sync(in_syncs, &in_sync_count, agx_get_in_sync(ctx));
+   agx_add_sync(syncs, &in_sync_count, agx_get_in_sync(ctx));

    /* Add an implicit cross-context flush sync point, if any */
    if (wait_seqid) {
       batch_debug(batch, "Waits on inter-context sync point %" PRIu64,
                   wait_seqid);
-      in_syncs[in_sync_count++] = (struct drm_asahi_sync){
+      syncs[in_sync_count++] = (struct drm_asahi_sync){
         .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
         .handle = screen->flush_syncobj,
         .timeline_value = wait_seqid,
      };
    }

+   /* Subtle concurrency note: Since we assign seqids atomically and do
+    * not lock submission across contexts, it is possible for two threads
+    * to submit timeline syncobj updates out of order. As far as I can
+    * tell, this case is handled in the kernel conservatively: it triggers
+    * a fence context bump and effectively "splits" the timeline at the
+    * larger point, causing future lookups for earlier points to return a
+    * later point, waiting more. The signaling code still makes sure all
+    * prior fences have to be signaled before considering a given point
+    * signaled, regardless of order. That's good enough for us.
+    *
+    * (Note: this case breaks drm_syncobj_query_ioctl and for this reason
+    * triggers a DRM_DEBUG message on submission, but we don't use that
+    * so we don't care.)
+    *
+    * This case can be tested by setting seqid = 1 unconditionally here,
+    * causing every single syncobj update to reuse the same timeline point.
+    * Everything still works (but over-synchronizes because this effectively
+    * serializes all submissions once any context flushes once).
+    */
+   struct drm_asahi_sync *out_syncs = syncs + in_sync_count;
+
+   out_syncs[0] = (struct drm_asahi_sync){
+      .sync_type = DRM_ASAHI_SYNC_SYNCOBJ,
+      .handle = batch->syncobj,
+   };
+
+   out_syncs[1] = (struct drm_asahi_sync){
+      .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
+      .handle = screen->flush_syncobj,
+      .timeline_value = seqid,
+   };
+
    /* Submit! */
-   struct drm_asahi_command commands[2];
-   unsigned command_count = 0;
+   struct util_dynarray cmdbuf;
+   util_dynarray_init(&cmdbuf, NULL);

    if (compute) {
-      commands[command_count++] = (struct drm_asahi_command){
-         .cmd_type = DRM_ASAHI_CMD_COMPUTE,
-         .flags = 0,
-         .cmd_buffer = (uint64_t)(uintptr_t)compute,
-
-         /* Work around for shipping 6.11.8 kernels, remove when we bump uapi
-          */
-         .cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute) - 8,
-         .result_offset = feedback ? batch->result_off : 0,
-         .result_size = feedback ? sizeof(union agx_batch_result) : 0,
-         /* Barrier on previous submission */
-         .barriers = {0, 0},
-      };
+      /* Barrier on previous submission */
+      struct drm_asahi_cmd_header header = agx_cmd_header(true, 0, 0);
+
+      util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_header, header);
+      util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_compute, *compute);
    }

    if (render) {
-      commands[command_count++] = (struct drm_asahi_command){
-         .cmd_type = DRM_ASAHI_CMD_RENDER,
-         .flags = 0,
-         .cmd_buffer = (uint64_t)(uintptr_t)render,
-         .cmd_buffer_size = sizeof(struct drm_asahi_cmd_render),
-         .result_offset =
-            feedback ? (batch->result_off + sizeof(union agx_batch_result)) : 0,
-         .result_size = feedback ? sizeof(union agx_batch_result) : 0,
-         /* Barrier on previous submission */
-         .barriers = {compute ? DRM_ASAHI_BARRIER_NONE : 0, compute ? 1 : 0},
-      };
+      struct attachments att = {.count = 0};
+      struct pipe_framebuffer_state *fb = &batch->key;
+
+      for (unsigned i = 0; i < fb->nr_cbufs; ++i) {
+         if (fb->cbufs[i])
+            asahi_add_attachment(&att, agx_resource(fb->cbufs[i]->texture));
+      }
+
+      if (fb->zsbuf) {
+         struct agx_resource *rsrc = agx_resource(fb->zsbuf->texture);
+         asahi_add_attachment(&att, rsrc);
+
+         if (rsrc->separate_stencil)
+            asahi_add_attachment(&att, rsrc->separate_stencil);
+      }
+
+      if (att.count) {
+         struct drm_asahi_cmd_header header = {
+            .cmd_type = DRM_ASAHI_SET_FRAGMENT_ATTACHMENTS,
+            .size = sizeof(att.list[0]) * att.count,
+            .cdm_barrier = DRM_ASAHI_BARRIER_NONE,
+            .vdm_barrier = DRM_ASAHI_BARRIER_NONE,
+         };
+
+         util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_header, header);
+         util_dynarray_append_array(&cmdbuf, struct drm_asahi_attachment,
+                                    att.list, att.count);
+      }
+
+      /* Barrier on previous submission */
+      struct drm_asahi_cmd_header header = agx_cmd_header(
+         false, compute ? DRM_ASAHI_BARRIER_NONE : 0, compute ? 1 : 0);
+
+      util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_header, header);
+      util_dynarray_append(&cmdbuf, struct drm_asahi_cmd_render, *render);
    }

    struct drm_asahi_submit submit = {
       .flags = 0,
       .queue_id = ctx->queue_id,
-      .result_handle = feedback ? ctx->result_buf->handle : 0,
       .in_sync_count = in_sync_count,
       .out_sync_count = 2,
-      .command_count = command_count,
-      .in_syncs = (uint64_t)(uintptr_t)(in_syncs),
-      .out_syncs = (uint64_t)(uintptr_t)(out_syncs),
-      .commands = (uint64_t)(uintptr_t)(&commands[0]),
+      .syncs = (uint64_t)(uintptr_t)(syncs),
+      .cmdbuf = (uint64_t)(uintptr_t)(cmdbuf.data),
+      .cmdbuf_size = cmdbuf.size,
    };

    int ret = dev->ops.submit(dev, &submit, &virt);
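Worth spelling out the shape of the new submission: instead of a fixed array of struct drm_asahi_command, the kernel now receives one flat byte buffer of struct drm_asahi_cmd_header records, each immediately followed by its payload, plus a single syncs array in which the in-syncs come first and the two out-syncs live at syncs + in_sync_count. A self-contained sketch of the append pattern the util_dynarray calls above implement; cmdbuf_push is a hypothetical helper for illustration, not Mesa API.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical illustration: append one header followed by its payload to a
 * growing command buffer. */
static int
cmdbuf_push(uint8_t **buf, size_t *size, const void *header, size_t header_sz,
            const void *payload, size_t payload_sz)
{
   uint8_t *grown = realloc(*buf, *size + header_sz + payload_sz);
   if (!grown)
      return -1;

   memcpy(grown + *size, header, header_sz);
   memcpy(grown + *size + header_sz, payload, payload_sz);

   *buf = grown;
   *size += header_sz + payload_sz;
   return 0;
}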
@@ -983,7 +888,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
         fprintf(
            stderr,
            "DRM_IOCTL_ASAHI_SUBMIT render failed: %m (%dx%d tile %dx%d layers %d samples %d)\n",
-            c->fb_width, c->fb_height, c->utile_width, c->utile_height,
+            c->width_px, c->height_px, c->utile_width_px, c->utile_height_px,
            c->layers, c->samples);
      }
@@ -1009,7 +914,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
                     shared_bos[i]->va->addr);

         /* Free the in_sync handle we just acquired */
-         ret = drmSyncobjDestroy(dev->fd, in_syncs[i].handle);
+         ret = drmSyncobjDestroy(dev->fd, syncs[i].handle);
         assert(ret >= 0);
         /* And then import the out_sync sync file into it */
         ret = agx_import_sync_file(dev, shared_bos[i], out_sync_fd);
@@ -1039,21 +944,12 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
       batch_debug(batch, "Writes to BO @ 0x%" PRIx64, bo->va->addr);
    }

-   free(in_syncs);
+   free(syncs);
    free(shared_bos);

    if (dev->debug & (AGX_DBG_TRACE | AGX_DBG_SYNC | AGX_DBG_SCRATCH)) {
       if (dev->debug & AGX_DBG_TRACE) {
-         if (compute) {
-            agxdecode_drm_cmd_compute(dev->agxdecode, &dev->params, compute,
-                                      true);
-         }
-
-         if (render) {
-            agxdecode_drm_cmd_render(dev->agxdecode, &dev->params, render,
-                                     true);
-         }
-
+         agxdecode_drm_cmdbuf(dev->agxdecode, &dev->params, &cmdbuf, true);
          agxdecode_next_frame();
       }
@@ -1077,6 +973,7 @@ agx_batch_submit(struct agx_context *ctx, struct agx_batch *batch,
       }
    }

+   util_dynarray_fini(&cmdbuf);
    agx_batch_mark_submitted(batch);

    if (virt.extres)
@@ -1155,9 +1052,6 @@ agx_batch_reset(struct agx_context *ctx, struct agx_batch *batch)
    if (ctx->batch == batch)
       ctx->batch = NULL;

-   /* Elide printing stats */
-   batch->result = NULL;
-
    agx_batch_cleanup(ctx, batch, true);
 }
@@ -11,7 +11,7 @@
 #include "asahi/compiler/agx_compile.h"
 #include "asahi/layout/layout.h"
 #include "asahi/lib/decode.h"
-#include "asahi/lib/unstable_asahi_drm.h"
+#include "drm-uapi/asahi_drm.h"
 #include "drm-uapi/drm_fourcc.h"
 #include "frontend/winsys_handle.h"
 #include "gallium/auxiliary/renderonly/renderonly.h"
@@ -1199,26 +1199,6 @@ agx_flush_resource(struct pipe_context *pctx, struct pipe_resource *pres)
    }
 }

-#define MAX_ATTACHMENTS 16
-
-struct attachments {
-   struct drm_asahi_attachment list[MAX_ATTACHMENTS];
-   size_t count;
-};
-
-static void
-asahi_add_attachment(struct attachments *att, struct agx_resource *rsrc,
-                     struct pipe_surface *surf)
-{
-   assert(att->count < MAX_ATTACHMENTS);
-   int idx = att->count++;
-
-   att->list[idx].size = rsrc->layout.size_B;
-   att->list[idx].pointer = rsrc->bo->va->addr;
-   att->list[idx].order = 1; // TODO: What does this do?
-   att->list[idx].flags = 0;
-}
-
 static bool
 is_aligned(unsigned x, unsigned pot_alignment)
 {
@@ -1226,12 +1206,20 @@ is_aligned(unsigned x, unsigned pot_alignment)
    return (x & (pot_alignment - 1)) == 0;
 }

+static unsigned
+build_timestamp_offset(struct agx_batch *batch, unsigned offset)
+{
+   return (agx_batch_idx(batch) * sizeof(struct agx_timestamps)) + offset;
+}
+
+#define timestamp_offset(batch, offs) \
+   build_timestamp_offset(batch, offsetof(struct agx_timestamps, offs))
+
 static void
 agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
-           struct attachments *att, struct agx_pool *pool,
-           struct agx_batch *batch, struct pipe_framebuffer_state *framebuffer,
-           uint64_t encoder_ptr, uint64_t encoder_id, uint64_t cmd_ta_id,
-           uint64_t cmd_3d_id, uint64_t scissor_ptr, uint64_t depth_bias_ptr,
+           struct agx_pool *pool, struct agx_batch *batch,
+           struct pipe_framebuffer_state *framebuffer, uint64_t encoder_ptr,
+           uint64_t scissor_ptr, uint64_t depth_bias_ptr,
            uint64_t visibility_result_ptr, struct asahi_bg_eot pipeline_clear,
            struct asahi_bg_eot pipeline_load,
            struct asahi_bg_eot pipeline_store, bool clear_pipeline_textures,
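The macro above just turns a field name into a byte offset within this batch's slot of the shared timestamp BO. A standalone check of the arithmetic, using the struct agx_timestamps layout introduced in the agx_state.h hunk further down; batch index 3 is an arbitrary example value.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Local copy of the layout from agx_state.h, for this example only. */
struct agx_timestamps {
   uint64_t vtx_start, vtx_end;
   uint64_t frag_start, frag_end;
   uint64_t comp_start, comp_end;
};

int
main(void)
{
   unsigned batch_idx = 3;
   unsigned off = (batch_idx * sizeof(struct agx_timestamps)) +
                  offsetof(struct agx_timestamps, frag_end);

   assert(off == 3 * 48 + 24); /* 168 bytes into the timestamp BO */
   return 0;
}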
@@ -1240,28 +1228,27 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
 {
    memset(c, 0, sizeof(*c));

-   c->encoder_ptr = encoder_ptr;
-   c->encoder_id = encoder_id;
-   c->cmd_3d_id = cmd_3d_id;
-   c->cmd_ta_id = cmd_ta_id;
-
-   c->fragment_usc_base = dev->shader_base;
-   c->vertex_usc_base = dev->shader_base;
+   c->vdm_ctrl_stream_base = encoder_ptr;

    /* bit 0 specifies OpenGL clip behaviour. Since ARB_clip_control is
     * advertised, we don't set it and lower in the vertex shader.
     */
    c->ppp_ctrl = 0x202;

-   c->fb_width = framebuffer->width;
-   c->fb_height = framebuffer->height;
-
-   c->iogpu_unk_214 = 0xc000;
+   c->width_px = framebuffer->width;
+   c->height_px = framebuffer->height;

    c->isp_bgobjvals = 0x300;

    struct agx_resource *zres = NULL, *sres = NULL;

+   if (framebuffer->zsbuf) {
+      agx_pack(&c->isp_zls_pixels, CR_ISP_ZLS_PIXELS, cfg) {
+         cfg.x = c->width_px;
+         cfg.y = c->height_px;
+      }
+   }
+
    agx_pack(&c->zls_ctrl, ZLS_CONTROL, zls_control) {

       if (framebuffer->zsbuf) {
@@ -1279,9 +1266,6 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
                desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
                desc->format == PIPE_FORMAT_S8_UINT);

-         c->depth_dimensions =
-            (framebuffer->width - 1) | ((framebuffer->height - 1) << 15);
-
         if (util_format_has_depth(desc))
            zres = zsres;
         else
|
||||||
zls_control.z_store_enable = (batch->resolve & PIPE_CLEAR_DEPTH);
|
zls_control.z_store_enable = (batch->resolve & PIPE_CLEAR_DEPTH);
|
||||||
zls_control.z_load_enable = !clear && load;
|
zls_control.z_load_enable = !clear && load;
|
||||||
|
|
||||||
c->depth_buffer_load = agx_map_texture_gpu(zres, first_layer) +
|
c->depth.base = agx_map_texture_gpu(zres, first_layer) +
|
||||||
ail_get_level_offset_B(&zres->layout, level);
|
ail_get_level_offset_B(&zres->layout, level);
|
||||||
|
|
||||||
c->depth_buffer_store = c->depth_buffer_load;
|
|
||||||
c->depth_buffer_partial = c->depth_buffer_load;
|
|
||||||
|
|
||||||
/* Main stride in pages */
|
/* Main stride in pages */
|
||||||
assert((zres->layout.depth_px == 1 ||
|
assert((zres->layout.depth_px == 1 ||
|
||||||
|
|
@@ -1309,14 +1290,12 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
                "Page aligned Z layers");

         unsigned stride_pages = zres->layout.layer_stride_B / AIL_PAGESIZE;
-         c->depth_buffer_load_stride = ((stride_pages - 1) << 14) | 1;
-         c->depth_buffer_store_stride = c->depth_buffer_load_stride;
-         c->depth_buffer_partial_stride = c->depth_buffer_load_stride;
+         c->depth.stride = ((stride_pages - 1) << 14) | 1;

         assert(zres->layout.tiling != AIL_TILING_LINEAR && "must tile");

         if (zres->layout.compressed) {
-            c->depth_meta_buffer_load =
+            c->depth.comp_base =
               agx_map_texture_gpu(zres, 0) +
               zres->layout.metadata_offset_B +
               (first_layer * zres->layout.compression_layer_stride_B) +
|
||||||
"Cacheline aligned Z meta layers");
|
"Cacheline aligned Z meta layers");
|
||||||
unsigned stride_lines =
|
unsigned stride_lines =
|
||||||
zres->layout.compression_layer_stride_B / AIL_CACHELINE;
|
zres->layout.compression_layer_stride_B / AIL_CACHELINE;
|
||||||
c->depth_meta_buffer_load_stride = (stride_lines - 1) << 14;
|
c->depth.comp_stride = (stride_lines - 1) << 14;
|
||||||
|
|
||||||
c->depth_meta_buffer_store = c->depth_meta_buffer_load;
|
|
||||||
c->depth_meta_buffer_store_stride =
|
|
||||||
c->depth_meta_buffer_load_stride;
|
|
||||||
c->depth_meta_buffer_partial = c->depth_meta_buffer_load;
|
|
||||||
c->depth_meta_buffer_partial_stride =
|
|
||||||
c->depth_meta_buffer_load_stride;
|
|
||||||
|
|
||||||
zls_control.z_compress_1 = true;
|
zls_control.z_compress_1 = true;
|
||||||
zls_control.z_compress_2 = true;
|
zls_control.z_compress_2 = true;
|
||||||
|
|
@@ -1346,7 +1318,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
            c->isp_bgobjdepth =
               (uint16_t)(SATURATE(clear_depth) * scale + 0.5f);
            zls_control.z_format = AGX_ZLS_FORMAT_16;
-            c->iogpu_unk_214 |= 0x40000;
+            c->flags |= DRM_ASAHI_RENDER_DBIAS_IS_INT;
         } else {
            c->isp_bgobjdepth = fui(clear_depth);
            zls_control.z_format = AGX_ZLS_FORMAT_32F;
@@ -1360,24 +1332,18 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
         zls_control.s_store_enable = (batch->resolve & PIPE_CLEAR_STENCIL);
         zls_control.s_load_enable = !clear && load;

-         c->stencil_buffer_load =
-            agx_map_texture_gpu(sres, first_layer) +
-            ail_get_level_offset_B(&sres->layout, level);
-
-         c->stencil_buffer_store = c->stencil_buffer_load;
-         c->stencil_buffer_partial = c->stencil_buffer_load;
+         c->stencil.base = agx_map_texture_gpu(sres, first_layer) +
+                           ail_get_level_offset_B(&sres->layout, level);

         /* Main stride in pages */
         assert((sres->layout.depth_px == 1 ||
                 is_aligned(sres->layout.layer_stride_B, AIL_PAGESIZE)) &&
                "Page aligned S layers");
         unsigned stride_pages = sres->layout.layer_stride_B / AIL_PAGESIZE;
-         c->stencil_buffer_load_stride = ((stride_pages - 1) << 14) | 1;
-         c->stencil_buffer_store_stride = c->stencil_buffer_load_stride;
-         c->stencil_buffer_partial_stride = c->stencil_buffer_load_stride;
+         c->stencil.stride = ((stride_pages - 1) << 14) | 1;

         if (sres->layout.compressed) {
-            c->stencil_meta_buffer_load =
+            c->stencil.comp_base =
               agx_map_texture_gpu(sres, 0) +
               sres->layout.metadata_offset_B +
               (first_layer * sres->layout.compression_layer_stride_B) +
@@ -1389,14 +1355,7 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
                   "Cacheline aligned S meta layers");
            unsigned stride_lines =
               sres->layout.compression_layer_stride_B / AIL_CACHELINE;
-            c->stencil_meta_buffer_load_stride = (stride_lines - 1) << 14;
-
-            c->stencil_meta_buffer_store = c->stencil_meta_buffer_load;
-            c->stencil_meta_buffer_store_stride =
-               c->stencil_meta_buffer_load_stride;
-            c->stencil_meta_buffer_partial = c->stencil_meta_buffer_load;
-            c->stencil_meta_buffer_partial_stride =
-               c->stencil_meta_buffer_load_stride;
+            c->stencil.comp_stride = (stride_lines - 1) << 14;

            zls_control.s_compress_1 = true;
            zls_control.s_compress_2 = true;
@@ -1407,71 +1366,48 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
       }
    }

-   if (clear_pipeline_textures)
-      c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
-   else
-      c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES;
-
-   if (zres && !(batch->clear & PIPE_CLEAR_DEPTH))
-      c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
-
-   if (sres && !(batch->clear & PIPE_CLEAR_STENCIL))
-      c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
-
    if (dev->debug & AGX_DBG_NOCLUSTER)
-      c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING;
+      c->flags |= DRM_ASAHI_RENDER_NO_VERTEX_CLUSTERING;

-   /* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */
-   if (tib->nr_samples > 1 && framebuffer->zsbuf)
-      c->flags |= ASAHI_RENDER_MSAA_ZS;
-
-   memcpy(&c->load_pipeline_bind, &pipeline_clear.counts,
+   memcpy(&c->bg.rsrc_spec, &pipeline_clear.counts,
           sizeof(struct agx_counts_packed));

-   memcpy(&c->store_pipeline_bind, &pipeline_store.counts,
+   memcpy(&c->eot.rsrc_spec, &pipeline_store.counts,
           sizeof(struct agx_counts_packed));

-   memcpy(&c->partial_reload_pipeline_bind, &pipeline_load.counts,
+   memcpy(&c->partial_bg.rsrc_spec, &pipeline_load.counts,
           sizeof(struct agx_counts_packed));

-   memcpy(&c->partial_store_pipeline_bind, &pipeline_store.counts,
+   memcpy(&c->partial_eot.rsrc_spec, &pipeline_store.counts,
           sizeof(struct agx_counts_packed));

    /* XXX is this correct? */
-   c->load_pipeline = pipeline_clear.usc | (framebuffer->nr_cbufs >= 4 ? 8 : 4);
-   c->store_pipeline = pipeline_store.usc | 4;
-   c->partial_reload_pipeline = pipeline_load.usc | 4;
-   c->partial_store_pipeline = pipeline_store.usc | 4;
+   c->bg.usc = pipeline_clear.usc | (framebuffer->nr_cbufs >= 4 ? 8 : 4);
+   c->eot.usc = pipeline_store.usc | 4;
+   c->partial_bg.usc = pipeline_load.usc | 4;
+   c->partial_eot.usc = pipeline_store.usc | 4;

-   c->utile_width = tib->tile_size.width;
-   c->utile_height = tib->tile_size.height;
+   c->utile_width_px = tib->tile_size.width;
+   c->utile_height_px = tib->tile_size.height;

    c->samples = tib->nr_samples;
    c->layers = MAX2(util_framebuffer_get_num_layers(framebuffer), 1);

    c->ppp_multisamplectl = batch->uniforms.ppp_multisamplectl;
-   c->sample_size = tib->sample_size_B;
-
-   /* XXX OR 0x80 with eMRT? */
-   c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(tib), 2048) / 2048;
+   c->sample_size_B = tib->sample_size_B;

    float tan_60 = 1.732051f;
-   c->merge_upper_x = fui(tan_60 / framebuffer->width);
-   c->merge_upper_y = fui(tan_60 / framebuffer->height);
+   c->isp_merge_upper_x = fui(tan_60 / framebuffer->width);
+   c->isp_merge_upper_y = fui(tan_60 / framebuffer->height);

-   c->scissor_array = scissor_ptr;
-   c->depth_bias_array = depth_bias_ptr;
-   c->visibility_result_buffer = visibility_result_ptr;
+   c->isp_scissor_base = scissor_ptr;
+   c->isp_dbias_base = depth_bias_ptr;
+   c->isp_oclqry_base = visibility_result_ptr;

-   c->vertex_sampler_array =
-      batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0;
-   c->vertex_sampler_count = batch->sampler_heap.count;
-   c->vertex_sampler_max = batch->sampler_heap.count + 1;
-
-   /* In the future we could split the heaps if useful */
-   c->fragment_sampler_array = c->vertex_sampler_array;
-   c->fragment_sampler_count = c->vertex_sampler_count;
-   c->fragment_sampler_max = c->vertex_sampler_max;
+   if (batch->sampler_heap.bo) {
+      c->sampler_heap = batch->sampler_heap.bo->va->addr;
+      c->sampler_count = batch->sampler_heap.count;
+   }

    /* If a tile is empty, we do not want to process it, as the redundant
     * roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of
@@ -1483,39 +1419,25 @@ agx_cmdbuf(struct agx_device *dev, struct drm_asahi_cmd_render *c,
    * This case matters a LOT for performance in workloads that split batches.
    */
    if (batch->clear & batch->resolve)
-      c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES;
+      c->flags |= DRM_ASAHI_RENDER_PROCESS_EMPTY_TILES;

-   for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) {
-      if (!framebuffer->cbufs[i])
-         continue;
-
-      asahi_add_attachment(att, agx_resource(framebuffer->cbufs[i]->texture),
-                           framebuffer->cbufs[i]);
-   }
-
-   if (framebuffer->zsbuf) {
-      struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture);
-
-      asahi_add_attachment(att, rsrc, framebuffer->zsbuf);
-
-      if (rsrc->separate_stencil) {
-         asahi_add_attachment(att, rsrc->separate_stencil, framebuffer->zsbuf);
-      }
-   }
-
-   c->fragment_attachments = (uint64_t)(uintptr_t)&att->list[0];
-   c->fragment_attachment_count = att->count;
-
    if (batch->vs_scratch) {
-      c->flags |= ASAHI_RENDER_VERTEX_SPILLS;
-      c->vertex_helper_arg = batch->ctx->scratch_vs.buf->va->addr;
-      c->vertex_helper_cfg = batch->vs_preamble_scratch << 16;
-      c->vertex_helper_program = agx_helper_program(&batch->ctx->bg_eot);
+      c->flags |= DRM_ASAHI_RENDER_VERTEX_SCRATCH;
+      c->vertex_helper.data = batch->ctx->scratch_vs.buf->va->addr;
+      c->vertex_helper.cfg = batch->vs_preamble_scratch << 16;
+      c->vertex_helper.binary = agx_helper_program(&batch->ctx->bg_eot);
    }
    if (batch->fs_scratch) {
-      c->fragment_helper_arg = batch->ctx->scratch_fs.buf->va->addr;
-      c->fragment_helper_cfg = batch->fs_preamble_scratch << 16;
-      c->fragment_helper_program = agx_helper_program(&batch->ctx->bg_eot);
+      c->fragment_helper.data = batch->ctx->scratch_fs.buf->va->addr;
+      c->fragment_helper.cfg = batch->fs_preamble_scratch << 16;
+      c->fragment_helper.binary = agx_helper_program(&batch->ctx->bg_eot);
+   }
+
+   if (batch->timestamps.size > 0) {
+      c->ts_vtx.start.handle = batch->ctx->timestamp_handle;
+      c->ts_frag.end.handle = batch->ctx->timestamp_handle;
+      c->ts_vtx.start.offset = timestamp_offset(batch, vtx_start);
+      c->ts_frag.end.offset = timestamp_offset(batch, frag_end);
    }
 }
@@ -1595,8 +1517,6 @@ static void
 agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
                   struct drm_asahi_cmd_compute *cmdbuf)
 {
-   struct agx_device *dev = agx_device(ctx->base.screen);
-
    /* Finalize the encoder */
    agx_pack(batch->cdm.current, CDM_STREAM_TERMINATE, _)
       ;
@@ -1606,27 +1526,14 @@ agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
    if (batch->cs_scratch)
       agx_batch_add_bo(batch, ctx->scratch_cs.buf);

-   unsigned cmdbuf_id = agx_get_global_id(dev);
-   unsigned encoder_id = agx_get_global_id(dev);
-
    *cmdbuf = (struct drm_asahi_cmd_compute){
-      .flags = 0,
-      .encoder_ptr = batch->cdm.bo->va->addr,
-      .encoder_end =
+      .cdm_ctrl_stream_base = batch->cdm.bo->va->addr,
+      .cdm_ctrl_stream_end =
         batch->cdm.bo->va->addr +
         (batch->cdm.current - (uint8_t *)agx_bo_map(batch->cdm.bo)),
-      .usc_base = dev->shader_base,
-      .helper_arg = 0,
-      .helper_cfg = 0,
-      .helper_program = 0,
-      .iogpu_unk_40 = 0,
-      .sampler_array =
+      .sampler_heap =
         batch->sampler_heap.bo ? batch->sampler_heap.bo->va->addr : 0,
      .sampler_count = batch->sampler_heap.count,
-      .sampler_max = batch->sampler_heap.count + 1,
-      .encoder_id = encoder_id,
-      .cmd_id = cmdbuf_id,
-      .unk_mask = 0xffffffff,
    };

    if (batch->cs_scratch) {
@@ -1635,16 +1542,23 @@ agx_flush_compute(struct agx_context *ctx, struct agx_batch *batch,
      // helper. Disable them for now.

      // cmdbuf->iogpu_unk_40 = 0x1c;
-      cmdbuf->helper_arg = ctx->scratch_cs.buf->va->addr;
-      cmdbuf->helper_cfg = batch->cs_preamble_scratch << 16;
-      // cmdbuf->helper_cfg |= 0x40;
-      cmdbuf->helper_program = agx_helper_program(&batch->ctx->bg_eot);
+      cmdbuf->helper.data = ctx->scratch_cs.buf->va->addr;
+      cmdbuf->helper.cfg = batch->cs_preamble_scratch << 16;
+      // cmdbuf->helper.cfg |= 0x40;
+      cmdbuf->helper.binary = agx_helper_program(&batch->ctx->bg_eot);
+   }
+
+   if (batch->timestamps.size > 0) {
+      cmdbuf->ts.start.handle = ctx->timestamp_handle;
+      cmdbuf->ts.end.handle = ctx->timestamp_handle;
+      cmdbuf->ts.start.offset = timestamp_offset(batch, comp_start);
+      cmdbuf->ts.end.offset = timestamp_offset(batch, comp_start);
    }
 }

 static void
 agx_flush_render(struct agx_context *ctx, struct agx_batch *batch,
-                 struct drm_asahi_cmd_render *cmdbuf, struct attachments *att)
+                 struct drm_asahi_cmd_render *cmdbuf)
 {
    struct agx_device *dev = agx_device(ctx->base.screen);
@@ -1694,16 +1608,11 @@ agx_flush_render(struct agx_context *ctx, struct agx_batch *batch,
    */
    agx_batch_add_bo(batch, batch->vdm.bo);

-   unsigned cmd_ta_id = agx_get_global_id(dev);
-   unsigned cmd_3d_id = agx_get_global_id(dev);
-   unsigned encoder_id = agx_get_global_id(dev);
-
-   agx_cmdbuf(dev, cmdbuf, att, &batch->pool, batch, &batch->key,
-              batch->vdm.bo->va->addr, encoder_id, cmd_ta_id, cmd_3d_id,
-              scissor, zbias, agx_get_occlusion_heap(batch),
-              pipeline_background, pipeline_background_partial, pipeline_store,
-              clear_pipeline_textures, batch->clear_depth, batch->clear_stencil,
-              &batch->tilebuffer_layout);
+   agx_cmdbuf(
+      dev, cmdbuf, &batch->pool, batch, &batch->key, batch->vdm.bo->va->addr,
+      scissor, zbias, agx_get_occlusion_heap(batch), pipeline_background,
+      pipeline_background_partial, pipeline_store, clear_pipeline_textures,
+      batch->clear_depth, batch->clear_stencil, &batch->tilebuffer_layout);
 }

 void
|
||||||
assert(agx_batch_is_active(batch));
|
assert(agx_batch_is_active(batch));
|
||||||
assert(!agx_batch_is_submitted(batch));
|
assert(!agx_batch_is_submitted(batch));
|
||||||
|
|
||||||
struct attachments att = {.count = 0};
|
|
||||||
struct drm_asahi_cmd_render render;
|
struct drm_asahi_cmd_render render;
|
||||||
struct drm_asahi_cmd_compute compute;
|
struct drm_asahi_cmd_compute compute;
|
||||||
bool has_vdm = false, has_cdm = false;
|
bool has_vdm = false, has_cdm = false;
|
||||||
|
|
@@ -1723,7 +1631,7 @@ agx_flush_batch(struct agx_context *ctx, struct agx_batch *batch)
    }

    if (batch->vdm.bo && (batch->clear || batch->initialized)) {
-      agx_flush_render(ctx, batch, &render, &att);
+      agx_flush_render(ctx, batch, &render);
       has_vdm = true;
    }
@@ -1761,8 +1669,6 @@ agx_destroy_context(struct pipe_context *pctx)
    agx_bg_eot_cleanup(&ctx->bg_eot);
    agx_destroy_meta_shaders(ctx);

-   agx_bo_unreference(dev, ctx->result_buf);
-
    /* Lock around the syncobj destruction, to avoid racing
     * command submission in another context.
     **/
|
||||||
drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj);
|
drmSyncobjDestroy(dev->fd, ctx->batches.slots[i].syncobj);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dev->ops.bo_unbind_object(dev, ctx->timestamp_handle);
|
||||||
|
agx_bo_unreference(dev, ctx->timestamps);
|
||||||
|
|
||||||
u_rwlock_wrunlock(&screen->destroy_lock);
|
u_rwlock_wrunlock(&screen->destroy_lock);
|
||||||
|
|
||||||
pipe_resource_reference(&ctx->heap, NULL);
|
pipe_resource_reference(&ctx->heap, NULL);
|
||||||
|
|
@@ -1841,21 +1750,18 @@ agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
    }
    pctx->const_uploader = pctx->stream_uploader;

-   uint32_t priority = 2;
-   if (flags & PIPE_CONTEXT_PRIORITY_LOW)
-      priority = 3;
-   else if (flags & PIPE_CONTEXT_PRIORITY_MEDIUM)
-      priority = 2;
-   else if (flags & PIPE_CONTEXT_PRIORITY_HIGH)
-      priority = 1;
-   else if (flags & PIPE_CONTEXT_PRIORITY_REALTIME)
-      priority = 0;
-
-   ctx->queue_id = agx_create_command_queue(agx_device(screen),
-                                            DRM_ASAHI_QUEUE_CAP_RENDER |
-                                               DRM_ASAHI_QUEUE_CAP_BLIT |
-                                               DRM_ASAHI_QUEUE_CAP_COMPUTE,
-                                            priority);
+   enum drm_asahi_priority priority = DRM_ASAHI_PRIORITY_MEDIUM;
+   if (flags & PIPE_CONTEXT_PRIORITY_LOW)
+      priority = DRM_ASAHI_PRIORITY_LOW;
+   else if (flags & PIPE_CONTEXT_PRIORITY_MEDIUM)
+      priority = DRM_ASAHI_PRIORITY_MEDIUM;
+
+   /* TODO: High/realtime need us to handle errors since we might not have
+    * permission. Sort this out later.
+    */
+
+   ctx->queue_id = agx_create_command_queue(agx_device(screen), priority);

    pctx->destroy = agx_destroy_context;
    pctx->flush = agx_flush;
@@ -1893,11 +1799,17 @@ agx_create_context(struct pipe_screen *screen, void *priv, unsigned flags)
    ctx->blitter = util_blitter_create(pctx);
    ctx->compute_blitter.blit_cs = asahi_blit_key_table_create(ctx);

-   ctx->result_buf =
-      agx_bo_create(agx_device(screen),
-                    (2 * sizeof(union agx_batch_result)) * AGX_MAX_BATCHES, 0,
-                    AGX_BO_WRITEBACK, "Batch result buffer");
-   assert(ctx->result_buf);
+   struct agx_device *dev = agx_device(screen);
+   size_t timestamps_size = sizeof(struct agx_timestamps) * AGX_MAX_BATCHES;
+
+   /* The kernel requires that timestamp buffers are SHARED */
+   ctx->timestamps =
+      agx_bo_create(dev, timestamps_size, 0, AGX_BO_WRITEBACK | AGX_BO_SHARED,
+                    "Timestamp buffer");
+   assert(ctx->timestamps);
+
+   ret = agx_bind_timestamps(dev, ctx->timestamps, &ctx->timestamp_handle);
+   assert(!ret);

    /* Sync object/FD used for NATIVE_FENCE_FD. */
    ctx->in_sync_fd = -1;
@@ -2101,7 +2013,7 @@ agx_init_screen_caps(struct pipe_screen *pscreen)
    caps->texture_barrier = true;

    /* Timer resolution is the length of a single tick in nanos */
-   caps->timer_resolution = agx_gpu_time_to_ns(agx_device(pscreen), 1);
+   caps->timer_resolution = agx_gpu_timestamp_to_ns(agx_device(pscreen), 1);

    caps->sampler_view_target = true;
    caps->texture_swizzle = true;
@@ -2452,8 +2364,7 @@ agx_screen_get_fd(struct pipe_screen *pscreen)
 static uint64_t
 agx_get_timestamp(struct pipe_screen *pscreen)
 {
-   struct agx_device *dev = agx_device(pscreen);
-   return agx_gpu_time_to_ns(dev, agx_get_gpu_timestamp(dev));
+   return agx_get_gpu_timestamp(agx_device(pscreen));
 }

 static void
@@ -2488,13 +2399,6 @@ agx_screen_create(int fd, struct renderonly *ro,
    struct agx_screen *agx_screen;
    struct pipe_screen *screen;

-   /* Refuse to probe. There is no stable UAPI yet. Upstream Mesa cannot be used
-    * yet with Asahi. Do not try. Do not patch out this check. Do not teach
-    * others about patching this check. Do not distribute upstream Mesa with
-    * this check patched out.
-    */
-   return NULL;
-
    agx_screen = rzalloc(NULL, struct agx_screen);
    if (!agx_screen)
       return NULL;
@@ -408,12 +408,12 @@ agx_get_query_result(struct pipe_context *pctx, struct pipe_query *pquery,
      return true;

   case QUERY_COPY_TIMESTAMP:
-      vresult->u64 = agx_gpu_time_to_ns(dev, value);
+      vresult->u64 = agx_gpu_timestamp_to_ns(dev, value);
      return true;

   case QUERY_COPY_TIME_ELAPSED:
      /* end - begin */
-      vresult->u64 = agx_gpu_time_to_ns(dev, ptr[0] - ptr[1]);
+      vresult->u64 = agx_gpu_timestamp_to_ns(dev, ptr[0] - ptr[1]);
      return true;

   default:
@@ -18,7 +18,6 @@
 #include "asahi/lib/agx_tilebuffer.h"
 #include "asahi/lib/agx_uvs.h"
 #include "asahi/lib/pool.h"
-#include "asahi/lib/unstable_asahi_drm.h"
 #include "asahi/libagx/geometry.h"
 #include "compiler/shader_enums.h"
 #include "gallium/auxiliary/util/u_blitter.h"
@@ -355,9 +354,13 @@ struct agx_stage {
    uint32_t valid_samplers;
 };

-union agx_batch_result {
-   struct drm_asahi_result_render render;
-   struct drm_asahi_result_compute compute;
+struct agx_timestamps {
+   uint64_t vtx_start;
+   uint64_t vtx_end;
+   uint64_t frag_start;
+   uint64_t frag_end;
+   uint64_t comp_start;
+   uint64_t comp_end;
 };

 /* This is a firmware limit. It should be possible to raise to 2048 in the
@@ -454,10 +457,6 @@ struct agx_batch {
    /* Arrays of GPU pointers that should be written with the batch timestamps */
    struct util_dynarray timestamps;

-   /* Result buffer where the kernel places command execution information */
-   union agx_batch_result *result;
-   size_t result_off;
-
    /* Actual pointer in a uniform */
    struct agx_bo *geom_params_bo, *geom_index_bo;
    uint64_t geom_index;
@@ -646,7 +645,8 @@ struct agx_context {
    uint32_t queue_id;

    struct agx_batch *batch;
-   struct agx_bo *result_buf;
+   struct agx_bo *timestamps;
+   uint32_t timestamp_handle;

    struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
    uint32_t vb_mask;