mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 03:00:11 +01:00
ethos: Initial commit of a driver for the Arm Ethos-U65 NPU.
Supports all models in the test suite. No optimizations implemented yet. Acked-by: Christian Gmeiner <cgmeiner@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/36699>
This commit is contained in:
parent
b3262b37ce
commit
2581c3ab60
34 changed files with 6015 additions and 3 deletions
|
|
@ -1,6 +1,7 @@
|
||||||
# The following files are opted into `ninja clang-format` and
|
# The following files are opted into `ninja clang-format` and
|
||||||
# enforcement in the CI.
|
# enforcement in the CI.
|
||||||
|
|
||||||
|
src/gallium/drivers/ethosu/**/*
|
||||||
src/gallium/drivers/i915
|
src/gallium/drivers/i915
|
||||||
src/gallium/drivers/r300/compiler/*
|
src/gallium/drivers/r300/compiler/*
|
||||||
src/gallium/drivers/rocket/**/*
|
src/gallium/drivers/rocket/**/*
|
||||||
|
|
|
||||||
262
include/drm-uapi/ethosu_accel.h
Normal file
262
include/drm-uapi/ethosu_accel.h
Normal file
|
|
@ -0,0 +1,262 @@
|
||||||
|
/* SPDX-License-Identifier: MIT */
|
||||||
|
/* Copyright (C) 2025 Arm, Ltd. */
|
||||||
|
#ifndef _ETHOSU_DRM_H_
|
||||||
|
#define _ETHOSU_DRM_H_
|
||||||
|
|
||||||
|
#include "drm.h"
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DOC: IOCTL IDs
|
||||||
|
*
|
||||||
|
* enum drm_ethosu_ioctl_id - IOCTL IDs
|
||||||
|
*
|
||||||
|
* Place new ioctls at the end, don't re-order, don't replace or remove entries.
|
||||||
|
*
|
||||||
|
* These IDs are not meant to be used directly. Use the DRM_IOCTL_ETHOSU_xxx
|
||||||
|
* definitions instead.
|
||||||
|
*/
|
||||||
|
enum drm_ethosu_ioctl_id {
|
||||||
|
/** @DRM_ETHOSU_DEV_QUERY: Query device information. */
|
||||||
|
DRM_ETHOSU_DEV_QUERY = 0,
|
||||||
|
|
||||||
|
/** @DRM_ETHOSU_BO_CREATE: Create a buffer object. */
|
||||||
|
DRM_ETHOSU_BO_CREATE,
|
||||||
|
|
||||||
|
/** @DRM_ETHOSU_BO_WAIT: Wait on a buffer object's fence. */
|
||||||
|
DRM_ETHOSU_BO_WAIT,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @DRM_ETHOSU_BO_MMAP_OFFSET: Get the file offset to pass to
|
||||||
|
* mmap to map a GEM object.
|
||||||
|
*/
|
||||||
|
DRM_ETHOSU_BO_MMAP_OFFSET,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @DRM_ETHOSU_CMDSTREAM_BO_CREATE: Create a command stream buffer
|
||||||
|
* object.
|
||||||
|
*/
|
||||||
|
DRM_ETHOSU_CMDSTREAM_BO_CREATE,
|
||||||
|
|
||||||
|
/** @DRM_ETHOSU_SUBMIT: Submit a job and BOs to run. */
|
||||||
|
DRM_ETHOSU_SUBMIT,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DOC: IOCTL arguments
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* enum drm_ethosu_dev_query_type - Query type
|
||||||
|
*
|
||||||
|
* Place new types at the end, don't re-order, don't remove or replace.
|
||||||
|
*/
|
||||||
|
enum drm_ethosu_dev_query_type {
|
||||||
|
/** @DRM_ETHOSU_DEV_QUERY_NPU_INFO: Query NPU information. */
|
||||||
|
DRM_ETHOSU_DEV_QUERY_NPU_INFO = 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct drm_ethosu_npu_info - NPU information
|
||||||
|
*
|
||||||
|
* Structure grouping all queryable information relating to the NPU.
|
||||||
|
*/
|
||||||
|
struct drm_ethosu_npu_info {
|
||||||
|
/** @id : NPU ID. */
|
||||||
|
__u32 id;
|
||||||
|
#define DRM_ETHOSU_ARCH_MAJOR(x) ((x) >> 28)
|
||||||
|
#define DRM_ETHOSU_ARCH_MINOR(x) (((x) >> 20) & 0xff)
|
||||||
|
#define DRM_ETHOSU_ARCH_PATCH(x) (((x) >> 16) & 0xf)
|
||||||
|
#define DRM_ETHOSU_PRODUCT_MAJOR(x) (((x) >> 12) & 0xf)
|
||||||
|
#define DRM_ETHOSU_VERSION_MAJOR(x) (((x) >> 8) & 0xf)
|
||||||
|
#define DRM_ETHOSU_VERSION_MINOR(x) (((x) >> 4) & 0xff)
|
||||||
|
#define DRM_ETHOSU_VERSION_STATUS(x) ((x) & 0xf)
|
||||||
|
|
||||||
|
/** @gpu_rev: GPU revision. */
|
||||||
|
__u32 config;
|
||||||
|
|
||||||
|
__u32 sram_size;
|
||||||
|
};
|
||||||
|
/**
|
||||||
|
* struct drm_ethosu_dev_query - Arguments passed to DRM_IOCTL_ETHOSU_DEV_QUERY
|
||||||
|
*/
|
||||||
|
struct drm_ethosu_dev_query {
|
||||||
|
/** @type: the query type (see drm_ethosu_dev_query_type). */
|
||||||
|
__u32 type;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @size: size of the type being queried.
|
||||||
|
*
|
||||||
|
* If pointer is NULL, size is updated by the driver to provide the
|
||||||
|
* output structure size. If pointer is not NULL, the driver will
|
||||||
|
* only copy min(size, actual_structure_size) bytes to the pointer,
|
||||||
|
* and update the size accordingly. This allows us to extend query
|
||||||
|
* types without breaking userspace.
|
||||||
|
*/
|
||||||
|
__u32 size;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @pointer: user pointer to a query type struct.
|
||||||
|
*
|
||||||
|
* Pointer can be NULL, in which case, nothing is copied, but the
|
||||||
|
* actual structure size is returned. If not NULL, it must point to
|
||||||
|
* a location that's large enough to hold size bytes.
|
||||||
|
*/
|
||||||
|
__u64 pointer;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* enum drm_ethosu_bo_flags - Buffer object flags, passed at creation time.
|
||||||
|
*/
|
||||||
|
enum drm_ethosu_bo_flags {
|
||||||
|
/**
|
||||||
|
* @DRM_ETHOSU_BO_NO_MMAP: The buffer object will never be CPU-mapped
|
||||||
|
* in userspace.
|
||||||
|
*/
|
||||||
|
DRM_ETHOSU_BO_NO_MMAP = (1 << 0),
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct drm_ethosu_bo_create - Arguments passed to DRM_IOCTL_ETHOSU_BO_CREATE.
|
||||||
|
*/
|
||||||
|
struct drm_ethosu_bo_create {
|
||||||
|
/**
|
||||||
|
* @size: Requested size for the object
|
||||||
|
*
|
||||||
|
* The (page-aligned) allocated size for the object will be returned.
|
||||||
|
*/
|
||||||
|
__u64 size;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @flags: Flags. Must be a combination of drm_ethosu_bo_flags flags.
|
||||||
|
*/
|
||||||
|
__u32 flags;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @handle: Returned handle for the object.
|
||||||
|
*
|
||||||
|
* Object handles are nonzero.
|
||||||
|
*/
|
||||||
|
__u32 handle;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct drm_ethosu_bo_mmap_offset - Arguments passed to DRM_IOCTL_ETHOSU_BO_MMAP_OFFSET.
|
||||||
|
*/
|
||||||
|
struct drm_ethosu_bo_mmap_offset {
|
||||||
|
/** @handle: Handle of the object we want an mmap offset for. */
|
||||||
|
__u32 handle;
|
||||||
|
|
||||||
|
/** @pad: MBZ. */
|
||||||
|
__u32 pad;
|
||||||
|
|
||||||
|
/** @offset: The fake offset to use for subsequent mmap calls. */
|
||||||
|
__u64 offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct drm_ethosu_bo_wait - ioctl argument for waiting for
|
||||||
|
* completion of the last DRM_ETHOSU_SUBMIT on a BO.
|
||||||
|
*
|
||||||
|
* This is useful for cases where multiple processes might be
|
||||||
|
* rendering to a BO and you want to wait for all rendering to be
|
||||||
|
* completed.
|
||||||
|
*/
|
||||||
|
struct drm_ethosu_bo_wait {
|
||||||
|
/** @handle: Handle of the buffer object to wait on. */
__u32 handle;
|
||||||
|
/** @pad: Padding. NOTE(review): presumably must be zero like the other pad fields - confirm. */
__u32 pad;
|
||||||
|
/** @timeout_ns: Absolute timeout; nanoseconds going by the _ns suffix. */
__s64 timeout_ns; /* absolute */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct drm_ethosu_cmdstream_bo_create {
|
||||||
|
/* Size of the data argument. */
|
||||||
|
__u32 size;
|
||||||
|
|
||||||
|
/* Flags, currently must be 0. */
|
||||||
|
__u32 flags;
|
||||||
|
|
||||||
|
/* Pointer to the data. */
|
||||||
|
__u64 data;
|
||||||
|
|
||||||
|
/** Returned GEM handle for the BO. */
|
||||||
|
__u32 handle;
|
||||||
|
|
||||||
|
/* Pad, must be 0. */
|
||||||
|
__u32 pad;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct drm_ethosu_job - A job to be run on the NPU
|
||||||
|
*
|
||||||
|
* The kernel will schedule the execution of this job taking into account its
|
||||||
|
* dependencies with other jobs. All tasks in the same job will be executed
|
||||||
|
* sequentially on the same core, to benefit from memory residency in SRAM.
|
||||||
|
*/
|
||||||
|
struct drm_ethosu_job {
|
||||||
|
/** Input: BO handle for cmdstream. */
|
||||||
|
__u32 cmd_bo;
|
||||||
|
|
||||||
|
/** Input: Amount of SRAM to use. */
|
||||||
|
__u32 sram_size;
|
||||||
|
|
||||||
|
#define ETHOSU_MAX_REGIONS 8
|
||||||
|
/** Input: Array of BO handles for each region. */
|
||||||
|
__u32 region_bo_handles[ETHOSU_MAX_REGIONS];
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct drm_ethosu_submit - ioctl argument for submitting commands to the NPU.
|
||||||
|
*
|
||||||
|
* The kernel will schedule the execution of these jobs in dependency order.
|
||||||
|
*/
|
||||||
|
struct drm_ethosu_submit {
|
||||||
|
/** Input: Pointer to an array of struct drm_ethosu_job. */
|
||||||
|
__u64 jobs;
|
||||||
|
|
||||||
|
/** Input: Number of jobs passed in. */
|
||||||
|
__u32 job_count;
|
||||||
|
|
||||||
|
/** Reserved, must be zero. */
|
||||||
|
__u32 pad;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DRM_IOCTL_ETHOSU() - Build an ethosu IOCTL number
|
||||||
|
* @__access: Access type. Must be R, W or WR.
|
||||||
|
* @__id: One of the DRM_ETHOSU_xxx id.
|
||||||
|
* @__type: Suffix of the type being passed to the IOCTL.
|
||||||
|
*
|
||||||
|
* Don't use this macro directly, use the DRM_IOCTL_ETHOSU_xxx
|
||||||
|
* values instead.
|
||||||
|
*
|
||||||
|
* Return: An IOCTL number to be passed to ioctl() from userspace.
|
||||||
|
*/
|
||||||
|
#define DRM_IOCTL_ETHOSU(__access, __id, __type) \
|
||||||
|
DRM_IO ## __access(DRM_COMMAND_BASE + DRM_ETHOSU_ ## __id, \
|
||||||
|
struct drm_ethosu_ ## __type)
|
||||||
|
|
||||||
|
enum {
|
||||||
|
DRM_IOCTL_ETHOSU_DEV_QUERY =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, DEV_QUERY, dev_query),
|
||||||
|
DRM_IOCTL_ETHOSU_BO_CREATE =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, BO_CREATE, bo_create),
|
||||||
|
DRM_IOCTL_ETHOSU_BO_WAIT =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, BO_WAIT, bo_wait),
|
||||||
|
DRM_IOCTL_ETHOSU_BO_MMAP_OFFSET =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, BO_MMAP_OFFSET, bo_mmap_offset),
|
||||||
|
DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, CMDSTREAM_BO_CREATE, cmdstream_bo_create),
|
||||||
|
DRM_IOCTL_ETHOSU_SUBMIT =
|
||||||
|
DRM_IOCTL_ETHOSU(WR, SUBMIT, submit),
|
||||||
|
};
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* _ETHOSU_DRM_H_ */
|
||||||
|
|
@ -186,7 +186,7 @@ elif gallium_drivers.contains('all')
|
||||||
gallium_drivers = [
|
gallium_drivers = [
|
||||||
'r300', 'r600', 'radeonsi', 'crocus', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'i915',
|
'r300', 'r600', 'radeonsi', 'crocus', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'i915',
|
||||||
'nouveau', 'svga', 'tegra', 'virgl', 'lima', 'panfrost', 'llvmpipe', 'softpipe', 'iris',
|
'nouveau', 'svga', 'tegra', 'virgl', 'lima', 'panfrost', 'llvmpipe', 'softpipe', 'iris',
|
||||||
'zink', 'd3d12', 'asahi', 'rocket'
|
'zink', 'd3d12', 'asahi', 'rocket', 'ethosu'
|
||||||
]
|
]
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
@ -214,6 +214,7 @@ with_gallium_zink = gallium_drivers.contains('zink')
|
||||||
with_gallium_d3d12 = gallium_drivers.contains('d3d12')
|
with_gallium_d3d12 = gallium_drivers.contains('d3d12')
|
||||||
with_gallium_asahi = gallium_drivers.contains('asahi')
|
with_gallium_asahi = gallium_drivers.contains('asahi')
|
||||||
with_gallium_rocket = gallium_drivers.contains('rocket')
|
with_gallium_rocket = gallium_drivers.contains('rocket')
|
||||||
|
with_gallium_ethosu = gallium_drivers.contains('ethosu')
|
||||||
foreach gallium_driver : gallium_drivers
|
foreach gallium_driver : gallium_drivers
|
||||||
pre_args += '-DHAVE_@0@'.format(gallium_driver.to_upper())
|
pre_args += '-DHAVE_@0@'.format(gallium_driver.to_upper())
|
||||||
endforeach
|
endforeach
|
||||||
|
|
|
||||||
|
|
@ -86,7 +86,7 @@ option(
|
||||||
value : ['auto'],
|
value : ['auto'],
|
||||||
choices : [
|
choices : [
|
||||||
'all', 'auto',
|
'all', 'auto',
|
||||||
'asahi', 'crocus', 'd3d12', 'etnaviv', 'freedreno', 'i915', 'iris',
|
'asahi', 'crocus', 'd3d12', 'ethosu', 'etnaviv', 'freedreno', 'i915', 'iris',
|
||||||
'lima', 'llvmpipe', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
|
'lima', 'llvmpipe', 'nouveau', 'panfrost', 'r300', 'r600', 'radeonsi',
|
||||||
'rocket', 'softpipe', 'svga', 'tegra', 'v3d', 'vc4', 'virgl', 'zink',
|
'rocket', 'softpipe', 'svga', 'tegra', 'v3d', 'vc4', 'virgl', 'zink',
|
||||||
],
|
],
|
||||||
|
|
|
||||||
2
src/gallium/drivers/ethosu/.clang-format
Normal file
2
src/gallium/drivers/ethosu/.clang-format
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
BasedOnStyle: InheritParentConfig
|
||||||
|
DisableFormat: false
|
||||||
0
src/gallium/drivers/ethosu/ci/ethos-imx93-fails.txt
Normal file
0
src/gallium/drivers/ethosu/ci/ethos-imx93-fails.txt
Normal file
0
src/gallium/drivers/ethosu/ci/ethos-imx93-flakes.txt
Normal file
0
src/gallium/drivers/ethosu/ci/ethos-imx93-flakes.txt
Normal file
14
src/gallium/drivers/ethosu/ci/ethos-imx93-skips.txt
Normal file
14
src/gallium/drivers/ethosu/ci/ethos-imx93-skips.txt
Normal file
|
|
@ -0,0 +1,14 @@
|
||||||
|
Add.Op/.*
|
||||||
|
AddQuant.Op/.*
|
||||||
|
Conv2D.Op/.*
|
||||||
|
DepthwiseConv2D.Op/.*
|
||||||
|
FullyConnected.Op/.*
|
||||||
|
|
||||||
|
# Don't support unfused Pad operations yet
|
||||||
|
Models.Op/yolox_000
|
||||||
|
Models.Op/yolox_003
|
||||||
|
Models.Op/yolox_012
|
||||||
|
Models.Op/yolox_027
|
||||||
|
Models.Op/yolox_042
|
||||||
|
Models.Op/yolox_077
|
||||||
|
Models.Op/yolox_086
|
||||||
75
src/gallium/drivers/ethosu/decode.py
Normal file
75
src/gallium/drivers/ethosu/decode.py
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
#
|
||||||
|
# Copyright © 2024-2025 Tomeu Vizoso
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
import struct
|
||||||
|
from gen_parser import Parser, Reg, Enum, mask, Error
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Decode a binary command dump into EMIT(...) lines.

    Command-line arguments:
        --xml   register description, loaded through gen_parser.Parser
        --dump  binary file read as consecutive 8-byte little-endian records
                (int16 offset, uint32 value, int16 target); a trailing
                partial record is ignored

    Records whose offset matches a known register are printed symbolically;
    all others are printed as raw "target offset value" hex triples.
    Exits with status 1 if the XML cannot be parsed.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--xml', type=str, required=True)
    arg_parser.add_argument('--dump', type=str, required=True)

    args = arg_parser.parse_args()

    p = Parser()

    try:
        p.parse("", args.xml)
    except Error as e:
        print(e, file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist in every interpreter configuration.
        sys.exit(1)

    # Registers indexed by offset.
    regs = {e.offset: e for e in p.file if isinstance(e, Reg)}

    # Name -> value mapping of the "target" enum.
    domains = {}
    for e in p.file:
        if isinstance(e, Enum) and e.name == "target":
            for name, val in e.values:
                domains[name] = val

    # Read the dump in one go so the file handle is always closed.
    with open(args.dump, mode='rb') as f:
        data = f.read()

    whole_records = len(data) - (len(data) % 8)
    for offset, value, target in struct.iter_unpack("<hIh", data[:whole_records]):
        reg = regs.get(offset)
        if reg is None:
            print("%x %x %x" % (target, offset, value))
            continue

        if (target & 0xfffffffe) != domains[reg.domain]:
            print("WARNING: target 0x%x doesn't match register's domain 0x%x" % (target, domains[reg.domain]))

        print("EMIT(REG_%s, " % reg.full_name.upper(), end="")
        if value == 0 or len(reg.bitset.fields) == 1:
            print("0x%x" % value, end="")
        else:
            parts = []
            for field in reg.bitset.fields:
                if field.type == "boolean":
                    if (1 << field.high) & value:
                        parts.append("%s_%s" % (reg.full_name.upper(), field.name.upper()))
                elif field.type == "uint":
                    field_value = (value & mask(field.low, field.high)) >> field.low
                    if field_value != 0:
                        parts.append("%s_%s(%d)" % (reg.full_name.upper(), field.name.upper(), field_value))
            print(" | ".join(parts), end="")
        print(");")


if __name__ == '__main__':
    main()
|
||||||
783
src/gallium/drivers/ethosu/ethosu_cmd.c
Normal file
783
src/gallium/drivers/ethosu/ethosu_cmd.c
Normal file
|
|
@ -0,0 +1,783 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include "util/macros.h"
|
||||||
|
#include "util/u_dynarray.h"
|
||||||
|
|
||||||
|
#include "ethosu_cmd.h"
|
||||||
|
#include "ethosu_coefs.h"
|
||||||
|
#include "ethosu_ml.h"
|
||||||
|
#include "ethosu_registers.h"
|
||||||
|
#include "ethosu_sched.h"
|
||||||
|
|
||||||
|
#define MAX_BLOCKDEP 3
|
||||||
|
#define MAX_OUTSTANDING_DMA_OPS 2
|
||||||
|
#define MAX_OUTSTANDING_NPU_OPS 2
|
||||||
|
|
||||||
|
/* Operand selector; the value is passed straight into the SCALE_MODE field of
 * an IFM precision word, so the numeric values are significant. */
enum ethosu_op_to_scale {
   OP_NONE = 0, /* no operand selected */
   OP_A = 1,
   OP_B = 2,
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_ensure_cmdstream(struct ethosu_subgraph *subgraph)
|
||||||
|
{
|
||||||
|
if ((subgraph->cursor - subgraph->cmdstream) < (subgraph->cmdstream_used - 2))
|
||||||
|
return;
|
||||||
|
|
||||||
|
unsigned cur_size = subgraph->cursor - subgraph->cmdstream;
|
||||||
|
subgraph->cmdstream = realloc(subgraph->cmdstream, (subgraph->cmdstream_used + 32) * sizeof(*subgraph->cmdstream));
|
||||||
|
subgraph->cursor = subgraph->cmdstream + cur_size;
|
||||||
|
subgraph->cmdstream_used += 32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Append a one-word command to the command stream of the `subgraph` variable
 * in the calling scope: the command code ORed with the low 16 bits of `param`
 * shifted into the upper half-word.
 *
 * Both operands are evaluated once and converted to uint32_t before the
 * shift, so a parameter with bit 15 set (e.g. 0xff80) is never shifted into
 * the sign bit of an int.
 */
#define EMIT0(cmd, param)                                                                      \
   do {                                                                                        \
      ethosu_ensure_cmdstream(subgraph);                                                       \
      const uint32_t emit0_cmd_ = (uint32_t)(cmd);                                             \
      const uint32_t emit0_param_ = (uint32_t)(param) & 0xFFFF;                                \
      *(subgraph->cursor++) = emit0_cmd_ | (emit0_param_ << 16);                               \
      if (DBG_ENABLED(ETHOSU_DBG_MSGS))                                                        \
         fprintf(stderr, "emit0(%s, 0x%x);\n", ethosu_get_cmd_name(0, emit0_cmd_),             \
                 emit0_param_);                                                                \
   } while (0)
|
||||||
|
|
||||||
|
/*
 * Append a two-word command to the command stream of the `subgraph` variable
 * in the calling scope. The first word is the command code with bit 0x4000
 * set and the low 16 bits of `param` in the upper half-word; the second word
 * is the low 32 bits of `offset`.
 *
 * All operands are evaluated once and converted to uint32_t before shifting
 * or masking, which keeps the shift out of the sign bit of an int.
 */
#define EMIT1(cmd, param, offset)                                                              \
   do {                                                                                        \
      ethosu_ensure_cmdstream(subgraph);                                                       \
      const uint32_t emit1_cmd_ = (uint32_t)(cmd);                                             \
      const uint32_t emit1_param_ = (uint32_t)(param) & 0xFFFF;                                \
      const uint32_t emit1_offset_ = (uint32_t)((offset) & 0xFFFFFFFF);                        \
      *(subgraph->cursor++) = emit1_cmd_ | 0x4000 | (emit1_param_ << 16);                      \
      *(subgraph->cursor++) = emit1_offset_;                                                   \
      if (DBG_ENABLED(ETHOSU_DBG_MSGS))                                                        \
         fprintf(stderr, "emit1(%s, 0x%x, 0x%x);\n", ethosu_get_cmd_name(1, emit1_cmd_),       \
                 emit1_param_, emit1_offset_);                                                 \
   } while (0)
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_addresses(
|
||||||
|
struct ethosu_subgraph *subgraph,
|
||||||
|
struct ethosu_feature_map *feature_map,
|
||||||
|
uint32_t cmd_base0, uint32_t cmd_base1, uint32_t cmd_base2, uint32_t cmd_base3)
|
||||||
|
{
|
||||||
|
EMIT1(cmd_base0, 0x0, feature_map->tiles.addresses[0]);
|
||||||
|
EMIT1(cmd_base1, 0x0, feature_map->tiles.addresses[1]);
|
||||||
|
EMIT1(cmd_base2, 0x0, feature_map->tiles.addresses[2]);
|
||||||
|
EMIT1(cmd_base3, 0x0, feature_map->tiles.addresses[3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_tiles(
|
||||||
|
struct ethosu_subgraph *subgraph,
|
||||||
|
struct ethosu_feature_map *feature_map,
|
||||||
|
uint32_t cmd_height0, uint32_t cmd_height1, uint32_t cmd_width0)
|
||||||
|
{
|
||||||
|
EMIT0(cmd_height0, feature_map->tiles.height_0 - 1);
|
||||||
|
EMIT0(cmd_height1, feature_map->tiles.height_1 - 1);
|
||||||
|
EMIT0(cmd_width0, feature_map->tiles.width_0 - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_strides(
|
||||||
|
struct ethosu_subgraph *subgraph,
|
||||||
|
struct ethosu_feature_map *feature_map,
|
||||||
|
uint32_t cmd_stride_c, uint32_t cmd_stride_y, uint32_t cmd_stride_x)
|
||||||
|
{
|
||||||
|
unsigned elem_size = 1;
|
||||||
|
unsigned tensor_x, tensor_y, tensor_c;
|
||||||
|
struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, feature_map->tensor_idx);
|
||||||
|
|
||||||
|
if (tensor->layout == ETHOSU_LAYOUT_NHCWB16) {
|
||||||
|
tensor_x = 16 * elem_size;
|
||||||
|
tensor_c = tensor_x * tensor->shape.width;
|
||||||
|
tensor_y = elem_size * tensor->shape.width * ALIGN(tensor->shape.depth, 16);
|
||||||
|
} else {
|
||||||
|
tensor_c = elem_size;
|
||||||
|
tensor_x = tensor->shape.depth * tensor_c;
|
||||||
|
tensor_y = tensor->shape.width * tensor_x;
|
||||||
|
}
|
||||||
|
|
||||||
|
EMIT1(cmd_stride_c, 0x0, tensor_c);
|
||||||
|
EMIT1(cmd_stride_y, 0x0, tensor_y);
|
||||||
|
EMIT1(cmd_stride_x, 0x0, tensor_x);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_ifm(struct ethosu_subgraph *subgraph, struct ethosu_feature_map *feature_map)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_IFM_REGION, IO_REGION);
|
||||||
|
emit_addresses(
|
||||||
|
subgraph,
|
||||||
|
feature_map,
|
||||||
|
NPU_SET_IFM_BASE0,
|
||||||
|
NPU_SET_IFM_BASE1,
|
||||||
|
NPU_SET_IFM_BASE2,
|
||||||
|
NPU_SET_IFM_BASE3);
|
||||||
|
|
||||||
|
emit_tiles(
|
||||||
|
subgraph, feature_map, NPU_SET_IFM_HEIGHT0_M1, NPU_SET_IFM_HEIGHT1_M1, NPU_SET_IFM_WIDTH0_M1);
|
||||||
|
|
||||||
|
EMIT0(NPU_SET_IFM_DEPTH_M1, feature_map->shape.depth - 1);
|
||||||
|
emit_strides(subgraph, feature_map, NPU_SET_IFM_STRIDE_C, NPU_SET_IFM_STRIDE_Y, NPU_SET_IFM_STRIDE_X);
|
||||||
|
EMIT0(NPU_SET_IFM_ZERO_POINT, feature_map->zero_point);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_ifm_precision(struct ethosu_subgraph *subgraph,
|
||||||
|
struct ethosu_feature_map *feature_map,
|
||||||
|
enum ethosu_op_to_scale op_to_scale, uint32_t precision_cmd)
|
||||||
|
{
|
||||||
|
struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, feature_map->tensor_idx);
|
||||||
|
unsigned prec = 0;
|
||||||
|
|
||||||
|
if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
|
||||||
|
prec |= NPU_SET_IFM_PRECISION_FORMAT(1);
|
||||||
|
|
||||||
|
if (feature_map->is_signed)
|
||||||
|
prec |= NPU_SET_IFM_PRECISION_ACTIVATION(1); // signed activation
|
||||||
|
|
||||||
|
prec |= NPU_SET_IFM_PRECISION_SCALE_MODE(op_to_scale);
|
||||||
|
|
||||||
|
EMIT0(precision_cmd, prec);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_padding(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_IFM_PAD_TOP, operation->pad.top);
|
||||||
|
EMIT0(NPU_SET_IFM_PAD_LEFT, operation->pad.left);
|
||||||
|
EMIT0(NPU_SET_IFM_PAD_BOTTOM, operation->pad.bottom);
|
||||||
|
EMIT0(NPU_SET_IFM_PAD_RIGHT, operation->pad.right);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_ofm(struct ethosu_subgraph *subgraph, struct ethosu_feature_map *feature_map)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_OFM_REGION, IO_REGION);
|
||||||
|
emit_addresses(
|
||||||
|
subgraph,
|
||||||
|
feature_map,
|
||||||
|
NPU_SET_OFM_BASE0,
|
||||||
|
NPU_SET_OFM_BASE1,
|
||||||
|
NPU_SET_OFM_BASE2,
|
||||||
|
NPU_SET_OFM_BASE3);
|
||||||
|
emit_tiles(
|
||||||
|
subgraph, feature_map, NPU_SET_OFM_HEIGHT0_M1, NPU_SET_OFM_HEIGHT1_M1, NPU_SET_OFM_WIDTH0_M1);
|
||||||
|
EMIT0(NPU_SET_OFM_HEIGHT_M1, feature_map->shape.height - 1);
|
||||||
|
EMIT0(NPU_SET_OFM_WIDTH_M1, feature_map->shape.width - 1);
|
||||||
|
EMIT0(NPU_SET_OFM_DEPTH_M1, feature_map->shape.depth - 1);
|
||||||
|
emit_strides(subgraph, feature_map, NPU_SET_OFM_STRIDE_C, NPU_SET_OFM_STRIDE_Y, NPU_SET_OFM_STRIDE_X);
|
||||||
|
EMIT0(NPU_SET_OFM_ZERO_POINT, feature_map->zero_point);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_ofm_precision(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, operation->ofm.tensor_idx);
|
||||||
|
unsigned prec = 0;
|
||||||
|
|
||||||
|
if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
|
||||||
|
prec |= NPU_SET_OFM_PRECISION_FORMAT(1);
|
||||||
|
|
||||||
|
if (operation->ofm.is_signed)
|
||||||
|
prec |= NPU_SET_OFM_PRECISION_ACTIVATION(1);
|
||||||
|
|
||||||
|
if (operation->type == ETHOSU_OPERATION_TYPE_POOLING ||
|
||||||
|
operation->type == ETHOSU_OPERATION_TYPE_ELTWISE) {
|
||||||
|
prec |= NPU_SET_OFM_PRECISION_SCALE_MODE(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
prec |= NPU_SET_OFM_PRECISION_ROUND_MODE(operation->round_mode);
|
||||||
|
|
||||||
|
EMIT0(NPU_SET_OFM_PRECISION, prec);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_kernel(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_KERNEL_HEIGHT_M1, operation->kernel.height - 1);
|
||||||
|
EMIT0(NPU_SET_KERNEL_WIDTH_M1, operation->kernel.width - 1);
|
||||||
|
unsigned stride = (operation->kernel.stride_x - 1) & 1;
|
||||||
|
stride |= ((operation->kernel.stride_y - 1) & 1) << 1;
|
||||||
|
stride |= ((operation->kernel.stride_x - 1) >> 1) << 6;
|
||||||
|
stride |= ((operation->kernel.stride_y - 1) >> 1) << 9;
|
||||||
|
stride |= (operation->kernel.dilation_x - 1) << 3;
|
||||||
|
stride |= (operation->kernel.dilation_y - 1) << 4;
|
||||||
|
stride |= operation->conv.part_kernel_first << 2;
|
||||||
|
EMIT0(NPU_SET_KERNEL_STRIDE, stride);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_WEIGHT_REGION, operation->conv.weights.region);
|
||||||
|
EMIT1(NPU_SET_WEIGHT_BASE, 0x0, operation->conv.weights.address);
|
||||||
|
EMIT1(NPU_SET_WEIGHT_LENGTH, 0x0, operation->conv.weights.size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_SCALE_REGION, operation->conv.scales.region);
|
||||||
|
EMIT1(NPU_SET_SCALE_BASE, 0x0, operation->conv.scales.address);
|
||||||
|
EMIT1(NPU_SET_SCALE_LENGTH, 0x0, operation->conv.scales.size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_activation(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_ACTIVATION, 0x0);
|
||||||
|
|
||||||
|
if (operation->ofm.is_signed) {
|
||||||
|
EMIT0(NPU_SET_ACTIVATION_MIN, 0xff80);
|
||||||
|
EMIT0(NPU_SET_ACTIVATION_MAX, 0x7f);
|
||||||
|
} else {
|
||||||
|
EMIT0(NPU_SET_ACTIVATION_MIN, 0x00);
|
||||||
|
EMIT0(NPU_SET_ACTIVATION_MAX, 0xff);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_OFM_BLK_HEIGHT_M1, operation->block_config.ofm_block.height - 1);
|
||||||
|
EMIT0(NPU_SET_OFM_BLK_WIDTH_M1, operation->block_config.ofm_block.width - 1);
|
||||||
|
EMIT0(NPU_SET_OFM_BLK_DEPTH_M1, operation->block_config.ofm_block.depth - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_shram_registers(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_IFM_IB_END, operation->block_config.shram_layout.ib_end);
|
||||||
|
EMIT0(NPU_SET_AB_START, operation->block_config.shram_layout.ab_start);
|
||||||
|
|
||||||
|
if (operation->type == ETHOSU_OPERATION_TYPE_ELTWISE)
|
||||||
|
EMIT0(NPU_SET_IFM2_IB_START, operation->block_config.shram_layout.ib_start2);
|
||||||
|
|
||||||
|
EMIT0(NPU_SET_ACC_FORMAT, operation->block_config.acc_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_common(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, enum ethosu_op_to_scale op_to_scale)
|
||||||
|
{
|
||||||
|
emit_ifm(subgraph, &operation->ifm);
|
||||||
|
emit_ifm_precision(subgraph, &operation->ifm, op_to_scale, NPU_SET_IFM_PRECISION);
|
||||||
|
EMIT0(NPU_SET_IFM_UPSCALE, operation->upscale);
|
||||||
|
|
||||||
|
if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE)
|
||||||
|
emit_padding(subgraph, operation);
|
||||||
|
|
||||||
|
emit_ofm(subgraph, &operation->ofm);
|
||||||
|
|
||||||
|
emit_ofm_precision(subgraph, operation);
|
||||||
|
|
||||||
|
if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE)
|
||||||
|
emit_kernel(subgraph, operation);
|
||||||
|
|
||||||
|
if (operation->type == ETHOSU_OPERATION_TYPE_CONVOLUTION) {
|
||||||
|
emit_weights(subgraph, operation);
|
||||||
|
emit_biases(subgraph, operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
emit_activation(subgraph, operation);
|
||||||
|
|
||||||
|
emit_block_config(subgraph, operation);
|
||||||
|
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||||
|
emit_shram_registers(subgraph, operation);
|
||||||
|
else
|
||||||
|
EMIT0(NPU_SET_ACC_FORMAT, 0x300); // FIXME should be based on # of MACs, only works for >=256 MACs
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_convolution(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
ethosu_allocate_feature_map(subgraph, &operation->ifm);
|
||||||
|
operation->ifm.tiles.height_0 = operation->ifm.shape.height;
|
||||||
|
operation->ifm.tiles.height_1 = operation->ifm.shape.height;
|
||||||
|
operation->ifm.tiles.width_0 = operation->ifm.shape.width;
|
||||||
|
|
||||||
|
ethosu_allocate_feature_map(subgraph, &operation->ofm);
|
||||||
|
operation->ofm.tiles.height_0 = operation->ofm.shape.height;
|
||||||
|
operation->ofm.tiles.height_1 = operation->ofm.shape.height;
|
||||||
|
operation->ofm.tiles.width_0 = operation->ofm.shape.width;
|
||||||
|
|
||||||
|
emit_common(subgraph, operation, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
quantise_pooling_scale(unsigned nr_kernel_elements, unsigned rescale_bits, unsigned *out_shift)
|
||||||
|
{
|
||||||
|
int k = 0;
|
||||||
|
long long N = 0;
|
||||||
|
|
||||||
|
frexp(nr_kernel_elements - 1, &k);
|
||||||
|
N = 31 - rescale_bits;
|
||||||
|
*out_shift = N + k;
|
||||||
|
|
||||||
|
return ((1LL << (N + k)) + (1LL << k)) / nr_kernel_elements;
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
pooling_emit_ofm_scaling(
|
||||||
|
double input1_scale,
|
||||||
|
double output_scale,
|
||||||
|
unsigned kernel_height,
|
||||||
|
unsigned kernel_width,
|
||||||
|
uint32_t *out_shift)
|
||||||
|
{
|
||||||
|
double rescale = input1_scale / output_scale;
|
||||||
|
unsigned rescale_bits = 0;
|
||||||
|
unsigned scale;
|
||||||
|
|
||||||
|
if (kernel_height == 1 && kernel_width == 1) {
|
||||||
|
if (rescale > 1.0)
|
||||||
|
rescale_bits = 32 - __builtin_clz(ceil(rescale)) + 1;
|
||||||
|
else if (rescale < 1.0)
|
||||||
|
rescale_bits = -(32 - __builtin_clz(ceil(1 / rescale))) - 1;
|
||||||
|
}
|
||||||
|
scale = quantise_pooling_scale(kernel_height * kernel_width, rescale_bits, out_shift);
|
||||||
|
scale = ceil(scale * rescale);
|
||||||
|
return scale;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
unsigned scale;
|
||||||
|
unsigned scale_shift;
|
||||||
|
|
||||||
|
emit_common(subgraph, operation, false);
|
||||||
|
|
||||||
|
if (operation->pooling.avg) {
|
||||||
|
scale = pooling_emit_ofm_scaling(
|
||||||
|
operation->ifm.scale,
|
||||||
|
operation->ofm.scale,
|
||||||
|
operation->kernel.height,
|
||||||
|
operation->kernel.width,
|
||||||
|
&scale_shift);
|
||||||
|
|
||||||
|
EMIT1(NPU_SET_OFM_SCALE, scale_shift, scale);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_ifm2(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, bool has_scalar)
|
||||||
|
{
|
||||||
|
if (!has_scalar) {
|
||||||
|
EMIT0(NPU_SET_IFM2_REGION, IO_REGION);
|
||||||
|
emit_addresses(subgraph, &operation->ifm2, NPU_SET_IFM2_BASE0, NPU_SET_IFM2_BASE1, NPU_SET_IFM2_BASE2, NPU_SET_IFM2_BASE3);
|
||||||
|
emit_tiles(subgraph, &operation->ifm2, NPU_SET_IFM2_HEIGHT0_M1, NPU_SET_IFM2_HEIGHT1_M1, NPU_SET_IFM2_WIDTH0_M1);
|
||||||
|
emit_strides(subgraph, &operation->ifm2, NPU_SET_IFM2_STRIDE_C, NPU_SET_IFM2_STRIDE_Y, NPU_SET_IFM2_STRIDE_X);
|
||||||
|
}
|
||||||
|
EMIT0(NPU_SET_IFM2_ZERO_POINT, operation->ifm2.zero_point);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_ifm2_broadcast(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
unsigned ifm2_broadcast = 0;
|
||||||
|
|
||||||
|
EMIT0(NPU_SET_IFM2_BROADCAST, ifm2_broadcast);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
def generate_scaling_for_elementwise(emit: CommandStreamEmitter, npu_op: NpuElementWiseOperation) -> int:
|
||||||
|
input_scale = npu_op.ifm.quantization.scale_f32 if npu_op.ifm.quantization else None
|
||||||
|
input2_scale = npu_op.ifm2.quantization.scale_f32 if npu_op.ifm2.quantization else None
|
||||||
|
output_scale = npu_op.ofm.quantization.scale_f32 if npu_op.ofm.quantization else None
|
||||||
|
|
||||||
|
if npu_op.activation is not None and npu_op.activation.op_type in (
|
||||||
|
NpuActivationOp.SIGMOID,
|
||||||
|
NpuActivationOp.TANH,
|
||||||
|
):
|
||||||
|
output_scale = 1 / 0x3000
|
||||||
|
|
||||||
|
if npu_op.sub_op_type == NpuElementWiseOp.MUL:
|
||||||
|
if npu_op.rescale:
|
||||||
|
ofm_scale, shift = npu_op.rescale
|
||||||
|
elif None in (input_scale, input2_scale, output_scale):
|
||||||
|
ofm_scale = 1
|
||||||
|
shift = 0
|
||||||
|
else:
|
||||||
|
ofm_scale, shift = scaling.elementwise_mul_scale(input_scale, input2_scale, output_scale)
|
||||||
|
else: # Add/Sub
|
||||||
|
# Default operand scaling is no scaling
|
||||||
|
opa_scale = opb_scale = 1
|
||||||
|
opa_shift = 0
|
||||||
|
bitdepth = npu_op.ifm.data_type.size_in_bits()
|
||||||
|
use_advanced_scaling = False
|
||||||
|
if npu_op.rescale is not None:
|
||||||
|
# Explicit ofm scaling
|
||||||
|
ofm_scale, shift = npu_op.rescale
|
||||||
|
elif None in (input_scale, input2_scale, output_scale):
|
||||||
|
# No ofm scaling
|
||||||
|
ofm_scale = 1
|
||||||
|
shift = 0
|
||||||
|
elif input_scale == input2_scale and bitdepth == 16:
|
||||||
|
# int16 same scaling
|
||||||
|
opa_scale, opb_scale, ofm_scale, shift = scaling.simplified_elementwise_add_sub_scale(
|
||||||
|
input_scale, input2_scale, output_scale
|
||||||
|
)
|
||||||
|
# align the double rounding with that of advanced scaling
|
||||||
|
opa_scale //= 2
|
||||||
|
opb_scale //= 2
|
||||||
|
shift -= 1
|
||||||
|
opa_shift = 0 # Unused for this case
|
||||||
|
elif input_scale == input2_scale:
|
||||||
|
# Same scaling
|
||||||
|
opa_scale, opb_scale, ofm_scale, shift = scaling.simplified_elementwise_add_sub_scale(
|
||||||
|
input_scale, input2_scale, output_scale
|
||||||
|
)
|
||||||
|
opa_shift = 0 # Unused for this case
|
||||||
|
# For 8 bit we can't guarantee double rounding with simplified scaling will always be
|
||||||
|
# the same as with advanced scaling due to different shifts. When the ofm scale fulfils
|
||||||
|
# the following we know that double rounding will have no effect for advanced scaling
|
||||||
|
# no matter the input, so we can safely use simplified scaling with double rounding disabled.
|
||||||
|
use_advanced_scaling = int(ofm_scale) & 0xFFF != 0
|
||||||
|
else:
|
||||||
|
use_advanced_scaling = True
|
||||||
|
if use_advanced_scaling:
|
||||||
|
# Use advanced implementation only when input/output scales differ,
|
||||||
|
# or when we can't guarantee the absence of rounding errors
|
||||||
|
(
|
||||||
|
opa_scale,
|
||||||
|
opa_shift,
|
||||||
|
ofm_scale,
|
||||||
|
shift,
|
||||||
|
op_to_scale,
|
||||||
|
) = scaling.advanced_elementwise_add_sub_scale(input_scale, input2_scale, output_scale, bitdepth)
|
||||||
|
opb_scale = 0 # Unused for this case
|
||||||
|
if npu_op.reversed_operands:
|
||||||
|
# If the operand order is reversed we also have to swap which operand is scaled
|
||||||
|
if op_to_scale == scaling.OperandToScale.OPa:
|
||||||
|
op_to_scale = scaling.OperandToScale.OPb
|
||||||
|
else:
|
||||||
|
op_to_scale = scaling.OperandToScale.OPa
|
||||||
|
emit.cmd1_with_offset(cmd1.NPU_SET_OPA_SCALE, opa_scale, opa_shift)
|
||||||
|
emit.cmd1_with_offset(cmd1.NPU_SET_OPB_SCALE, opb_scale)
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
 * Compute the operand rescale factors and the quantised output scale for an
 * elementwise add/sub.
 *
 * With M = max(input1_scale, input2_scale) and S = 2^input_shift:
 *
 *    *out_input1_rescale = input1_scale * S / (2 * M)
 *    *out_input2_rescale = input2_scale * S / (2 * M)
 *    output rescale      = (2 * M) / (output_scale * S)
 *
 * The output rescale is passed through ethosu_quantize_scale(), which
 * returns the scale (stored in *out_out_scale) and writes the shift to
 * *out_out_shift. An output_scale of exactly 0.0 yields an output rescale of
 * 0.0 instead of dividing by zero.
 */
static void
simplified_elementwise_add_sub_scale(
   double input1_scale,
   double input2_scale,
   double output_scale,
   uint32_t input_shift,
   double *out_input1_rescale,
   double *out_input2_rescale,
   uint32_t *out_out_scale,
   uint32_t *out_out_shift)
{
   /* 64-bit shift so that input_shift values of 32 and above still work. */
   const double shift_factor = (double)(1LL << input_shift);
   const double twice_max_scale = 2.0 * MAX2(input1_scale, input2_scale);

   *out_input1_rescale = input1_scale * shift_factor / twice_max_scale;
   *out_input2_rescale = input2_scale * shift_factor / twice_max_scale;

   const double output_rescale =
      (output_scale == 0.0) ? 0.0
                            : twice_max_scale / (output_scale * shift_factor);

   *out_out_scale = ethosu_quantize_scale(output_rescale, out_out_shift);
}
|
||||||
|
|
||||||
|
static enum ethosu_op_to_scale
|
||||||
|
eltwise_emit_ofm_scaling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
double max_input_scale = MAX2(operation->ifm.scale, operation->ifm2.scale);
|
||||||
|
double min_input_scale = MIN2(operation->ifm.scale, operation->ifm2.scale);
|
||||||
|
unsigned bitdepth = 8;
|
||||||
|
uint32_t input_shift = (bitdepth == 8) ? 20 : 15;
|
||||||
|
double input1_rescale_tmp;
|
||||||
|
double input2_rescale_tmp;
|
||||||
|
unsigned ofm_scale, ofm_shift;
|
||||||
|
unsigned opa_scale, opa_shift;
|
||||||
|
|
||||||
|
simplified_elementwise_add_sub_scale(
|
||||||
|
min_input_scale, max_input_scale, operation->ofm.scale, input_shift,
|
||||||
|
&input1_rescale_tmp, &input2_rescale_tmp,
|
||||||
|
&ofm_scale, &ofm_shift);
|
||||||
|
|
||||||
|
opa_scale = ethosu_quantize_scale(input1_rescale_tmp, &opa_shift);
|
||||||
|
|
||||||
|
EMIT1(NPU_SET_OPA_SCALE, opa_shift, opa_scale);
|
||||||
|
EMIT1(NPU_SET_OPB_SCALE, 0x0, 0x0);
|
||||||
|
EMIT1(NPU_SET_OFM_SCALE, ofm_shift, ofm_scale);
|
||||||
|
|
||||||
|
if (operation->ifm.scale < operation->ifm2.scale)
|
||||||
|
return OP_A;
|
||||||
|
else
|
||||||
|
return OP_B;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
bool has_scalar = false;
|
||||||
|
enum ethosu_op_to_scale op_to_scale = OP_NONE;
|
||||||
|
|
||||||
|
op_to_scale = eltwise_emit_ofm_scaling(subgraph, operation);
|
||||||
|
|
||||||
|
emit_common(subgraph, operation, op_to_scale);
|
||||||
|
|
||||||
|
emit_ifm2(subgraph, operation, has_scalar);
|
||||||
|
emit_ifm_precision(subgraph, &operation->ifm2, OP_NONE, NPU_SET_IFM2_PRECISION);
|
||||||
|
emit_ifm2_broadcast(subgraph, operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_dma(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
EMIT0(NPU_SET_DMA0_SRC_REGION, COEFS_REGION);
|
||||||
|
EMIT1(NPU_SET_DMA0_SRC, 0x0, operation->dma.address);
|
||||||
|
EMIT0(NPU_SET_DMA0_DST_REGION, SCRATCH_REGION);
|
||||||
|
EMIT1(NPU_SET_DMA0_DST, 0x0, 0x0);
|
||||||
|
EMIT1(NPU_SET_DMA0_LEN, 0x0, operation->dma.size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_operation_code(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
switch (operation->type) {
|
||||||
|
case ETHOSU_OPERATION_TYPE_CONVOLUTION:
|
||||||
|
|
||||||
|
if (operation->conv.depthwise)
|
||||||
|
EMIT0(NPU_OP_DEPTHWISE, 0x0);
|
||||||
|
else
|
||||||
|
EMIT0(NPU_OP_CONV, 0x0);
|
||||||
|
|
||||||
|
break;
|
||||||
|
case ETHOSU_OPERATION_TYPE_POOLING:
|
||||||
|
EMIT0(NPU_OP_POOL, operation->pooling.avg);
|
||||||
|
break;
|
||||||
|
case ETHOSU_OPERATION_TYPE_ELTWISE:
|
||||||
|
EMIT0(NPU_OP_ELEMENTWISE, 0x1);
|
||||||
|
break;
|
||||||
|
case ETHOSU_OPERATION_TYPE_DMA:
|
||||||
|
EMIT0(NPU_OP_DMA_START, 0x0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
emit_cmd_waits(struct ethosu_subgraph *subgraph, int npu_waits, int dma_waits)
|
||||||
|
{
|
||||||
|
if (npu_waits >= 0)
|
||||||
|
EMIT0(NPU_OP_KERNEL_WAIT, npu_waits);
|
||||||
|
|
||||||
|
if (dma_waits >= 0)
|
||||||
|
EMIT0(NPU_OP_DMA_WAIT, dma_waits);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
ethosu_intersects_accesses(struct ethosu_address_range *a, struct ethosu_address_range *b)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < MAX_MEMORY_ACCESSES; i++) {
|
||||||
|
for (int j = 0; j < MAX_MEMORY_ACCESSES; j++) {
|
||||||
|
if (a[i].size == 0 || b[j].size == 0)
|
||||||
|
continue;
|
||||||
|
if (a[i].region != b[j].region)
|
||||||
|
continue;
|
||||||
|
if (a[i].address < b[j].address + b[j].size &&
|
||||||
|
b[j].address < a[i].address + a[i].size)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
ethosu_operations_conflict(struct ethosu_subgraph *subgraph,
|
||||||
|
struct ethosu_operation *op1, struct ethosu_operation *op2)
|
||||||
|
{
|
||||||
|
/* True dependencies, or write -> read */
|
||||||
|
if (ethosu_intersects_accesses(op1->write_accesses, op2->read_accesses))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* Anti-dependencies, or read -> write */
|
||||||
|
if (ethosu_intersects_accesses(op1->read_accesses, op2->write_accesses))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* Output dependencies, or write -> write */
|
||||||
|
if (ethosu_intersects_accesses(op1->write_accesses, op2->write_accesses))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* read -> read does not cause a conflict */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
get_wait_dependency(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation,
|
||||||
|
struct util_dynarray *outstanding_dma_ops,
|
||||||
|
struct util_dynarray *outstanding_npu_ops,
|
||||||
|
int *npu_waits, int *dma_waits)
|
||||||
|
{
|
||||||
|
unsigned kern_wait = -1;
|
||||||
|
unsigned dma_wait = -1;
|
||||||
|
struct util_dynarray *outstanding_ops = NULL;
|
||||||
|
|
||||||
|
if (operation->type == ETHOSU_OPERATION_TYPE_DMA) {
|
||||||
|
outstanding_ops = outstanding_npu_ops;
|
||||||
|
|
||||||
|
util_dynarray_append(outstanding_dma_ops, struct ethosu_operation *, operation);
|
||||||
|
|
||||||
|
unsigned dmap_ops = util_dynarray_num_elements(outstanding_dma_ops, struct ethosu_operation *);
|
||||||
|
if (dmap_ops > MAX_OUTSTANDING_DMA_OPS)
|
||||||
|
(void)util_dynarray_pop(outstanding_dma_ops, struct ethosu_operation *);
|
||||||
|
} else {
|
||||||
|
outstanding_ops = outstanding_dma_ops;
|
||||||
|
|
||||||
|
util_dynarray_append(outstanding_npu_ops, struct ethosu_operation *, operation);
|
||||||
|
|
||||||
|
unsigned npu_ops = util_dynarray_num_elements(outstanding_npu_ops, struct ethosu_operation *);
|
||||||
|
if (npu_ops > MAX_OUTSTANDING_NPU_OPS)
|
||||||
|
(void)util_dynarray_pop(outstanding_npu_ops, struct ethosu_operation *);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned waits = -1;
|
||||||
|
for (int idx = util_dynarray_num_elements(outstanding_ops, struct ethosu_operation *) - 1; idx >= 0; idx--) {
|
||||||
|
waits += 1;
|
||||||
|
struct ethosu_operation *other_op = *util_dynarray_element(outstanding_ops, struct ethosu_operation *, idx);
|
||||||
|
if (other_op == operation)
|
||||||
|
continue;
|
||||||
|
if (ethosu_operations_conflict(subgraph, other_op, operation)) {
|
||||||
|
if (operation->type == ETHOSU_OPERATION_TYPE_DMA)
|
||||||
|
kern_wait = waits;
|
||||||
|
else
|
||||||
|
dma_wait = waits;
|
||||||
|
// Current op needs to wait, and after it has waited,
|
||||||
|
// outstanding_ops[0..idx] are not outstanding any longer.
|
||||||
|
for (int i = 0; i <= idx; i++)
|
||||||
|
(void)util_dynarray_pop(outstanding_ops, struct ethosu_operation *);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
*npu_waits = kern_wait;
|
||||||
|
*dma_waits = dma_wait;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
fill_memory_accesses(struct ethosu_subgraph *subgraph)
|
||||||
|
{
|
||||||
|
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
|
||||||
|
switch (operation->type) {
|
||||||
|
case ETHOSU_OPERATION_TYPE_DMA:
|
||||||
|
operation->read_accesses[0].region = COEFS_REGION;
|
||||||
|
operation->read_accesses[0].address = operation->dma.address;
|
||||||
|
operation->read_accesses[0].size = operation->dma.size;
|
||||||
|
|
||||||
|
operation->write_accesses[0].region = SCRATCH_REGION;
|
||||||
|
operation->write_accesses[0].address = 0x0;
|
||||||
|
operation->write_accesses[0].size = operation->dma.size;
|
||||||
|
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
operation->read_accesses[0].region = IO_REGION;
|
||||||
|
operation->read_accesses[0].address = operation->ifm.tiles.addresses[0];
|
||||||
|
operation->read_accesses[0].size = operation->ifm.shape.height * operation->ifm.shape.width * operation->ifm.shape.depth;
|
||||||
|
|
||||||
|
operation->read_accesses[1].region = IO_REGION;
|
||||||
|
operation->read_accesses[1].address = operation->ifm2.tiles.addresses[0];
|
||||||
|
operation->read_accesses[1].size = operation->ifm2.shape.height * operation->ifm2.shape.width * operation->ifm2.shape.depth;
|
||||||
|
|
||||||
|
operation->read_accesses[2].region = operation->conv.scales.region;
|
||||||
|
operation->read_accesses[2].address = operation->conv.scales.address;
|
||||||
|
operation->read_accesses[2].size = operation->conv.scales.size;
|
||||||
|
|
||||||
|
operation->read_accesses[3].region = operation->conv.weights.region;
|
||||||
|
operation->read_accesses[3].address = operation->conv.weights.address;
|
||||||
|
operation->read_accesses[3].size = operation->conv.weights.size;
|
||||||
|
|
||||||
|
operation->write_accesses[0].region = IO_REGION;
|
||||||
|
operation->write_accesses[0].address = operation->ofm.tiles.addresses[0];
|
||||||
|
operation->write_accesses[0].size = operation->ofm.shape.height * operation->ofm.shape.width * operation->ofm.shape.depth;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
if (!prev_op)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
// Check if the reserved shram will be used in current/prev op
|
||||||
|
bool prev_uses_lut = false; // prev_op->activation && prev_op->activation->op_type == NpuActivationOp.TABLE_LOOKUP;
|
||||||
|
bool curr_uses_lut = false; // operation->activation && operation->activation->op_type == NpuActivationOp.TABLE_LOOKUP;
|
||||||
|
if (prev_uses_lut && SHRAM_RESERVED_UNUSED_BANKS == 0 && !curr_uses_lut)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return MAX_BLOCKDEP; /* TODO: Check if there is actually overlap between the FMs */
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph)
|
||||||
|
{
|
||||||
|
struct ethosu_operation *prev_op = NULL;
|
||||||
|
struct util_dynarray outstanding_dma_ops;
|
||||||
|
struct util_dynarray outstanding_npu_ops;
|
||||||
|
|
||||||
|
util_dynarray_init(&outstanding_dma_ops, NULL);
|
||||||
|
util_dynarray_init(&outstanding_npu_ops, NULL);
|
||||||
|
|
||||||
|
subgraph->cmdstream_used = 32;
|
||||||
|
subgraph->cmdstream = calloc(subgraph->cmdstream_used, sizeof(*subgraph->cmdstream));
|
||||||
|
subgraph->cursor = subgraph->cmdstream;
|
||||||
|
|
||||||
|
fill_memory_accesses(subgraph);
|
||||||
|
|
||||||
|
/* Compile */
|
||||||
|
|
||||||
|
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||||
|
EMIT0(NPU_SET_PARALLEL_MODE, 0x0);
|
||||||
|
|
||||||
|
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
|
||||||
|
|
||||||
|
int npu_waits, dma_waits;
|
||||||
|
|
||||||
|
get_wait_dependency(subgraph, operation, &outstanding_dma_ops, &outstanding_npu_ops,
|
||||||
|
&npu_waits, &dma_waits);
|
||||||
|
|
||||||
|
switch (operation->type) {
|
||||||
|
case ETHOSU_OPERATION_TYPE_CONVOLUTION:
|
||||||
|
emit_convolution(subgraph, operation);
|
||||||
|
break;
|
||||||
|
case ETHOSU_OPERATION_TYPE_POOLING:
|
||||||
|
emit_pooling(subgraph, operation);
|
||||||
|
break;
|
||||||
|
case ETHOSU_OPERATION_TYPE_ELTWISE:
|
||||||
|
emit_eltwise(subgraph, operation);
|
||||||
|
break;
|
||||||
|
case ETHOSU_OPERATION_TYPE_DMA:
|
||||||
|
emit_dma(subgraph, operation);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (operation->type != ETHOSU_OPERATION_TYPE_DMA) {
|
||||||
|
unsigned blockdep = calc_blockdep(subgraph, prev_op, operation);
|
||||||
|
blockdep = MIN2(blockdep, MAX_BLOCKDEP);
|
||||||
|
EMIT0(NPU_SET_BLOCKDEP, blockdep);
|
||||||
|
|
||||||
|
prev_op = operation;
|
||||||
|
}
|
||||||
|
|
||||||
|
emit_cmd_waits(subgraph, npu_waits, dma_waits);
|
||||||
|
emit_operation_code(subgraph, operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
EMIT0(NPU_OP_STOP, 0xffff);
|
||||||
|
|
||||||
|
util_dynarray_fini(&outstanding_dma_ops);
|
||||||
|
util_dynarray_fini(&outstanding_npu_ops);
|
||||||
|
}
|
||||||
13
src/gallium/drivers/ethosu/ethosu_cmd.h
Normal file
13
src/gallium/drivers/ethosu/ethosu_cmd.h
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ETHOSU_CMD_H
|
||||||
|
#define ETHOSU_CMD_H
|
||||||
|
|
||||||
|
#include "ethosu_ml.h"
|
||||||
|
|
||||||
|
/*
 * Generate the NPU command stream for all operations of the subgraph. The
 * stream is allocated by this function and stored in subgraph->cmdstream.
 */
void ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph);
|
||||||
|
|
||||||
|
#endif /* ETHOSU_CMD_H */
|
||||||
133
src/gallium/drivers/ethosu/ethosu_coefs.c
Normal file
133
src/gallium/drivers/ethosu/ethosu_coefs.c
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "util/u_inlines.h"
|
||||||
|
|
||||||
|
#include "mlw_codec/mlw_encode.h"
|
||||||
|
#include "ethosu_coefs.h"
|
||||||
|
|
||||||
|
static void
|
||||||
|
fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, uint8_t **scales, long *scales_size, struct pipe_resource *bias_rsrc)
|
||||||
|
{
|
||||||
|
struct pipe_transfer *transfer_in;
|
||||||
|
int32_t *biases = pipe_buffer_map(subgraph->base.context, bias_rsrc,
|
||||||
|
PIPE_MAP_READ, &transfer_in);
|
||||||
|
unsigned idx = 0;
|
||||||
|
|
||||||
|
*scales_size = ALIGN(operation->ofm.shape.depth * 10, 16);
|
||||||
|
*scales = malloc(*scales_size);
|
||||||
|
memset(*scales, 0, *scales_size);
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < operation->ofm.shape.depth; i++) {
|
||||||
|
uint64_t bias = biases[i];
|
||||||
|
double conv_scale = ((double)operation->ifm.scale * (double)operation->kernel.scale) / (double)operation->ofm.scale;
|
||||||
|
uint32_t shift;
|
||||||
|
int scale = ethosu_quantize_scale(conv_scale, &shift);
|
||||||
|
|
||||||
|
(*scales)[idx++] = (bias >> (0 * 8)) & 0xFF;
|
||||||
|
(*scales)[idx++] = (bias >> (1 * 8)) & 0xFF;
|
||||||
|
(*scales)[idx++] = (bias >> (2 * 8)) & 0xFF;
|
||||||
|
(*scales)[idx++] = (bias >> (3 * 8)) & 0xFF;
|
||||||
|
(*scales)[idx++] = (bias >> (4 * 8)) & 0xFF;
|
||||||
|
|
||||||
|
(*scales)[idx++] = (scale >> (0 * 8)) & 0xFF;
|
||||||
|
(*scales)[idx++] = (scale >> (1 * 8)) & 0xFF;
|
||||||
|
(*scales)[idx++] = (scale >> (2 * 8)) & 0xFF;
|
||||||
|
(*scales)[idx++] = (scale >> (3 * 8)) & 0xFF;
|
||||||
|
|
||||||
|
(*scales)[idx++] = shift & 0x3F;
|
||||||
|
}
|
||||||
|
|
||||||
|
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
calculate_weights_strides(struct ethosu_operation *operation, int out_strides[4])
|
||||||
|
{
|
||||||
|
if (operation->kernel.depthwise) {
|
||||||
|
out_strides[0] = 1;
|
||||||
|
out_strides[1] = operation->ofm.shape.depth * operation->kernel.height;
|
||||||
|
out_strides[2] = operation->ofm.shape.depth;
|
||||||
|
out_strides[3] = operation->ofm.shape.depth * operation->kernel.width;
|
||||||
|
} else {
|
||||||
|
out_strides[3] = 1;
|
||||||
|
out_strides[2] = out_strides[3] * operation->ifm.shape.depth;
|
||||||
|
out_strides[1] = out_strides[2] * operation->kernel.width;
|
||||||
|
out_strides[0] = out_strides[1] * operation->kernel.height;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
fill_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, uint8_t **weights, long *weights_size, struct pipe_resource *weight_rsrc)
|
||||||
|
{
|
||||||
|
int brick_strides[4] = {0};
|
||||||
|
unsigned input_channels = operation->ifm.shape.depth;
|
||||||
|
|
||||||
|
if (operation->kernel.depthwise)
|
||||||
|
input_channels = 1;
|
||||||
|
|
||||||
|
calculate_weights_strides(operation, brick_strides);
|
||||||
|
|
||||||
|
struct pipe_transfer *transfer_in;
|
||||||
|
uint8_t *input_weights_8 = pipe_buffer_map(subgraph->base.context, weight_rsrc,
|
||||||
|
PIPE_MAP_READ, &transfer_in);
|
||||||
|
int16_t *input_weights = malloc(pipe_buffer_size(weight_rsrc) * sizeof(*input_weights));
|
||||||
|
for (int i = 0; i < pipe_buffer_size(weight_rsrc); i++) {
|
||||||
|
if (operation->kernel.is_signed)
|
||||||
|
input_weights[i] = (int8_t)input_weights_8[i] - operation->kernel.zero_point;
|
||||||
|
else
|
||||||
|
input_weights[i] = input_weights_8[i] - operation->kernel.zero_point;
|
||||||
|
}
|
||||||
|
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||||
|
|
||||||
|
long padded_size = 0;
|
||||||
|
*weights_size = mlw_reorder_encode(
|
||||||
|
IFM_UBLOCK.depth,
|
||||||
|
OFM_UBLOCK.depth,
|
||||||
|
operation->ofm.shape.depth,
|
||||||
|
operation->kernel.height,
|
||||||
|
operation->kernel.width,
|
||||||
|
input_channels,
|
||||||
|
brick_strides,
|
||||||
|
input_weights,
|
||||||
|
operation->block_config.ofm_block.depth,
|
||||||
|
operation->kernel.depthwise,
|
||||||
|
operation->conv.part_kernel_first,
|
||||||
|
8 /* ifm_bitdepth */,
|
||||||
|
8 /* decomp_h */,
|
||||||
|
8 /* decomp_w */,
|
||||||
|
weights,
|
||||||
|
&padded_size,
|
||||||
|
DBG_ENABLED(ETHOSU_DBG_MSGS));
|
||||||
|
|
||||||
|
free(input_weights);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fill_coefs(struct ethosu_subgraph *subgraph,
|
||||||
|
struct ethosu_operation *operation,
|
||||||
|
struct pipe_resource *bias_rsrc,
|
||||||
|
struct pipe_resource *weight_rsrc)
|
||||||
|
{
|
||||||
|
uint8_t *scales = NULL;
|
||||||
|
fill_scale_and_biases(subgraph, operation, &scales, &operation->conv.scales.size, bias_rsrc);
|
||||||
|
|
||||||
|
operation->conv.scales.region = COEFS_REGION;
|
||||||
|
operation->conv.scales.address = subgraph->coefs_used;
|
||||||
|
subgraph->coefs_used += ALIGN_POT(operation->conv.scales.size, 16);
|
||||||
|
subgraph->coefs = realloc(subgraph->coefs, subgraph->coefs_used);
|
||||||
|
memcpy(subgraph->coefs + operation->conv.scales.address, scales, operation->conv.scales.size);
|
||||||
|
free(scales);
|
||||||
|
|
||||||
|
uint8_t *weights = NULL;
|
||||||
|
fill_weights(subgraph, operation, &weights, &operation->conv.weights.size, weight_rsrc);
|
||||||
|
|
||||||
|
operation->conv.weights.region = COEFS_REGION;
|
||||||
|
operation->conv.weights.address = subgraph->coefs_used;
|
||||||
|
subgraph->coefs_used += ALIGN_POT(operation->conv.weights.size, 16);
|
||||||
|
subgraph->coefs = realloc(subgraph->coefs, subgraph->coefs_used);
|
||||||
|
memcpy(subgraph->coefs + operation->conv.weights.address, weights, operation->conv.weights.size);
|
||||||
|
free(weights);
|
||||||
|
}
|
||||||
17
src/gallium/drivers/ethosu/ethosu_coefs.h
Normal file
17
src/gallium/drivers/ethosu/ethosu_coefs.h
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ETHOSU_COEFS_H
|
||||||
|
#define ETHOSU_COEFS_H
|
||||||
|
|
||||||
|
#include "ethosu_ml.h"
|
||||||
|
|
||||||
|
/*
 * Generate the scales/biases and encoded weights for `operation` from the
 * given bias and weight buffers and append them to the subgraph's
 * coefficient buffer.
 */
void fill_coefs(struct ethosu_subgraph *subgraph,
                struct ethosu_operation *operation,
                struct pipe_resource *bias_rsrc,
                struct pipe_resource *weight_rsrc);
|
||||||
|
|
||||||
|
#endif /* ETHOSU_COEFS_H */
|
||||||
243
src/gallium/drivers/ethosu/ethosu_device.c
Normal file
243
src/gallium/drivers/ethosu/ethosu_device.c
Normal file
|
|
@ -0,0 +1,243 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ethosu_device.h"
|
||||||
|
#include "ethosu_ml.h"
|
||||||
|
|
||||||
|
#include "drm-uapi/ethosu_accel.h"
|
||||||
|
|
||||||
|
#include <xf86drm.h>
|
||||||
|
#include "util/os_mman.h"
|
||||||
|
#include "util/u_inlines.h"
|
||||||
|
#include "util/u_surface.h"
|
||||||
|
#include "util/u_transfer.h"
|
||||||
|
|
||||||
|
static const struct debug_named_value ethosu_debug_options[] = {
|
||||||
|
{"dbg_msgs", ETHOSU_DBG_MSGS, "Print debug messages"},
|
||||||
|
{"dump_bos", ETHOSU_DBG_DUMP_BOS, "Dump buffers for analysis"},
|
||||||
|
{"zero_bos", ETHOSU_DBG_ZERO, "Zero buffers for debugging"},
|
||||||
|
{"disable_nhcwb16", ETHOSU_DBG_DISABLE_NHCWB16, "Disable NHCWB16"},
|
||||||
|
{"disable_sram", ETHOSU_DBG_DISABLE_SRAM, "Disable SRAM"},
|
||||||
|
DEBUG_NAMED_VALUE_END};
|
||||||
|
|
||||||
|
DEBUG_GET_ONCE_FLAGS_OPTION(ethosu_debug, "ETHOSU_DEBUG", ethosu_debug_options, 0)
|
||||||
|
int ethosu_debug = 0;
|
||||||
|
|
||||||
|
/* pipe_screen::destroy hook: release the ralloc-allocated screen. */
static void
ethosu_destroy_screen(struct pipe_screen *pscreen)
{
   ralloc_free(ethosu_screen(pscreen));
}
|
||||||
|
|
||||||
|
/* pipe_context::destroy hook: release the ralloc-allocated context. */
static void
ethosu_destroy_context(struct pipe_context *pctx)
{
   ralloc_free(ethosu_context(pctx));
}
|
||||||
|
|
||||||
|
static void *
|
||||||
|
ethosu_buffer_map(struct pipe_context *pctx,
|
||||||
|
struct pipe_resource *prsc, unsigned level,
|
||||||
|
unsigned usage, const struct pipe_box *box,
|
||||||
|
struct pipe_transfer **out_transfer)
|
||||||
|
{
|
||||||
|
struct ethosu_screen *screen = ethosu_screen(pctx->screen);
|
||||||
|
struct ethosu_resource *rsc = ethosu_resource(prsc);
|
||||||
|
struct drm_ethosu_bo_wait bo_wait = {0};
|
||||||
|
struct drm_ethosu_bo_mmap_offset bo_mmap_offset = {0};
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
assert(level == 0);
|
||||||
|
assert(prsc->target == PIPE_BUFFER);
|
||||||
|
assert(box->y == 0);
|
||||||
|
assert(box->z == 0);
|
||||||
|
assert(box->height == 1);
|
||||||
|
assert(box->depth == 1);
|
||||||
|
|
||||||
|
struct pipe_transfer *transfer = rzalloc(NULL, struct pipe_transfer);
|
||||||
|
transfer->level = level;
|
||||||
|
transfer->usage = usage;
|
||||||
|
transfer->box = *box;
|
||||||
|
|
||||||
|
pipe_resource_reference(&transfer->resource, prsc);
|
||||||
|
|
||||||
|
bo_wait.handle = rsc->handle;
|
||||||
|
bo_wait.timeout_ns = INT64_MAX;
|
||||||
|
|
||||||
|
ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_BO_WAIT, &bo_wait);
|
||||||
|
if (ret == -1)
|
||||||
|
goto free_transfer;
|
||||||
|
|
||||||
|
bo_mmap_offset.handle = rsc->handle;
|
||||||
|
ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_BO_MMAP_OFFSET, &bo_mmap_offset);
|
||||||
|
if (ret == -1)
|
||||||
|
goto free_transfer;
|
||||||
|
|
||||||
|
uint8_t *map = os_mmap(NULL, prsc->width0, PROT_READ | PROT_WRITE, MAP_SHARED,
|
||||||
|
screen->fd, bo_mmap_offset.offset);
|
||||||
|
assert(map != MAP_FAILED);
|
||||||
|
if (map == MAP_FAILED)
|
||||||
|
goto free_transfer;
|
||||||
|
|
||||||
|
*out_transfer = transfer;
|
||||||
|
|
||||||
|
return map + box->x;
|
||||||
|
|
||||||
|
free_transfer:
|
||||||
|
pipe_resource_reference(&transfer->resource, NULL);
|
||||||
|
ralloc_free(transfer);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_buffer_unmap(struct pipe_context *pctx,
|
||||||
|
struct pipe_transfer *transfer)
|
||||||
|
{
|
||||||
|
pipe_resource_reference(&transfer->resource, NULL);
|
||||||
|
ralloc_free(transfer);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct pipe_context *
|
||||||
|
ethosu_create_context(struct pipe_screen *screen,
|
||||||
|
void *priv, unsigned flags)
|
||||||
|
{
|
||||||
|
struct ethosu_context *ctx = rzalloc(NULL, struct ethosu_context);
|
||||||
|
struct pipe_context *pctx = &ctx->base;
|
||||||
|
|
||||||
|
if (!ctx)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
pctx->screen = screen;
|
||||||
|
pctx->priv = priv;
|
||||||
|
|
||||||
|
pctx->destroy = ethosu_destroy_context;
|
||||||
|
|
||||||
|
pctx->buffer_map = ethosu_buffer_map;
|
||||||
|
pctx->buffer_unmap = ethosu_buffer_unmap;
|
||||||
|
pctx->resource_copy_region = util_resource_copy_region;
|
||||||
|
pctx->buffer_subdata = u_default_buffer_subdata;
|
||||||
|
pctx->clear_buffer = u_default_clear_buffer;
|
||||||
|
|
||||||
|
pctx->ml_operation_supported = ethosu_ml_operation_supported;
|
||||||
|
pctx->ml_subgraph_create = ethosu_ml_subgraph_create;
|
||||||
|
pctx->ml_subgraph_invoke = ethosu_ml_subgraph_invoke;
|
||||||
|
pctx->ml_subgraph_read_output = ethosu_ml_subgraph_read_outputs;
|
||||||
|
pctx->ml_subgraph_destroy = ethosu_ml_subgraph_destroy;
|
||||||
|
|
||||||
|
return pctx;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct pipe_resource *
|
||||||
|
ethosu_resource_create(struct pipe_screen *pscreen,
|
||||||
|
const struct pipe_resource *templat)
|
||||||
|
{
|
||||||
|
struct ethosu_screen *screen = ethosu_screen(pscreen);
|
||||||
|
struct drm_ethosu_bo_create arg = {0};
|
||||||
|
struct ethosu_resource *rsc;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
assert(templat->target == PIPE_BUFFER);
|
||||||
|
assert(templat->height0 == 1);
|
||||||
|
assert(templat->depth0 == 1);
|
||||||
|
assert(templat->array_size == 1);
|
||||||
|
|
||||||
|
rsc = rzalloc(NULL, struct ethosu_resource);
|
||||||
|
if (!rsc)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
rsc->base = *templat;
|
||||||
|
rsc->base.screen = pscreen;
|
||||||
|
rsc->base.nr_samples = templat->nr_samples;
|
||||||
|
pipe_reference_init(&rsc->base.reference, 1);
|
||||||
|
|
||||||
|
rsc->bo_size = templat->width0;
|
||||||
|
|
||||||
|
arg.size = templat->width0;
|
||||||
|
|
||||||
|
ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_BO_CREATE, &arg);
|
||||||
|
if (ret < 0)
|
||||||
|
goto free_rsc;
|
||||||
|
|
||||||
|
rsc->handle = arg.handle;
|
||||||
|
|
||||||
|
return &rsc->base;
|
||||||
|
|
||||||
|
free_rsc:
|
||||||
|
ralloc_free(rsc);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_resource_destroy(struct pipe_screen *pscreen,
|
||||||
|
struct pipe_resource *prsc)
|
||||||
|
{
|
||||||
|
struct ethosu_resource *rsc = ethosu_resource(prsc);
|
||||||
|
struct ethosu_screen *screen = ethosu_screen(pscreen);
|
||||||
|
struct drm_gem_close arg = {0};
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
arg.handle = rsc->handle;
|
||||||
|
|
||||||
|
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
|
||||||
|
assert(ret >= 0);
|
||||||
|
|
||||||
|
ralloc_free(rsc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
ethosu_screen_get_fd(struct pipe_screen *pscreen)
|
||||||
|
{
|
||||||
|
return ethosu_screen(pscreen)->fd;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
dev_query(struct ethosu_screen *screen)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct drm_ethosu_npu_info *info = &screen->info;
|
||||||
|
struct drm_ethosu_dev_query dev_query = {
|
||||||
|
.type = DRM_ETHOSU_DEV_QUERY_NPU_INFO,
|
||||||
|
.size = sizeof(*info),
|
||||||
|
.pointer = (uintptr_t)info,
|
||||||
|
};
|
||||||
|
|
||||||
|
ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_DEV_QUERY, &dev_query);
|
||||||
|
assert(ret != -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pipe_screen *
|
||||||
|
ethosu_screen_create(int fd,
|
||||||
|
const struct pipe_screen_config *config,
|
||||||
|
struct renderonly *ro)
|
||||||
|
{
|
||||||
|
struct ethosu_screen *ethosu_screen;
|
||||||
|
struct pipe_screen *screen;
|
||||||
|
|
||||||
|
ethosu_screen = rzalloc(NULL, struct ethosu_screen);
|
||||||
|
if (!ethosu_screen)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
screen = ðosu_screen->pscreen;
|
||||||
|
|
||||||
|
ethosu_debug = debug_get_option_ethosu_debug();
|
||||||
|
|
||||||
|
ethosu_screen->fd = fd;
|
||||||
|
dev_query(ethosu_screen);
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_DISABLE_SRAM))
|
||||||
|
ethosu_screen->info.sram_size = 0;
|
||||||
|
|
||||||
|
screen->get_screen_fd = ethosu_screen_get_fd;
|
||||||
|
screen->destroy = ethosu_destroy_screen;
|
||||||
|
screen->context_create = ethosu_create_context;
|
||||||
|
screen->resource_create = ethosu_resource_create;
|
||||||
|
screen->resource_destroy = ethosu_resource_destroy;
|
||||||
|
|
||||||
|
return screen;
|
||||||
|
}
|
||||||
84
src/gallium/drivers/ethosu/ethosu_device.h
Normal file
84
src/gallium/drivers/ethosu/ethosu_device.h
Normal file
|
|
@ -0,0 +1,84 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "pipe/p_context.h"
|
||||||
|
#include "pipe/p_screen.h"
|
||||||
|
#include "pipe/p_state.h"
|
||||||
|
#include "renderonly/renderonly.h"
|
||||||
|
#include "util/log.h"
|
||||||
|
|
||||||
|
#include "drm-uapi/ethosu_accel.h"
|
||||||
|
|
||||||
|
#ifndef ETHOSU_SCREEN_H
|
||||||
|
#define ETHOSU_SCREEN_H
|
||||||
|
|
||||||
|
enum ethosu_dbg {
|
||||||
|
ETHOSU_DBG_MSGS = BITFIELD_BIT(0),
|
||||||
|
ETHOSU_DBG_DUMP_BOS = BITFIELD_BIT(1),
|
||||||
|
ETHOSU_DBG_ZERO = BITFIELD_BIT(2),
|
||||||
|
ETHOSU_DBG_DISABLE_NHCWB16 = BITFIELD_BIT(3),
|
||||||
|
ETHOSU_DBG_DISABLE_SRAM = BITFIELD_BIT(4),
|
||||||
|
};
|
||||||
|
|
||||||
|
extern int ethosu_debug;
|
||||||
|
|
||||||
|
/* Non-zero when any bit of `flag` is set in the global ethosu_debug mask. */
#define DBG_ENABLED(flag) unlikely(ethosu_debug & (flag))

/*
 * Emit a debug message through mesa_logd(), prefixed with the calling
 * function name and line number. Does nothing unless ETHOSU_DBG_MSGS is set.
 */
#define DBG(fmt, ...)                                                          \
   do {                                                                        \
      if (DBG_ENABLED(ETHOSU_DBG_MSGS))                                        \
         mesa_logd("%s:%d: " fmt, __func__, __LINE__, ##__VA_ARGS__);          \
   } while (0)
|
||||||
|
|
||||||
|
struct ethosu_screen {
|
||||||
|
struct pipe_screen pscreen;
|
||||||
|
|
||||||
|
int fd;
|
||||||
|
struct drm_ethosu_npu_info info;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Downcast a pipe_screen to the driver screen that embeds it. */
static inline struct ethosu_screen *
ethosu_screen(struct pipe_screen *p)
{
   struct ethosu_screen *screen = (struct ethosu_screen *)p;

   return screen;
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
ethosu_is_u65(struct ethosu_screen *e)
|
||||||
|
{
|
||||||
|
return DRM_ETHOSU_ARCH_MAJOR(e->info.id) == 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ethosu_context {
|
||||||
|
struct pipe_context base;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Downcast a pipe_context to the driver context that embeds it. */
static inline struct ethosu_context *
ethosu_context(struct pipe_context *pctx)
{
   struct ethosu_context *ctx = (struct ethosu_context *)pctx;

   return ctx;
}
|
||||||
|
|
||||||
|
struct ethosu_resource {
|
||||||
|
struct pipe_resource base;
|
||||||
|
|
||||||
|
uint32_t handle;
|
||||||
|
uint64_t phys_addr;
|
||||||
|
uint64_t obj_addr;
|
||||||
|
uint64_t bo_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Downcast a pipe_resource to the driver resource that embeds it. */
static inline struct ethosu_resource *
ethosu_resource(struct pipe_resource *p)
{
   struct ethosu_resource *res = (struct ethosu_resource *)p;

   return res;
}
|
||||||
|
|
||||||
|
/*
 * Create the screen for the Ethos-U device behind the DRM file descriptor
 * `fd`. Returns NULL when the screen cannot be allocated.
 */
struct pipe_screen *
ethosu_screen_create(int fd,
                     const struct pipe_screen_config *config,
                     struct renderonly *ro);
|
||||||
|
|
||||||
|
#endif /* ETHOSU_SCREEN_H */
|
||||||
477
src/gallium/drivers/ethosu/ethosu_lower.c
Normal file
477
src/gallium/drivers/ethosu/ethosu_lower.c
Normal file
|
|
@ -0,0 +1,477 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ethosu_lower.h"
|
||||||
|
#include "ethosu_coefs.h"
|
||||||
|
#include "ethosu_sched.h"
|
||||||
|
|
||||||
|
static bool
|
||||||
|
is_depthwise(const struct pipe_ml_operation *poperation)
|
||||||
|
{
|
||||||
|
unsigned input_channels = poperation->input_tensors[0]->dims[3];
|
||||||
|
unsigned output_channels = poperation->output_tensors[0]->dims[3];
|
||||||
|
|
||||||
|
return poperation->conv.depthwise && input_channels > 1 &&
|
||||||
|
output_channels > 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Total padding (both sides together) needed along one axis for "same"
 * padding.
 *
 * input_size:  extent of the input along the axis.
 * stride:      kernel stride along the axis; must be non-zero.
 * filter_size: kernel extent along the axis.
 *
 * Returns filter_size minus the part of the last stride step that the input
 * already covers, clamped to zero. The clamp has to be done by comparison:
 * the operands are unsigned, so subtracting first would wrap around to a huge
 * value whenever the filter is smaller than the covered part (for example a
 * 1x1 kernel with stride 2).
 */
static unsigned
needed_total_padding(unsigned input_size, unsigned stride, unsigned filter_size)
{
   unsigned remainder = input_size % stride;
   unsigned covered = remainder == 0 ? stride : remainder;

   if (filter_size <= covered)
      return 0;

   return filter_size - covered;
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
ethosu_is_part_kernel_first(struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
// Determine which block traversal strategy has better DPU utilization
|
||||||
|
unsigned kernel_size = operation->kernel.height * operation->kernel.width;
|
||||||
|
unsigned depth = operation->ifm.shape.depth;
|
||||||
|
float depth_utilization = (float)depth / ethosu_round_up_to_multiple(depth, 32);
|
||||||
|
float part_kernel_utilization = ((float)depth / ethosu_round_up_to_multiple(depth, 8));
|
||||||
|
part_kernel_utilization *= (float)kernel_size / ethosu_round_up_to_multiple(kernel_size, 4);
|
||||||
|
|
||||||
|
if (operation->type != ETHOSU_OPERATION_TYPE_CONVOLUTION)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (operation->kernel.depthwise)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Part-kernel first is always better for ifm depths <= 8
|
||||||
|
if (part_kernel_utilization >= depth_utilization || depth <= 8)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
set_feature_maps(struct pipe_tensor *input_tensor,
|
||||||
|
struct pipe_tensor *output_tensor,
|
||||||
|
struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
operation->ifm.tensor_idx = input_tensor->index;
|
||||||
|
operation->ifm.shape.height = input_tensor->dims[1];
|
||||||
|
operation->ifm.shape.width = input_tensor->dims[2];
|
||||||
|
operation->ifm.shape.depth = input_tensor->dims[3];
|
||||||
|
operation->ifm.zero_point = input_tensor->zero_point;
|
||||||
|
operation->ifm.scale = input_tensor->scale;
|
||||||
|
operation->ifm.is_signed = input_tensor->is_signed;
|
||||||
|
|
||||||
|
operation->ofm.tensor_idx = output_tensor->index;
|
||||||
|
operation->ofm.shape.height = output_tensor->dims[1];
|
||||||
|
operation->ofm.shape.width = output_tensor->dims[2];
|
||||||
|
operation->ofm.shape.depth = output_tensor->dims[3];
|
||||||
|
operation->ofm.zero_point = output_tensor->zero_point;
|
||||||
|
operation->ofm.scale = output_tensor->scale;
|
||||||
|
operation->ofm.is_signed = output_tensor->is_signed;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct pipe_ml_operation *
|
||||||
|
ethosu_find_first_consumer(const struct pipe_ml_operation *poperations,
|
||||||
|
unsigned count,
|
||||||
|
unsigned tensor_index)
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < count; i++) {
|
||||||
|
const struct pipe_ml_operation *poperation = &poperations[i];
|
||||||
|
for (unsigned j = 0; j < poperation->input_count; j++)
|
||||||
|
if (poperation->input_tensors[j]->index == tensor_index)
|
||||||
|
return poperation;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
allocate_feature_maps(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
ethosu_allocate_feature_map(subgraph, &operation->ifm);
|
||||||
|
operation->ifm.tiles.height_0 = operation->ifm.shape.height;
|
||||||
|
operation->ifm.tiles.height_1 = operation->ifm.shape.height;
|
||||||
|
operation->ifm.tiles.width_0 = operation->ifm.shape.width;
|
||||||
|
|
||||||
|
ethosu_allocate_feature_map(subgraph, &operation->ofm);
|
||||||
|
operation->ofm.tiles.height_0 = operation->ofm.shape.height;
|
||||||
|
operation->ofm.tiles.height_1 = operation->ofm.shape.height;
|
||||||
|
operation->ofm.tiles.width_0 = operation->ofm.shape.width;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct pipe_ml_operation *
|
||||||
|
ethosu_find_first_producer(const struct pipe_ml_operation *poperations, unsigned count,
|
||||||
|
unsigned tensor_index)
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < count; i++) {
|
||||||
|
const struct pipe_ml_operation *poperation = &poperations[i];
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < poperation->output_count; j++) {
|
||||||
|
if (poperation->output_tensors[j]->index == tensor_index)
|
||||||
|
return poperation;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_ml_operation *poperation,
|
||||||
|
struct pipe_tensor *input_tensor,
|
||||||
|
struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
operation->type = ETHOSU_OPERATION_TYPE_CONVOLUTION;
|
||||||
|
|
||||||
|
operation->conv.depthwise = is_depthwise(poperation);
|
||||||
|
// operation->padding_same = poperation->conv.padding_same;
|
||||||
|
// operation->stride = poperation->conv.stride_x;
|
||||||
|
|
||||||
|
set_feature_maps(input_tensor, poperation->output_tensors[0], operation);
|
||||||
|
|
||||||
|
operation->kernel.height = poperation->conv.weight_tensor->dims[1];
|
||||||
|
operation->kernel.width = poperation->conv.weight_tensor->dims[2];
|
||||||
|
operation->kernel.stride_y = poperation->conv.stride_y;
|
||||||
|
operation->kernel.stride_x = poperation->conv.stride_x;
|
||||||
|
operation->kernel.dilation_y = 1;
|
||||||
|
operation->kernel.dilation_x = 1;
|
||||||
|
operation->kernel.depthwise = is_depthwise(poperation);
|
||||||
|
operation->kernel.scale = poperation->conv.weight_tensor->scale;
|
||||||
|
operation->kernel.zero_point = poperation->conv.weight_tensor->zero_point;
|
||||||
|
operation->kernel.is_signed = poperation->conv.weight_tensor->is_signed;
|
||||||
|
|
||||||
|
operation->conv.part_kernel_first = ethosu_is_part_kernel_first(operation);
|
||||||
|
|
||||||
|
if (poperation->conv.padding_same) {
|
||||||
|
unsigned vert = needed_total_padding(input_tensor->dims[1], poperation->conv.stride_y, poperation->conv.weight_tensor->dims[1]);
|
||||||
|
unsigned horiz = needed_total_padding(input_tensor->dims[2], poperation->conv.stride_x, poperation->conv.weight_tensor->dims[2]);
|
||||||
|
|
||||||
|
operation->pad.top = vert / 2;
|
||||||
|
operation->pad.left = horiz / 2;
|
||||||
|
operation->pad.bottom = (vert + 1) / 2;
|
||||||
|
operation->pad.right = (horiz + 1) / 2;
|
||||||
|
} else {
|
||||||
|
operation->pad.top = 0;
|
||||||
|
operation->pad.left = 0;
|
||||||
|
operation->pad.bottom = 0;
|
||||||
|
operation->pad.right = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
allocate_feature_maps(subgraph, operation);
|
||||||
|
|
||||||
|
ethosu_sched_operation(subgraph, operation);
|
||||||
|
fill_coefs(subgraph, operation, poperation->conv.bias_tensor->resource, poperation->conv.weight_tensor->resource);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_lower_pooling(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_ml_operation *poperation,
|
||||||
|
struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
operation->type = ETHOSU_OPERATION_TYPE_POOLING;
|
||||||
|
operation->pooling.avg = poperation->pooling.type == PIPE_ML_POOLING_TYPE_AVG;
|
||||||
|
|
||||||
|
set_feature_maps(poperation->input_tensors[0], poperation->output_tensors[0], operation);
|
||||||
|
|
||||||
|
operation->kernel.height = poperation->pooling.filter_height;
|
||||||
|
operation->kernel.width = poperation->pooling.filter_width;
|
||||||
|
operation->kernel.stride_y = poperation->pooling.stride_y;
|
||||||
|
operation->kernel.stride_x = poperation->pooling.stride_x;
|
||||||
|
operation->kernel.dilation_y = 1;
|
||||||
|
operation->kernel.dilation_x = 1;
|
||||||
|
|
||||||
|
if (poperation->pooling.padding_same) {
|
||||||
|
unsigned vert = needed_total_padding(operation->ifm.shape.height, poperation->pooling.stride_y, poperation->pooling.filter_height);
|
||||||
|
unsigned horiz = needed_total_padding(operation->ifm.shape.width, poperation->pooling.stride_x, poperation->pooling.filter_width);
|
||||||
|
|
||||||
|
operation->pad.top = vert / 2;
|
||||||
|
operation->pad.left = horiz / 2;
|
||||||
|
operation->pad.bottom = (vert + 1) / 2;
|
||||||
|
operation->pad.right = (horiz + 1) / 2;
|
||||||
|
} else {
|
||||||
|
operation->pad.top = 0;
|
||||||
|
operation->pad.left = 0;
|
||||||
|
operation->pad.bottom = 0;
|
||||||
|
operation->pad.right = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
allocate_feature_maps(subgraph, operation);
|
||||||
|
ethosu_sched_operation(subgraph, operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_ml_operation *poperation,
|
||||||
|
unsigned input_idx,
|
||||||
|
struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
operation->type = ETHOSU_OPERATION_TYPE_POOLING;
|
||||||
|
operation->pooling.avg = true;
|
||||||
|
|
||||||
|
set_feature_maps(poperation->input_tensors[input_idx], poperation->output_tensors[0], operation);
|
||||||
|
operation->ofm.shape.depth = operation->ifm.shape.depth;
|
||||||
|
|
||||||
|
operation->round_mode = ETHOSU_ROUNDING_NATURAL;
|
||||||
|
|
||||||
|
operation->kernel.height = 1;
|
||||||
|
operation->kernel.width = 1;
|
||||||
|
operation->kernel.stride_y = 1;
|
||||||
|
operation->kernel.stride_x = 1;
|
||||||
|
operation->kernel.dilation_y = 1;
|
||||||
|
operation->kernel.dilation_x = 1;
|
||||||
|
|
||||||
|
allocate_feature_maps(subgraph, operation);
|
||||||
|
for (unsigned i = 0; i < input_idx; i++) {
|
||||||
|
struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, operation->ofm.tensor_idx);
|
||||||
|
|
||||||
|
if (tensor->layout == ETHOSU_LAYOUT_NHWC)
|
||||||
|
operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[3];
|
||||||
|
else if (tensor->layout == ETHOSU_LAYOUT_NHCWB16)
|
||||||
|
operation->ofm.tiles.addresses[0] += poperation->input_tensors[i]->dims[2] * ALIGN(poperation->input_tensors[i]->dims[3], 16);
|
||||||
|
else
|
||||||
|
assert(0 && "Unsupported layout");
|
||||||
|
}
|
||||||
|
|
||||||
|
ethosu_sched_operation(subgraph, operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_lower_resize(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_ml_operation *poperation,
|
||||||
|
struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
operation->type = ETHOSU_OPERATION_TYPE_POOLING;
|
||||||
|
operation->pooling.avg = true;
|
||||||
|
|
||||||
|
set_feature_maps(poperation->input_tensors[0], poperation->output_tensors[0], operation);
|
||||||
|
operation->ifm.zero_point = 0;
|
||||||
|
operation->ofm.zero_point = 0;
|
||||||
|
|
||||||
|
operation->kernel.height = 1;
|
||||||
|
operation->kernel.width = 1;
|
||||||
|
operation->kernel.stride_y = 1;
|
||||||
|
operation->kernel.stride_x = 1;
|
||||||
|
operation->kernel.dilation_y = 1;
|
||||||
|
operation->kernel.dilation_x = 1;
|
||||||
|
|
||||||
|
operation->upscale = true;
|
||||||
|
|
||||||
|
allocate_feature_maps(subgraph, operation);
|
||||||
|
ethosu_sched_operation(subgraph, operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_lower_strided_slice(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_ml_operation *poperation,
|
||||||
|
struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
operation->type = ETHOSU_OPERATION_TYPE_POOLING;
|
||||||
|
operation->pooling.avg = true;
|
||||||
|
|
||||||
|
set_feature_maps(poperation->input_tensors[0], poperation->output_tensors[0], operation);
|
||||||
|
operation->ifm.shape = operation->ofm.shape;
|
||||||
|
operation->ifm.zero_point = 0;
|
||||||
|
operation->ofm.zero_point = 0;
|
||||||
|
|
||||||
|
operation->kernel.height = 1;
|
||||||
|
operation->kernel.width = 1;
|
||||||
|
operation->kernel.stride_y = 1;
|
||||||
|
operation->kernel.stride_x = 1;
|
||||||
|
operation->kernel.dilation_y = 1;
|
||||||
|
operation->kernel.dilation_x = 1;
|
||||||
|
|
||||||
|
allocate_feature_maps(subgraph, operation);
|
||||||
|
|
||||||
|
unsigned augmented_coord[5];
|
||||||
|
augmented_coord[0] = 0;
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
augmented_coord[i + 1] = poperation->slice.begin[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned augmented_strides[5];
|
||||||
|
augmented_strides[0] = operation->ifm.shape.depth * operation->ifm.shape.width * operation->ifm.shape.height;
|
||||||
|
augmented_strides[1] = 1;
|
||||||
|
augmented_strides[2] = operation->ifm.shape.depth * operation->ifm.shape.width;
|
||||||
|
augmented_strides[3] = operation->ifm.shape.depth;
|
||||||
|
augmented_strides[4] = 1;
|
||||||
|
|
||||||
|
unsigned address_offset = 0;
|
||||||
|
for (int i = 0; i < 5; ++i)
|
||||||
|
address_offset += augmented_coord[i] * augmented_strides[i];
|
||||||
|
|
||||||
|
operation->ifm.tiles.addresses[0] += address_offset;
|
||||||
|
|
||||||
|
ethosu_sched_operation(subgraph, operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_lower_add(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_ml_operation *poperation,
|
||||||
|
struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
operation->type = ETHOSU_OPERATION_TYPE_ELTWISE;
|
||||||
|
|
||||||
|
set_feature_maps(poperation->input_tensors[0], poperation->output_tensors[0], operation);
|
||||||
|
|
||||||
|
operation->ifm2.tensor_idx = poperation->input_tensors[1]->index;
|
||||||
|
operation->ifm2.shape.height = poperation->input_tensors[1]->dims[1];
|
||||||
|
operation->ifm2.shape.width = poperation->input_tensors[1]->dims[2];
|
||||||
|
operation->ifm2.shape.depth = poperation->input_tensors[1]->dims[3];
|
||||||
|
operation->ifm2.zero_point = poperation->input_tensors[1]->zero_point;
|
||||||
|
operation->ifm2.scale = poperation->input_tensors[1]->scale;
|
||||||
|
operation->ifm2.is_signed = poperation->input_tensors[1]->is_signed;
|
||||||
|
|
||||||
|
operation->kernel.height = 1;
|
||||||
|
operation->kernel.width = 1;
|
||||||
|
operation->kernel.stride_y = 1;
|
||||||
|
operation->kernel.stride_x = 1;
|
||||||
|
operation->kernel.dilation_y = 1;
|
||||||
|
operation->kernel.dilation_x = 1;
|
||||||
|
|
||||||
|
allocate_feature_maps(subgraph, operation);
|
||||||
|
|
||||||
|
ethosu_allocate_feature_map(subgraph, &operation->ifm2);
|
||||||
|
operation->ifm2.tiles.height_0 = operation->ifm2.shape.height;
|
||||||
|
operation->ifm2.tiles.height_1 = operation->ifm2.shape.height;
|
||||||
|
operation->ifm2.tiles.width_0 = operation->ifm2.shape.width;
|
||||||
|
|
||||||
|
ethosu_sched_operation(subgraph, operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ethosu_lower_dma(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_ml_operation *poperation,
|
||||||
|
struct ethosu_operation *conv_operation,
|
||||||
|
struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
operation->type = ETHOSU_OPERATION_TYPE_DMA;
|
||||||
|
|
||||||
|
operation->dma.address = conv_operation->conv.scales.address;
|
||||||
|
operation->dma.size = conv_operation->conv.scales.size + conv_operation->conv.weights.size;
|
||||||
|
|
||||||
|
conv_operation->conv.scales.region = SCRATCH_REGION;
|
||||||
|
conv_operation->conv.scales.address = 0;
|
||||||
|
|
||||||
|
conv_operation->conv.weights.region = SCRATCH_REGION;
|
||||||
|
conv_operation->conv.weights.address = conv_operation->conv.scales.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
register_tensors(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_ml_operation *poperations,
|
||||||
|
unsigned count)
|
||||||
|
{
|
||||||
|
for (unsigned i = 0; i < count; i++) {
|
||||||
|
const struct pipe_ml_operation *poperation = &poperations[i];
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < poperation->input_count; j++) {
|
||||||
|
struct pipe_tensor *ptensor = poperation->input_tensors[j];
|
||||||
|
ethosu_register_tensor(subgraph, ptensor);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < poperation->output_count; j++) {
|
||||||
|
struct pipe_tensor *ptensor = poperation->output_tensors[j];
|
||||||
|
ethosu_register_tensor(subgraph, ptensor);
|
||||||
|
|
||||||
|
if (!DBG_ENABLED(ETHOSU_DBG_DISABLE_NHCWB16)) {
|
||||||
|
struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, ptensor->index);
|
||||||
|
if (tensor->shape.depth % 16 == 0 &&
|
||||||
|
ethosu_find_first_consumer(poperations, count, ptensor->index)) {
|
||||||
|
tensor->layout = ETHOSU_LAYOUT_NHCWB16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ethosu_lower_graph(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_ml_operation *poperations, unsigned count)
|
||||||
|
{
|
||||||
|
register_tensors(subgraph, poperations, count);
|
||||||
|
|
||||||
|
/* Lower */
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
struct ethosu_operation operation = {0};
|
||||||
|
|
||||||
|
switch (poperations[i].type) {
|
||||||
|
|
||||||
|
case PIPE_ML_OPERATION_TYPE_CONVOLUTION: {
|
||||||
|
struct pipe_tensor *input_tensor = poperations[i].input_tensors[0];
|
||||||
|
const struct pipe_ml_operation *producer = ethosu_find_first_producer(poperations, count, input_tensor->index);
|
||||||
|
bool padded_input = producer && producer->type == PIPE_ML_OPERATION_TYPE_PAD;
|
||||||
|
|
||||||
|
if (padded_input) {
|
||||||
|
input_tensor = producer->input_tensors[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
ethosu_lower_convolution(subgraph, &poperations[i], input_tensor, &operation);
|
||||||
|
|
||||||
|
if (padded_input) {
|
||||||
|
operation.pad.top = 1;
|
||||||
|
operation.pad.left = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (operation.conv.scales.size + operation.conv.weights.size <=
|
||||||
|
ethosu_screen(subgraph->base.context->screen)->info.sram_size) {
|
||||||
|
struct ethosu_operation dma_operation = {0};
|
||||||
|
ethosu_lower_dma(subgraph, &poperations[i], &operation, &dma_operation);
|
||||||
|
|
||||||
|
util_dynarray_append(&subgraph->operations, struct ethosu_operation,
|
||||||
|
dma_operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
util_dynarray_append(&subgraph->operations, struct ethosu_operation,
|
||||||
|
operation);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PIPE_ML_OPERATION_TYPE_ADD: {
|
||||||
|
ethosu_lower_add(subgraph, &poperations[i], &operation);
|
||||||
|
util_dynarray_append(&subgraph->operations, struct ethosu_operation,
|
||||||
|
operation);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PIPE_ML_OPERATION_TYPE_POOLING: {
|
||||||
|
ethosu_lower_pooling(subgraph, &poperations[i], &operation);
|
||||||
|
util_dynarray_append(&subgraph->operations, struct ethosu_operation,
|
||||||
|
operation);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PIPE_ML_OPERATION_TYPE_STRIDED_SLICE: {
|
||||||
|
ethosu_lower_strided_slice(subgraph, &poperations[i], &operation);
|
||||||
|
util_dynarray_append(&subgraph->operations, struct ethosu_operation,
|
||||||
|
operation);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PIPE_ML_OPERATION_TYPE_CONCATENATION: {
|
||||||
|
for (int j = 0; j < poperations[i].input_count; j++) {
|
||||||
|
ethosu_lower_concatenation(subgraph, &poperations[i], j, &operation);
|
||||||
|
util_dynarray_append(&subgraph->operations, struct ethosu_operation,
|
||||||
|
operation);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PIPE_ML_OPERATION_TYPE_RESIZE: {
|
||||||
|
ethosu_lower_resize(subgraph, &poperations[i], &operation);
|
||||||
|
util_dynarray_append(&subgraph->operations, struct ethosu_operation,
|
||||||
|
operation);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case PIPE_ML_OPERATION_TYPE_PAD: {
|
||||||
|
// Just ignore the pad operation for now, as it will be handled by its consumers
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
default:
|
||||||
|
DBG("poperation->type %d\n", poperations[i].type);
|
||||||
|
UNREACHABLE("Unsupported ML operation type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
15
src/gallium/drivers/ethosu/ethosu_lower.h
Normal file
15
src/gallium/drivers/ethosu/ethosu_lower.h
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ETHOSU_LOWER_H
|
||||||
|
#define ETHOSU_LOWER_H
|
||||||
|
|
||||||
|
#include "ethosu_ml.h"
|
||||||
|
|
||||||
|
/*
 * Lower `count` gallium ML operations from `poperations` into the ethosu
 * operations of `subgraph`.
 */
void ethosu_lower_graph(struct ethosu_subgraph *subgraph,
                        const struct pipe_ml_operation *poperations,
                        unsigned count);
|
||||||
|
|
||||||
|
#endif /* ETHOSU_LOWER_H */
|
||||||
363
src/gallium/drivers/ethosu/ethosu_ml.c
Normal file
363
src/gallium/drivers/ethosu/ethosu_ml.c
Normal file
|
|
@ -0,0 +1,363 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "pipe/p_defines.h"
|
||||||
|
#include "pipe/p_screen.h"
|
||||||
|
#include "pipe/p_state.h"
|
||||||
|
#include "util/macros.h"
|
||||||
|
#include "util/u_dynarray.h"
|
||||||
|
#include "util/u_inlines.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <xf86drm.h>
|
||||||
|
|
||||||
|
#include "drm-uapi/ethosu_accel.h"
|
||||||
|
|
||||||
|
#include "ethosu_cmd.h"
|
||||||
|
#include "ethosu_lower.h"
|
||||||
|
#include "ethosu_ml.h"
|
||||||
|
|
||||||
|
struct ethosu_block IFM_UBLOCK = {2, 2, 8};
|
||||||
|
struct ethosu_block OFM_UBLOCK = {2, 2, 8};
|
||||||
|
struct ethosu_block ARCH_OFM_BLOCK_MAX = {64, 32, 128};
|
||||||
|
struct ethosu_block SUB_KERNEL_MAX = {8, 8, 65536};
|
||||||
|
|
||||||
|
/*
 * Debug helper: write `size` bytes starting at `ptr + offset` to a file named
 * "mesa-<name>-<operation_nr>-<suboperation_nr>.bin" in the current
 * directory.
 *
 * Dumping is best effort. Failure to open, write or close the file is
 * reported through DBG() and otherwise ignored, so a missing dump never takes
 * the process down.
 */
void
ethosu_dump_buffer(const uint8_t *ptr, char *name, int operation_nr,
                   int suboperation_nr, int offset, unsigned size)
{
   char buffer[255];

   /* The format uses %u, so hand it unsigned values. */
   snprintf(buffer, sizeof(buffer), "mesa-%s-%03u-%03u.bin", name,
            (unsigned)operation_nr, (unsigned)suboperation_nr);

   FILE *f = fopen(buffer, "wb");
   if (!f) {
      DBG("Error in opening file %s: %s\n", buffer, strerror(errno));
      return;
   }

   if (fwrite(ptr + offset, 1, size, f) != size)
      DBG("Error in writing to file: %s\n", strerror(errno));

   /* fclose() flushes; buffered write errors surface here. */
   if (fclose(f) != 0)
      DBG("Error in closing file %s: %s\n", buffer, strerror(errno));
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ethosu_register_tensor(struct ethosu_subgraph *subgraph,
|
||||||
|
const struct pipe_tensor *ptensor)
|
||||||
|
{
|
||||||
|
struct ethosu_tensor new_tensor = {0};
|
||||||
|
new_tensor.index = ptensor->index;
|
||||||
|
new_tensor.shape.height = ptensor->dims[1];
|
||||||
|
new_tensor.shape.width = ptensor->dims[2];
|
||||||
|
new_tensor.shape.depth = ptensor->dims[3];
|
||||||
|
new_tensor.layout = ETHOSU_LAYOUT_NHWC;
|
||||||
|
util_dynarray_append(&subgraph->tensors, struct ethosu_tensor, new_tensor);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph, struct ethosu_feature_map *feature_map)
|
||||||
|
{
|
||||||
|
struct ethosu_tensor *tensor = ethosu_find_tensor(subgraph, feature_map->tensor_idx);
|
||||||
|
unsigned size;
|
||||||
|
|
||||||
|
if (tensor->layout == ETHOSU_LAYOUT_NHWC) {
|
||||||
|
size = tensor->shape.width * tensor->shape.height * tensor->shape.depth;
|
||||||
|
} else if (tensor->layout == ETHOSU_LAYOUT_NHCWB16) {
|
||||||
|
size = tensor->shape.width * tensor->shape.height * ALIGN(tensor->shape.depth, 16);
|
||||||
|
} else {
|
||||||
|
assert(0 && "Unsupported layout");
|
||||||
|
size = 0; // This should never happen
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(tensor);
|
||||||
|
|
||||||
|
if (tensor->size > 0) {
|
||||||
|
feature_map->tiles.addresses[0] = tensor->offset;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
tensor->offset = subgraph->io_used;
|
||||||
|
tensor->size = size;
|
||||||
|
subgraph->io_used += ALIGN_POT(size, 16);
|
||||||
|
|
||||||
|
feature_map->tiles.addresses[0] = tensor->offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ethosu_tensor *
|
||||||
|
ethosu_find_tensor(struct ethosu_subgraph *subgraph, unsigned tensor_idx)
|
||||||
|
{
|
||||||
|
util_dynarray_foreach (&subgraph->tensors, struct ethosu_tensor, tensor) {
|
||||||
|
if (tensor->index == tensor_idx) {
|
||||||
|
return tensor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Round `a` up to the next multiple of `b`; `b` must not be zero. */
int
ethosu_round_up_to_multiple(int a, int b)
{
   int blocks = (a + b - 1) / b;

   return blocks * b;
}
|
||||||
|
|
||||||
|
/* Divide `a` by `b`, rounding the quotient up; `b` must not be zero. */
int
ethosu_round_up_divide(int a, int b)
{
   int biased = a + b - 1;

   return biased / b;
}
|
||||||
|
|
||||||
|
/*
 * Convert a real-valued scale into a fixed-point multiplier plus right shift
 * such that scale ~= multiplier / 2^shift.
 *
 * scale: the scale to quantize; expected to be finite and non-negative.
 * shift: receives the shift, always in the range [0, 63].
 *
 * Returns the multiplier. It is computed as a 32-bit unsigned value and
 * returned through the function's int return type.
 *
 * The shift is tracked in a signed local because it is negative for scales
 * of 2^31 and above; doing the range checks on the unsigned output variable
 * would never see a negative value.
 *
 * - Shift above 63: the multiplier is shifted right to compensate when that
 *   leaves any significant bits, otherwise multiplier and shift become 0.
 * - Shift below 0: the multiplier is shifted left to compensate when the
 *   result still fits in 32 bits, otherwise multiplier and shift become 0.
 *   NOTE(review): 0 is what the previous code produced for such scales as
 *   well; saturating may be more appropriate - confirm with the users.
 */
int
ethosu_quantize_scale(double scale, uint32_t *shift)
{
   int exponent = 0;
   double significand = frexp(scale, &exponent);

   /*
    * Round to nearest. The product is zero or at least 2^30, so adding one
    * half and truncating rounds halves away from zero, exactly like round().
    */
   uint32_t quantized_scale = (uint32_t)(ldexp(significand, 31) + 0.5);
   int shift_bits = 31 - exponent;

   if (shift_bits > 63) {
      int excess = shift_bits - 63;

      /* Excess of 32 or more would shift out every bit of the multiplier. */
      if (excess < 32 && quantized_scale > (UINT32_C(1) << excess)) {
         quantized_scale >>= excess;
         shift_bits = 63;
      } else {
         // Not possible to get back within bounds, set scale and shift to 0
         // as the shift would shift away all relevant bits anyway.
         quantized_scale = 0;
         shift_bits = 0;
      }
   } else if (shift_bits < 0) {
      /* Number of bits the multiplier may occupy before the left shift. */
      int headroom = shift_bits + 32;

      if (headroom > 0 && quantized_scale < (UINT32_C(1) << headroom)) {
         quantized_scale <<= -shift_bits;
      } else {
         quantized_scale = 0;
      }
      shift_bits = 0;
   }

   *shift = (uint32_t)shift_bits;

   return quantized_scale;
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
tensor_quantization_supported(struct pipe_tensor *tensor)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Per-axis quantization not supported, for details see:
|
||||||
|
* https://ai.google.dev/edge/litert/models/quantization_spec#per-axis_vs_per-tensor
|
||||||
|
*/
|
||||||
|
return tensor->scales == NULL && tensor->zero_points == NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ethosu_ml_operation_supported(struct pipe_context *pcontext,
|
||||||
|
const struct pipe_ml_operation *operation)
|
||||||
|
{
|
||||||
|
bool supported = false;
|
||||||
|
|
||||||
|
switch (operation->type) {
|
||||||
|
case PIPE_ML_OPERATION_TYPE_CONVOLUTION: {
|
||||||
|
struct pipe_tensor *input_tensor = operation->input_tensors[0];
|
||||||
|
struct pipe_tensor *weight_tensor = operation->conv.weight_tensor;
|
||||||
|
struct pipe_tensor *bias_tensor = operation->conv.bias_tensor;
|
||||||
|
struct pipe_tensor *output_tensor = operation->output_tensors[0];
|
||||||
|
|
||||||
|
// Dilation and per-axis quantization not yet implemented
|
||||||
|
if (tensor_quantization_supported(input_tensor) &&
|
||||||
|
tensor_quantization_supported(weight_tensor) &&
|
||||||
|
tensor_quantization_supported(bias_tensor) &&
|
||||||
|
tensor_quantization_supported(output_tensor) &&
|
||||||
|
operation->conv.dilation_width_factor == 1 &&
|
||||||
|
operation->conv.dilation_height_factor == 1)
|
||||||
|
supported = true;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PIPE_ML_OPERATION_TYPE_ADD:
|
||||||
|
supported = operation->input_tensors[0]->resource == NULL &&
|
||||||
|
operation->input_tensors[1]->resource == NULL;
|
||||||
|
break;
|
||||||
|
case PIPE_ML_OPERATION_TYPE_POOLING:
|
||||||
|
case PIPE_ML_OPERATION_TYPE_STRIDED_SLICE:
|
||||||
|
case PIPE_ML_OPERATION_TYPE_PAD:
|
||||||
|
case PIPE_ML_OPERATION_TYPE_RESIZE:
|
||||||
|
supported = true;
|
||||||
|
break;
|
||||||
|
case PIPE_ML_OPERATION_TYPE_CONCATENATION:
|
||||||
|
supported = operation->conc.axis == 3 ||
|
||||||
|
operation->conc.axis == -1;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
supported = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return supported;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct pipe_ml_subgraph *
|
||||||
|
ethosu_ml_subgraph_create(struct pipe_context *pcontext,
|
||||||
|
const struct pipe_ml_operation *poperations,
|
||||||
|
unsigned count)
|
||||||
|
{
|
||||||
|
struct pipe_screen *pscreen = pcontext->screen;
|
||||||
|
struct ethosu_screen *screen = ethosu_screen(pscreen);
|
||||||
|
struct ethosu_subgraph *subgraph;
|
||||||
|
|
||||||
|
subgraph = calloc(1, sizeof(*subgraph));
|
||||||
|
subgraph->base.context = pcontext;
|
||||||
|
|
||||||
|
util_dynarray_init(&subgraph->tensors, NULL);
|
||||||
|
util_dynarray_init(&subgraph->operations, NULL);
|
||||||
|
|
||||||
|
ethosu_lower_graph(subgraph, poperations, count);
|
||||||
|
|
||||||
|
ethosu_emit_cmdstream(subgraph);
|
||||||
|
|
||||||
|
struct drm_ethosu_cmdstream_bo_create cmd_bo_create = {
|
||||||
|
.size = (subgraph->cursor - subgraph->cmdstream) * sizeof(*subgraph->cursor),
|
||||||
|
.data = (uintptr_t)subgraph->cmdstream,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS))
|
||||||
|
ethosu_dump_buffer((uint8_t *)subgraph->cmdstream, "cmdstream", 0, 0, 0, (subgraph->cursor - subgraph->cmdstream) * sizeof(*subgraph->cursor));
|
||||||
|
|
||||||
|
int ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE, &cmd_bo_create);
|
||||||
|
assert(ret == 0);
|
||||||
|
|
||||||
|
free(subgraph->cmdstream);
|
||||||
|
|
||||||
|
subgraph->cmdstream_bo = cmd_bo_create.handle;
|
||||||
|
|
||||||
|
if (subgraph->coefs_used > 0) {
|
||||||
|
subgraph->coefs_rsrc = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, subgraph->coefs_used);
|
||||||
|
pipe_buffer_write(subgraph->base.context, subgraph->coefs_rsrc, 0, subgraph->coefs_used, subgraph->coefs);
|
||||||
|
|
||||||
|
free(subgraph->coefs);
|
||||||
|
subgraph->coefs = NULL;
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) {
|
||||||
|
struct pipe_transfer *transfer_in;
|
||||||
|
uint8_t *buf = pipe_buffer_map(subgraph->base.context, subgraph->coefs_rsrc,
|
||||||
|
PIPE_MAP_READ, &transfer_in);
|
||||||
|
ethosu_dump_buffer(buf, "coefs", 0, 0, 0, pipe_buffer_size(subgraph->coefs_rsrc));
|
||||||
|
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
subgraph->io_rsrc = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, subgraph->io_used);
|
||||||
|
|
||||||
|
return &subgraph->base;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
|
||||||
|
struct pipe_ml_subgraph *psubgraph,
|
||||||
|
unsigned inputs_count, unsigned input_idxs[],
|
||||||
|
void *inputs[], bool is_signed[])
|
||||||
|
{
|
||||||
|
struct ethosu_screen *screen = ethosu_screen(pcontext->screen);
|
||||||
|
struct ethosu_subgraph *subgraph = (struct ethosu_subgraph *)(psubgraph);
|
||||||
|
struct drm_ethosu_submit submit = {0};
|
||||||
|
struct drm_ethosu_job job = {0};
|
||||||
|
struct timespec start, end;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < inputs_count; i++) {
|
||||||
|
struct ethosu_tensor *input = ethosu_find_tensor(subgraph, input_idxs[i]);
|
||||||
|
assert(input);
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS))
|
||||||
|
ethosu_dump_buffer(inputs[i], "input", 0, 0, 0, input->size);
|
||||||
|
|
||||||
|
pipe_buffer_write(pcontext, subgraph->io_rsrc, input->offset, input->size, inputs[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) {
|
||||||
|
struct pipe_transfer *transfer_in;
|
||||||
|
uint8_t *buf = pipe_buffer_map(subgraph->base.context, subgraph->io_rsrc,
|
||||||
|
PIPE_MAP_READ, &transfer_in);
|
||||||
|
ethosu_dump_buffer(buf, "io-before", 0, 0, 0, pipe_buffer_size(subgraph->io_rsrc));
|
||||||
|
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||||
|
}
|
||||||
|
|
||||||
|
job.cmd_bo = subgraph->cmdstream_bo;
|
||||||
|
|
||||||
|
if (subgraph->coefs_rsrc) {
|
||||||
|
job.region_bo_handles[COEFS_REGION] = ethosu_resource(subgraph->coefs_rsrc)->handle;
|
||||||
|
if (!DBG_ENABLED(ETHOSU_DBG_DISABLE_SRAM)) {
|
||||||
|
job.region_bo_handles[SCRATCH_REGION] = 0;
|
||||||
|
job.sram_size = screen->info.sram_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
job.region_bo_handles[IO_REGION] = ethosu_resource(subgraph->io_rsrc)->handle;
|
||||||
|
|
||||||
|
submit.jobs = (uintptr_t)&job;
|
||||||
|
submit.job_count = 1;
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_MSGS))
|
||||||
|
clock_gettime(CLOCK_MONOTONIC_RAW, &start);
|
||||||
|
|
||||||
|
ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_SUBMIT, &submit);
|
||||||
|
assert(ret == 0);
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_MSGS)) {
|
||||||
|
clock_gettime(CLOCK_MONOTONIC_RAW, &end);
|
||||||
|
long long duration_ns = (long long)(end.tv_sec - start.tv_sec) * 1000000000LL + (end.tv_nsec - start.tv_nsec);
|
||||||
|
DBG("Submission took %lld ms\n", duration_ns / 1000000);
|
||||||
|
|
||||||
|
/* Force a sync */
|
||||||
|
struct pipe_transfer *transfer_in;
|
||||||
|
pipe_buffer_map(subgraph->base.context, subgraph->io_rsrc, PIPE_MAP_READ, &transfer_in);
|
||||||
|
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||||
|
|
||||||
|
clock_gettime(CLOCK_MONOTONIC_RAW, &end);
|
||||||
|
duration_ns = (long long)(end.tv_sec - start.tv_sec) * 1000000000LL + (end.tv_nsec - start.tv_nsec);
|
||||||
|
DBG("Execution took %lld ms\n", duration_ns / 1000000);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext,
|
||||||
|
struct pipe_ml_subgraph *psubgraph,
|
||||||
|
unsigned outputs_count,
|
||||||
|
unsigned output_idxs[], void *outputsv[],
|
||||||
|
bool is_signed[])
|
||||||
|
{
|
||||||
|
struct ethosu_subgraph *subgraph = (struct ethosu_subgraph *)(psubgraph);
|
||||||
|
uint8_t **outputs = (uint8_t **)outputsv;
|
||||||
|
|
||||||
|
for (int i = 0; i < outputs_count; i++) {
|
||||||
|
struct ethosu_tensor *output = ethosu_find_tensor(subgraph, output_idxs[i]);
|
||||||
|
|
||||||
|
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) {
|
||||||
|
struct pipe_transfer *transfer_in;
|
||||||
|
uint8_t *buf = pipe_buffer_map(subgraph->base.context, subgraph->io_rsrc,
|
||||||
|
PIPE_MAP_READ, &transfer_in);
|
||||||
|
ethosu_dump_buffer(buf, "io-after", 0, 0, 0, pipe_buffer_size(subgraph->io_rsrc));
|
||||||
|
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||||
|
}
|
||||||
|
|
||||||
|
pipe_buffer_read(pcontext, subgraph->io_rsrc, output->offset, output->size, outputs[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ethosu_ml_subgraph_destroy(struct pipe_context *pcontext,
|
||||||
|
struct pipe_ml_subgraph *psubgraph)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
struct drm_gem_close arg = {0};
|
||||||
|
struct ethosu_screen *screen = ethosu_screen(pcontext->screen);
|
||||||
|
struct ethosu_subgraph *subgraph = (struct ethosu_subgraph *)(psubgraph);
|
||||||
|
|
||||||
|
pipe_resource_reference(&subgraph->io_rsrc, NULL);
|
||||||
|
pipe_resource_reference(&subgraph->coefs_rsrc, NULL);
|
||||||
|
|
||||||
|
arg.handle = subgraph->cmdstream_bo;
|
||||||
|
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
|
||||||
|
assert(ret >= 0);
|
||||||
|
|
||||||
|
util_dynarray_fini(&subgraph->operations);
|
||||||
|
util_dynarray_fini(&subgraph->tensors);
|
||||||
|
|
||||||
|
free(subgraph);
|
||||||
|
}
|
||||||
229
src/gallium/drivers/ethosu/ethosu_ml.h
Normal file
229
src/gallium/drivers/ethosu/ethosu_ml.h
Normal file
|
|
@ -0,0 +1,229 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ETHOSU_ML_H
|
||||||
|
#define ETHOSU_ML_H
|
||||||
|
|
||||||
|
#include <util/u_dynarray.h>
|
||||||
|
|
||||||
|
#include "ethosu_device.h"
|
||||||
|
|
||||||
|
/* SHRAM bank bookkeeping used when laying out block configurations. */
#define SHRAM_BANKS 48
#define SHRAM_RESERVED_OUTPUT_BANKS 2
#define SHRAM_RESERVED_UNUSED_BANKS 2
#define SHRAM_RESERVED_END_BANKS 2
#define SHRAM_TOTAL_BANKS SHRAM_BANKS
#define SHRAM_BANK_SIZE_BYTES 1024
#define ACC_BITS 32 /* Use for now always 32-bit accumulators */
#define IFM_GRANULE 8
#define ACC_GRANULE 16
#define ARCH_SPLIT_DEPTH 16
#define BANK_SIZE_BYTES 1024

/* Indices into the region_bo_handles array of a job. */
#define COEFS_REGION 0
#define IO_REGION 1
#define SCRATCH_REGION 2

/* A three-dimensional extent (width x height x depth). */
struct ethosu_block {
   unsigned width;
   unsigned height;
   unsigned depth;
};

/* Block-size limits and micro-block sizes; defined outside this header. */
extern struct ethosu_block ARCH_OFM_BLOCK_MAX;
extern struct ethosu_block SUB_KERNEL_MAX;
extern struct ethosu_block IFM_UBLOCK;
extern struct ethosu_block OFM_UBLOCK;
|
||||||
|
|
||||||
|
/* Kinds of operation a lowered subgraph can contain. */
enum ethosu_operation_type {
   ETHOSU_OPERATION_TYPE_CONVOLUTION = 0,
   ETHOSU_OPERATION_TYPE_POOLING = 1,
   ETHOSU_OPERATION_TYPE_ELTWISE = 2,
   ETHOSU_OPERATION_TYPE_DMA = 3,
};
|
||||||
|
|
||||||
|
/* Tile layout of a feature map: up to four tiles with their base addresses. */
struct ethosu_tile_box {
   /* The height of tile 0 */
   unsigned height_0;
   /* The height of tile 1, 0 if unused */
   unsigned height_1;
   /* The width of tile 0, and tile 2 (if used) */
   unsigned width_0;
   /* A list of 4 addresses, set unused addresses to 0 */
   unsigned addresses[4];
};
|
||||||
|
|
||||||
|
/* Memory layouts a tensor can be stored in. */
enum ethosu_layout {
   ETHOSU_LAYOUT_NHWC = 0,
   ETHOSU_LAYOUT_NHCWB16 = 1,
};
|
||||||
|
|
||||||
|
/* Rounding modes selectable per operation. */
enum ethosu_rounding_mode {
   ETHOSU_ROUNDING_DOUBLE = 0,
   ETHOSU_ROUNDING_TRUNCATE = 1,
   ETHOSU_ROUNDING_NATURAL = 2,
};
|
||||||
|
struct ethosu_feature_map {
|
||||||
|
unsigned tensor_idx;
|
||||||
|
struct ethosu_block shape;
|
||||||
|
bool is_signed;
|
||||||
|
struct ethosu_tile_box tiles;
|
||||||
|
unsigned zero_point;
|
||||||
|
float scale;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Kernel geometry and quantization parameters of an operation. */
struct ethosu_kernel {
   /* Kernel extent. */
   unsigned height;
   unsigned width;

   /* Strides along each axis. */
   unsigned stride_y;
   unsigned stride_x;

   /* Dilation factors along each axis. */
   unsigned dilation_y;
   unsigned dilation_x;

   bool depthwise;
   bool is_signed;

   /* Quantization parameters. */
   unsigned zero_point;
   float scale;
};
|
||||||
|
|
||||||
|
/* Padding applied on each side of a feature map. */
struct ethosu_padding {
   unsigned top;
   unsigned left;
   unsigned bottom;
   unsigned right;
};
|
||||||
|
|
||||||
|
/* A span of `size` units starting at `address` inside a region. */
struct ethosu_address_range {
   unsigned region;
   unsigned address;
   long size;
};
|
||||||
|
|
||||||
|
/* Bank positions of the different buffers inside SHRAM. */
struct ethosu_shram_layout {
   unsigned ib_start;
   unsigned ib_end;
   unsigned ib_start2;
   unsigned ab_start;
   unsigned lut_start;
};
|
||||||
|
|
||||||
|
/* Accumulator formats. */
enum ethosu_acc_type {
   ETHOSU_ACC_TYPE_INT_32BIT = 0,
   ETHOSU_ACC_TYPE_INT_40BIT = 1,
   ETHOSU_ACC_TYPE_FP_S5_10 = 2,
};
|
||||||
|
|
||||||
|
struct ethosu_block_config {
|
||||||
|
struct ethosu_block ifm_block;
|
||||||
|
struct ethosu_block ofm_block;
|
||||||
|
struct ethosu_shram_layout shram_layout;
|
||||||
|
unsigned bank_size;
|
||||||
|
enum ethosu_acc_type acc_type;
|
||||||
|
bool is_partkernel;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define MAX_MEMORY_ACCESSES 5 /* IFM, IFM2, Scales, Weights, LUT*/
|
||||||
|
|
||||||
|
struct ethosu_operation {
|
||||||
|
enum ethosu_operation_type type;
|
||||||
|
|
||||||
|
struct ethosu_block_config block_config;
|
||||||
|
|
||||||
|
union {
|
||||||
|
struct {
|
||||||
|
struct ethosu_address_range weights;
|
||||||
|
struct ethosu_address_range scales;
|
||||||
|
bool part_kernel_first;
|
||||||
|
bool depthwise;
|
||||||
|
} conv;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
bool avg; /* true for avg, false for max */
|
||||||
|
} pooling;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
unsigned lut_bytes;
|
||||||
|
} eltwise;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
unsigned address;
|
||||||
|
long size;
|
||||||
|
} dma;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ethosu_feature_map ifm;
|
||||||
|
struct ethosu_feature_map ifm2;
|
||||||
|
struct ethosu_feature_map ofm;
|
||||||
|
|
||||||
|
struct ethosu_kernel kernel;
|
||||||
|
struct ethosu_padding pad;
|
||||||
|
bool upscale;
|
||||||
|
enum ethosu_rounding_mode round_mode;
|
||||||
|
|
||||||
|
struct ethosu_address_range read_accesses[MAX_MEMORY_ACCESSES];
|
||||||
|
struct ethosu_address_range write_accesses[MAX_MEMORY_ACCESSES];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ethosu_tensor {
|
||||||
|
unsigned index;
|
||||||
|
unsigned offset;
|
||||||
|
unsigned size;
|
||||||
|
struct ethosu_block shape;
|
||||||
|
enum ethosu_layout layout;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ethosu_subgraph {
|
||||||
|
struct pipe_ml_subgraph base;
|
||||||
|
|
||||||
|
struct util_dynarray operations; /* ethosu_operation */
|
||||||
|
struct util_dynarray tensors; /* ethosu_tensor* */
|
||||||
|
|
||||||
|
unsigned cmdstream_used;
|
||||||
|
uint32_t *cmdstream;
|
||||||
|
uint32_t *cursor;
|
||||||
|
uint32_t cmdstream_bo;
|
||||||
|
|
||||||
|
struct pipe_resource *io_rsrc;
|
||||||
|
unsigned io_used;
|
||||||
|
|
||||||
|
uint8_t *coefs;
|
||||||
|
struct pipe_resource *coefs_rsrc;
|
||||||
|
unsigned coefs_used;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* pipe_context ML entry points implemented by this driver. */
bool ethosu_ml_operation_supported(struct pipe_context *pcontext,
                                   const struct pipe_ml_operation *operation);

struct pipe_ml_subgraph *
ethosu_ml_subgraph_create(struct pipe_context *pcontext,
                          const struct pipe_ml_operation *poperations,
                          unsigned count);

void ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
                               struct pipe_ml_subgraph *psubgraph,
                               unsigned inputs_count, unsigned input_idxs[],
                               void *inputs[], bool is_signed[]);

void ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext,
                                     struct pipe_ml_subgraph *psubgraph,
                                     unsigned outputs_count,
                                     unsigned output_idxs[], void *outputs[],
                                     bool is_signed[]);

void ethosu_ml_subgraph_destroy(struct pipe_context *context,
                                struct pipe_ml_subgraph *psubgraph);

/* Helpers shared between the driver's source files. */
void ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph,
                                 struct ethosu_feature_map *feature_map);

void ethosu_register_tensor(struct ethosu_subgraph *subgraph,
                            const struct pipe_tensor *ptensor);

struct ethosu_tensor *ethosu_find_tensor(struct ethosu_subgraph *subgraph,
                                         unsigned tensor_idx);

void ethosu_dump_buffer(const uint8_t *ptr, char *name, int operation_nr,
                        int suboperation_nr, int offset, unsigned size);

int ethosu_round_up_to_multiple(int a, int b);

int ethosu_round_up_divide(int a, int b);

/* Returns the quantized scale and stores the matching shift in *shift. */
int ethosu_quantize_scale(double scale, uint32_t *shift);
|
||||||
|
|
||||||
|
#endif /* ETHOSU_ML_H */
|
||||||
193
src/gallium/drivers/ethosu/ethosu_sched.c
Normal file
193
src/gallium/drivers/ethosu/ethosu_sched.c
Normal file
|
|
@ -0,0 +1,193 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ethosu_sched.h"
|
||||||
|
|
||||||
|
/* Input extent needed along one axis to produce `value` outputs with the
 * given stride, where `border` is the extent needed for a single output.
 */
static int
required_input_size(int value, int stride, int border)
{
   const int steps = value - 1;

   return steps * stride + border;
}
|
||||||
|
|
||||||
|
static struct ethosu_block
|
||||||
|
_get_ifm_blocksize(struct ethosu_operation *operation, struct ethosu_block ofm_block)
|
||||||
|
{
|
||||||
|
struct ethosu_block ifm_block = {0};
|
||||||
|
|
||||||
|
// IFM block height
|
||||||
|
int h = required_input_size(ofm_block.height, operation->kernel.stride_y, MIN2(operation->kernel.height, SUB_KERNEL_MAX.height));
|
||||||
|
h = ALIGN(h, OFM_UBLOCK.height);
|
||||||
|
|
||||||
|
// IFM block width
|
||||||
|
int w = required_input_size(ofm_block.width, operation->kernel.stride_x, MIN2(operation->kernel.width, SUB_KERNEL_MAX.width));
|
||||||
|
w = ALIGN(w, OFM_UBLOCK.width);
|
||||||
|
|
||||||
|
ifm_block.height = h;
|
||||||
|
ifm_block.width = w;
|
||||||
|
ifm_block.depth = ofm_block.depth;
|
||||||
|
|
||||||
|
return ifm_block;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_block, struct ethosu_block ifm_block, struct ethosu_shram_layout *layout)
|
||||||
|
{
|
||||||
|
int ifm_bytes = ifm_block.width * ifm_block.height * ALIGN(ifm_block.depth, 8);
|
||||||
|
int ifm_banks = ALIGN(DIV_ROUND_UP(ifm_bytes, BANK_SIZE_BYTES) * 2, IFM_GRANULE);
|
||||||
|
int lut_bytes = operation->type == ETHOSU_OPERATION_TYPE_ELTWISE ? operation->eltwise.lut_bytes : 0;
|
||||||
|
int lut_banks = MAX2(DIV_ROUND_UP(lut_bytes, 1024), SHRAM_RESERVED_END_BANKS);
|
||||||
|
int lut_start = SHRAM_TOTAL_BANKS - lut_banks;
|
||||||
|
int ifm_end = SHRAM_RESERVED_OUTPUT_BANKS + ifm_banks;
|
||||||
|
int ifm2_start = ifm_end;
|
||||||
|
int acc_start = lut_start;
|
||||||
|
|
||||||
|
if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE) {
|
||||||
|
int acc_bytes = (ofm_block.width * ofm_block.height * ALIGN(ofm_block.depth, 8) * 32) / 8;
|
||||||
|
int acc_banks = ALIGN(DIV_ROUND_UP(acc_bytes, BANK_SIZE_BYTES) * 2, ACC_GRANULE);
|
||||||
|
acc_start -= acc_banks;
|
||||||
|
} else {
|
||||||
|
int ifm2_banks = ifm_banks; /* TODO: Fix for scalar eltwise */
|
||||||
|
|
||||||
|
if (ifm2_start + ifm2_banks > acc_start)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
ifm_end = acc_start;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ifm_end > acc_start)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
layout->ib_start = SHRAM_RESERVED_OUTPUT_BANKS;
|
||||||
|
layout->ib_start2 = ifm2_start;
|
||||||
|
layout->ib_end = ifm_end;
|
||||||
|
layout->ab_start = acc_start;
|
||||||
|
layout->lut_start = lut_start;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ethosu_block_config
|
||||||
|
find_block_config(struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
struct ethosu_block_config config = {};
|
||||||
|
struct ethosu_block search_space = ARCH_OFM_BLOCK_MAX;
|
||||||
|
float ofm_elements = operation->ofm.shape.width * operation->ofm.shape.height * operation->ofm.shape.depth;
|
||||||
|
float ifm_elements = operation->ifm.shape.width * operation->ifm.shape.height * operation->ifm.shape.depth;
|
||||||
|
bool is_pooling = operation->type == ETHOSU_OPERATION_TYPE_POOLING;
|
||||||
|
bool is_depthwise = operation->conv.depthwise;
|
||||||
|
bool is_equal_depth = is_pooling || is_depthwise || operation->type == ETHOSU_OPERATION_TYPE_ELTWISE;
|
||||||
|
bool is_convolution = operation->type == ETHOSU_OPERATION_TYPE_CONVOLUTION;
|
||||||
|
float best_cost = FLT_MAX;
|
||||||
|
unsigned best_coverage = UINT_MAX;
|
||||||
|
|
||||||
|
search_space.width = MIN2(search_space.width, operation->ofm.shape.width);
|
||||||
|
search_space.height = MIN2(search_space.height, operation->ofm.shape.height);
|
||||||
|
search_space.depth = MIN2(search_space.depth, operation->ofm.shape.depth);
|
||||||
|
|
||||||
|
unsigned depth = MAX2(OFM_UBLOCK.depth, MIN2(search_space.depth, ARCH_SPLIT_DEPTH));
|
||||||
|
|
||||||
|
if (depth < operation->ofm.shape.depth) {
|
||||||
|
depth = ALIGN(depth, ARCH_SPLIT_DEPTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
search_space.width = ALIGN(search_space.width, OFM_UBLOCK.width);
|
||||||
|
search_space.height = ALIGN(search_space.height, OFM_UBLOCK.height);
|
||||||
|
search_space.depth = ALIGN(search_space.depth, OFM_UBLOCK.depth);
|
||||||
|
|
||||||
|
while (depth <= search_space.depth) {
|
||||||
|
bool wont_fit[search_space.height + 1][search_space.width + 1];
|
||||||
|
memset(wont_fit, 0, sizeof(wont_fit));
|
||||||
|
|
||||||
|
for (unsigned height = OFM_UBLOCK.height; height <= search_space.height; height += OFM_UBLOCK.height) {
|
||||||
|
for (unsigned width = OFM_UBLOCK.width; width <= search_space.width; width += OFM_UBLOCK.width) {
|
||||||
|
|
||||||
|
if (wont_fit[height][width])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
struct ethosu_block ofm_block = {height, width, depth};
|
||||||
|
struct ethosu_block ifm_block = _get_ifm_blocksize(operation, ofm_block);
|
||||||
|
|
||||||
|
if (!is_equal_depth)
|
||||||
|
ifm_block.depth = ALIGN(MIN2(operation->ifm.shape.depth, operation->conv.part_kernel_first ? 16 : 32), IFM_UBLOCK.depth);
|
||||||
|
|
||||||
|
// Try to fit the blocks in SHRAM
|
||||||
|
struct ethosu_shram_layout layout = {0};
|
||||||
|
if (try_block_config(operation, ofm_block, ifm_block, &layout)) {
|
||||||
|
|
||||||
|
struct ethosu_block full_blocks = {DIV_ROUND_UP(operation->ofm.shape.width, ofm_block.width),
|
||||||
|
DIV_ROUND_UP(operation->ofm.shape.height, ofm_block.height),
|
||||||
|
DIV_ROUND_UP(operation->ofm.shape.depth, ofm_block.depth)};
|
||||||
|
float blocks[3] = {operation->ofm.shape.width / (float)ofm_block.width,
|
||||||
|
operation->ofm.shape.height / (float)ofm_block.height,
|
||||||
|
operation->ofm.shape.depth / (float)ofm_block.depth};
|
||||||
|
|
||||||
|
float weight_area = is_convolution ? operation->kernel.width * operation->kernel.height : 0;
|
||||||
|
float weight_fetch = weight_area * operation->ifm.shape.depth * full_blocks.width * full_blocks.height;
|
||||||
|
if (!is_depthwise)
|
||||||
|
weight_fetch *= blocks[2] * ofm_block.depth;
|
||||||
|
|
||||||
|
float ifm_fetch = ifm_block.width * ifm_block.height * operation->ifm.shape.depth * blocks[0] * blocks[1];
|
||||||
|
if (!is_equal_depth)
|
||||||
|
ifm_fetch *= full_blocks.depth;
|
||||||
|
|
||||||
|
float relative_cost = 0;
|
||||||
|
if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE)
|
||||||
|
relative_cost = (ifm_fetch + weight_fetch) / ofm_elements;
|
||||||
|
else
|
||||||
|
relative_cost = ofm_elements / (height * width * depth);
|
||||||
|
|
||||||
|
if (ifm_elements < ifm_block.width * ifm_block.height * ifm_block.depth * 2)
|
||||||
|
relative_cost /= 2.0f;
|
||||||
|
|
||||||
|
if (relative_cost <= best_cost) {
|
||||||
|
bool choose_this = false;
|
||||||
|
|
||||||
|
if (relative_cost == best_cost) {
|
||||||
|
struct ethosu_block coverage_shape = {
|
||||||
|
MIN2(ifm_block.height, operation->ifm.shape.height),
|
||||||
|
MIN2(ifm_block.width, operation->ifm.shape.width),
|
||||||
|
MIN2(ifm_block.depth, operation->ifm.shape.depth)};
|
||||||
|
float coverage = (float)(operation->ifm.shape.width * operation->ifm.shape.height) /
|
||||||
|
(float)MAX2(1, coverage_shape.width * coverage_shape.height);
|
||||||
|
|
||||||
|
if (coverage <= best_coverage && (height <= 4 && width <= 4)) {
|
||||||
|
best_coverage = coverage;
|
||||||
|
choose_this = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
best_coverage = UINT_MAX;
|
||||||
|
choose_this = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (choose_this) {
|
||||||
|
config.shram_layout = layout;
|
||||||
|
config.ifm_block = ifm_block;
|
||||||
|
config.ofm_block.height = height;
|
||||||
|
config.ofm_block.width = width;
|
||||||
|
config.ofm_block.depth = depth;
|
||||||
|
|
||||||
|
best_cost = relative_cost;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
wont_fit[height][width] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
depth += OFM_UBLOCK.depth;
|
||||||
|
if (depth < operation->ofm.shape.depth) {
|
||||||
|
depth = ALIGN(depth, ARCH_SPLIT_DEPTH);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return config;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ethosu_sched_operation(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||||
|
{
|
||||||
|
operation->block_config = find_block_config(operation);
|
||||||
|
}
|
||||||
13
src/gallium/drivers/ethosu/ethosu_sched.h
Normal file
13
src/gallium/drivers/ethosu/ethosu_sched.h
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2025 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ETHOSU_SCHED_H
|
||||||
|
#define ETHOSU_SCHED_H
|
||||||
|
|
||||||
|
#include "ethosu_ml.h"
|
||||||
|
|
||||||
|
/* Pick the block configuration for an operation; the result is stored in
 * operation->block_config.
 */
void ethosu_sched_operation(struct ethosu_subgraph *subgraph,
                            struct ethosu_operation *operation);
|
||||||
|
|
||||||
|
#endif /* ETHOSU_SCHED_H */
|
||||||
125
src/gallium/drivers/ethosu/gen_header.py
Normal file
125
src/gallium/drivers/ethosu/gen_header.py
Normal file
|
|
@ -0,0 +1,125 @@
|
||||||
|
#!/usr/bin/python3
|
||||||
|
#
|
||||||
|
# Copyright © 2019-2024 Google, Inc.
|
||||||
|
# Copyright © 2024-2025 Tomeu Vizoso
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import argparse
|
||||||
|
import time
|
||||||
|
import datetime
|
||||||
|
from gen_parser import Parser, Reg, Enum, mask, Error
|
||||||
|
|
||||||
|
|
||||||
|
def dump_c(args, guard, func):
    """Print a complete C header to stdout.

    The header consists of an include guard named ``guard``, a banner comment
    listing the XML files the parser read (with size and mtime) plus the
    copyright/license information it collected, a small compatibility
    prelude, and finally whatever ``func(parser)`` prints.

    ``args`` must provide ``rnn`` and ``xml``, which are handed to
    ``Parser.parse``. If parsing raises ``Error`` the error is printed to
    stderr and the process exits with status 1.
    """
    p = Parser()

    try:
        p.parse(args.rnn, args.xml)
    except Error as e:
        print(e, file=sys.stderr)
        # sys.exit() rather than the bare exit(): the latter is injected by
        # the site module and is not guaranteed to exist.
        sys.exit(1)

    print("#ifndef %s\n#define %s\n" % (guard, guard))

    print("""/* Autogenerated file, DO NOT EDIT manually!

This file was generated by the rules-ng-ng gen_header.py tool in this git repository:
http://gitlab.freedesktop.org/mesa/mesa/
git clone https://gitlab.freedesktop.org/mesa/mesa.git

The rules-ng-ng source files this header was generated from are:
""")
    # Pad every path to the longest one so the size/date columns line up.
    maxlen = 0
    for filepath in p.xml_files:
        maxlen = max(maxlen, len(filepath))
    for filepath in p.xml_files:
        pad = " " * (maxlen - len(filepath))
        filesize = str(os.path.getsize(filepath))
        filesize = " " * (7 - len(filesize)) + filesize
        filetime = time.ctime(os.path.getmtime(filepath))
        print("- " + filepath + pad + " (" + filesize + " bytes, from " + filetime + ")")
    if p.copyright_year:
        current_year = str(datetime.date.today().year)
        print()
        print("Copyright (C) %s-%s by the following authors:" % (p.copyright_year, current_year))
        for author in p.authors:
            print("- " + author)
    if p.license:
        print(p.license)
    print("*/")

    print()
    print("#ifdef __KERNEL__")
    print("#include <linux/bug.h>")
    print("#define assert(x) BUG_ON(!(x))")
    print("#else")
    print("#include <assert.h>")
    print("#endif")
    print()

    print("#ifdef __cplusplus")
    print("#define __struct_cast(X)")
    print("#else")
    print("#define __struct_cast(X) (struct X)")
    print("#endif")
    print()

    func(p)

    print("\n#endif /* %s */" % guard)
|
||||||
|
|
||||||
|
|
||||||
|
def dump_c_defines(args):
    """Emit the C header with the definitions produced by ``Parser.dump``.

    The include guard is the XML file's base name, with dots replaced by
    underscores and upper-cased.
    """
    xml_name = os.path.basename(args.xml)
    guard = xml_name.replace('.', '_').upper()

    def emit(parser):
        return parser.dump()

    dump_c(args, guard, emit)
|
||||||
|
|
||||||
|
|
||||||
|
def dump_c_pack_structs(args):
    """Emit the C header with the output of ``Parser.dump_structs``.

    The include guard is built like the one of ``dump_c_defines`` with a
    ``_STRUCTS`` suffix appended.
    """
    xml_name = os.path.basename(args.xml)
    guard = xml_name.replace('.', '_').upper() + '_STRUCTS'

    def emit(parser):
        return parser.dump_structs()

    dump_c(args, guard, emit)
|
||||||
|
|
||||||
|
|
||||||
|
def dump_py_defines(args):
    """Print a Python ``IntEnum`` class with the registers of the XML file.

    The class is named after the upper-cased XML base name (extension
    removed) followed by ``Regs``; its body is printed by
    ``Parser.dump_regs_py``. If parsing raises ``Error`` the error is printed
    to stderr and the process exits with status 1.
    """
    p = Parser()

    try:
        p.parse(args.rnn, args.xml)
    except Error as e:
        print(e, file=sys.stderr)
        # sys.exit() rather than the bare exit(): the latter is injected by
        # the site module and is not guaranteed to exist.
        sys.exit(1)

    file_name = os.path.splitext(os.path.basename(args.xml))[0]

    print("from enum import IntEnum")
    print("class %sRegs(IntEnum):" % file_name.upper())

    p.dump_regs_py()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command line and run the selected generator.

    ``--rnn`` and ``--xml`` are mandatory, followed by one of the
    sub-commands ``c-defines``, ``c-pack-structs`` or ``py-defines``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--rnn', type=str, required=True)
    parser.add_argument('--xml', type=str, required=True)

    subparsers = parser.add_subparsers(required=True)

    # Each sub-command stores its handler in args.func.
    handlers = (
        ('c-defines', dump_c_defines),
        ('c-pack-structs', dump_c_pack_structs),
        ('py-defines', dump_py_defines),
    )
    for command, handler in handlers:
        subparsers.add_parser(command).set_defaults(func=handler)

    args = parser.parse_args()
    args.func(args)


if __name__ == '__main__':
    main()
|
||||||
745
src/gallium/drivers/ethosu/gen_parser.py
Normal file
745
src/gallium/drivers/ethosu/gen_parser.py
Normal file
|
|
@ -0,0 +1,745 @@
|
||||||
|
import xml.parsers.expat
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import collections
|
||||||
|
|
||||||
|
class Error(Exception):
    """Error raised by the parser; the text is kept in ``message``."""

    def __init__(self, message):
        self.message = message
|
||||||
|
|
||||||
|
class Enum(object):
    """A named list of ``(name, value)`` pairs that is dumped as a C enum."""

    def __init__(self, name):
        self.name = name
        # List of (name, value) tuples, in insertion order.
        self.values = []

    def has_name(self, name):
        """Return True if one of the stored entries is called ``name``."""
        return any(entry_name == name for (entry_name, _) in self.values)

    def dump(self):
        """Print the C ``enum`` definition to stdout.

        All values are printed in hexadecimal as soon as one of them is
        larger than 0x1000, otherwise in decimal.
        """
        # Build the full list first so every value is compared, like a plain
        # loop over all entries would do.
        use_hex = any([value > 0x1000 for (_, value) in self.values])
        line_format = "\t%s = 0x%08x," if use_hex else "\t%s = %d,"

        print("enum %s {" % self.name)
        for (name, value) in self.values:
            print(line_format % (name, value))
        print("};\n")

    def dump_pack_struct(self):
        """Enums have no pack struct; nothing is printed."""
        pass
|
||||||
|
|
||||||
|
class Field(object):
    """A bit field ``[low, high]`` with an optional type and right shift."""

    def __init__(self, name, low, high, shr, type, parser):
        self.name = name
        self.low = low
        self.high = high
        self.shr = shr
        self.type = type

        builtin_types = [ None, "a3xx_regid", "boolean", "uint", "hex", "int", "fixed", "ufixed", "float", "address", "waddress" ]

        # Highest bit position allowed by what the parser is currently in.
        maxpos = parser.current_bitsize - 1

        if low < 0 or low > maxpos:
            raise parser.error("low attribute out of range: %d" % low)
        if high < 0 or high > maxpos:
            raise parser.error("high attribute out of range: %d" % high)
        if high < low:
            raise parser.error("low is greater than high: low=%d, high=%d" % (low, high))

        width_minus_one = high - low
        if self.type == "boolean" and low != high:
            raise parser.error("booleans should be 1 bit fields")
        elif self.type == "float" and width_minus_one not in (31, 15):
            raise parser.error("floats should be 16 or 32 bit fields")
        elif self.type not in builtin_types and self.type not in parser.enums:
            raise parser.error("unknown type '%s'" % self.type)

    def ctype(self, var_name):
        """Return ``(c_type, expression)`` for packing ``var_name``.

        ``c_type`` is the C type of the value and ``expression`` the C
        expression that converts ``var_name`` into the bits to store,
        including the right shift when ``shr`` is positive.
        """
        # Most types store the variable unchanged.
        val = var_name

        if self.type is None:
            type = "uint32_t"
        elif self.type == "boolean":
            type = "bool"
        elif self.type in ("uint", "hex", "a3xx_regid"):
            type = "uint32_t"
        elif self.type == "int":
            type = "int32_t"
        elif self.type == "fixed":
            # self.radix is not assigned in __init__; it has to be set on the
            # instance before ctype() is called for fixed-point fields.
            type = "float"
            val = "((int32_t)(%s * %d.0))" % (var_name, 1 << self.radix)
        elif self.type == "ufixed":
            type = "float"
            val = "((uint32_t)(%s * %d.0))" % (var_name, 1 << self.radix)
        elif self.type == "float" and self.high - self.low == 31:
            type = "float"
            val = "fui(%s)" % var_name
        elif self.type == "float" and self.high - self.low == 15:
            type = "float"
            val = "_mesa_float_to_half(%s)" % var_name
        elif self.type in ("address", "waddress"):
            type = "uint64_t"
        else:
            type = "enum %s" % self.type

        if self.shr > 0:
            val = "(%s >> %d)" % (val, self.shr)

        return (type, val)
|
||||||
|
|
||||||
|
def tab_to(name, value):
    """Print `name`, a run of tabs, then `value`.

    The number of tabs shrinks as `name` grows (one tab fewer per 8
    characters of name) so that values line up in a column; at least one
    tab is always emitted.
    """
    tabs = max(1, (68 - (len(name) & ~7)) // 8)
    print(name + "\t" * tabs + value)
|
||||||
|
|
||||||
|
def mask(low, high):
    """Return an integer with bits `low`..`high` (inclusive) set."""
    width = high + 1 - low
    all_ones_64 = 0xffffffffffffffff
    # Trim the 64-bit all-ones pattern down to `width` bits, then move it
    # into position.
    return (all_ones_64 >> (64 - width)) << low
|
||||||
|
|
||||||
|
def field_name(reg, f):
    """Return the lower-case C member name used for field `f` of `reg`.

    Names that collide with the C keywords checked below, or that do not
    start with a letter, get a leading underscore.
    """
    # A field without a name occurs when a reg is defined with no bitset
    # fields, ie.
    # <reg32 offset="0x88db" name="RB_BLIT_DST_ARRAY_PITCH" low="0" high="28" shr="6" type="uint"/>
    # in which case the register's own name is used.
    ident = (f.name if f.name else reg.name).lower()

    needs_prefix = ident in [ "double", "float", "int" ] or not ident[0].isalpha()
    return "_" + ident if needs_prefix else ident
|
||||||
|
|
||||||
|
class Bitset(object):
    """A named collection of bitfields, optionally seeded from a template.

    `inline` is False on construction; the parser sets it to True for
    bitsets that are emitted as part of the register that uses them.
    """

    def __init__(self, name, template):
        self.name = name
        self.inline = False
        if template:
            # Shallow copy so appending fields here leaves the template alone.
            self.fields = template.fields[:]
        else:
            self.fields = []

    # Get address field if there is one in the bitset, else return None:
    def get_address_field(self):
        """Return the first field of type address/waddress, or None."""
        for f in self.fields:
            if f.type in [ "address", "waddress" ]:
                return f
        return None

    def dump_regpair_builder(self, reg):
        """Print the body of a C function packing `fields` into a reg pair.

        Emits debug asserts that no field value overflows its bit range,
        then a `struct fd_reg_pair` initializer for register `reg`.
        """
        print("#ifndef NDEBUG")
        known_mask = 0
        for f in self.fields:
            known_mask |= mask(f.low, f.high)
            # Booleans and addresses get no range assert.
            if f.type in [ "boolean", "address", "waddress" ]:
                continue
            type, val = f.ctype("fields.%s" % field_name(reg, f))
            print(" assert((%-40s & 0x%08x) == 0);" % (val, 0xffffffff ^ mask(0 , f.high - f.low)))
        # `unknown` must not overlap any bit that has a named field.
        print(" assert((%-40s & 0x%08x) == 0);" % ("fields.unknown", known_mask))
        print("#endif\n")

        print(" return (struct fd_reg_pair) {")
        if reg.array:
            print(" .reg = REG_%s(__i)," % reg.full_name)
        else:
            print(" .reg = REG_%s," % reg.full_name)

        print(" .value =")
        for f in self.fields:
            if f.type in [ "address", "waddress" ]:
                continue
            type, val = f.ctype("fields.%s" % field_name(reg, f))
            print(" (%-40s << %2d) |" % (val, f.low))
        value_name = "dword"
        if reg.bit_size == 64:
            value_name = "qword"
        print(" fields.unknown | fields.%s," % (value_name,))

        address = self.get_address_field()
        if address:
            print(" .bo = fields.bo,")
            print(" .is_address = true,")
            # Decide on the address field itself, not on whichever field
            # the loop above happened to visit last.
            if address.type == "waddress":
                print(" .bo_write = true,")
            print(" .bo_offset = fields.bo_offset,")
            print(" .bo_shift = %d," % address.shr)
            print(" .bo_low = %d," % address.low)

        print(" };")

    def dump_pack_struct(self, reg=None):
        """Print the C struct, `pack_<reg>()` function and wrapper macro.

        Does nothing when `reg` is not given.
        """
        if not reg:
            return

        prefix = reg.full_name

        print("struct %s {" % prefix)
        for f in self.fields:
            if f.type in [ "address", "waddress" ]:
                # Address fields are represented by a bo plus an offset.
                tab_to(" __bo_type", "bo;")
                tab_to(" uint32_t", "bo_offset;")
                continue
            name = field_name(reg, f)

            type, val = f.ctype("var")

            tab_to(" %s" % type, "%s;" % name)
        if reg.bit_size == 64:
            tab_to(" uint64_t", "unknown;")
            tab_to(" uint64_t", "qword;")
        else:
            tab_to(" uint32_t", "unknown;")
            tab_to(" uint32_t", "dword;")
        print("};\n")

        if reg.array:
            print("static inline struct fd_reg_pair\npack_%s(uint32_t __i, struct %s fields)\n{" %
                  (prefix, prefix))
        else:
            print("static inline struct fd_reg_pair\npack_%s(struct %s fields)\n{" %
                  (prefix, prefix))

        self.dump_regpair_builder(reg)

        print("\n}\n")

        # Registers with an address field get an extra empty reg pair
        # appended by the wrapper macro.
        if self.get_address_field():
            skip = ", { .reg = 0 }"
        else:
            skip = ""

        if reg.array:
            print("#define %s(__i, ...) pack_%s(__i, __struct_cast(%s) { __VA_ARGS__ })%s\n" %
                  (prefix, prefix, prefix, skip))
        else:
            print("#define %s(...) pack_%s(__struct_cast(%s) { __VA_ARGS__ })%s\n" %
                  (prefix, prefix, prefix, skip))

    def dump(self, prefix=None):
        """Print `#define`s / inline setters for every field.

        `prefix` defaults to the bitset name. Single-bit fields become a
        plain bit define; other fields get `__MASK`/`__SHIFT` defines and
        an inline function that shifts and masks a value into place.
        """
        if prefix is None:
            prefix = self.name
        for f in self.fields:
            if f.name:
                name = prefix + "_" + f.name.upper()
            else:
                name = prefix

            if not f.name and f.low == 0 and f.shr == 0 and not f.type in ["float", "fixed", "ufixed"]:
                # Unnamed, unshifted field at bit 0: nothing to emit.
                pass
            elif f.type == "boolean" or (f.type is None and f.low == f.high):
                tab_to("#define %s" % name, "0x%08x" % (1 << f.low))
            else:
                tab_to("#define %s__MASK" % name, "0x%08x" % mask(f.low, f.high))
                tab_to("#define %s__SHIFT" % name, "%d" % f.low)
                type, val = f.ctype("val")

                print("static inline uint32_t %s(%s val)\n{" % (name, type))
                if f.shr > 0:
                    # The low `shr` bits are dropped, so they must be zero.
                    print("\tassert(!(val & 0x%x));" % mask(0, f.shr - 1))
                print("\treturn ((%s) << %s__SHIFT) & %s__MASK;\n}" % (val, name, name))
        print()
|
||||||
|
|
||||||
|
class Array(object):
    """A register array: a base offset repeated `length` times at `stride`.

    `attrs` is the XML attribute mapping; `offset`, `stride` and `length`
    are required and parsed with base auto-detection, `name` and `usage`
    are optional.
    """

    def __init__(self, attrs, domain, variant):
        self.name = attrs["name"] if "name" in attrs else ""
        self.domain = domain
        self.variant = variant
        self.offset = int(attrs["offset"], 0)
        self.stride = int(attrs["stride"], 0)
        self.length = int(attrs["length"], 0)
        # Comma-separated usage list, or None when not specified.
        self.usages = attrs["usage"].split(',') if "usage" in attrs else None

    def dump(self):
        """Print the C macro computing the address of element `i0`."""
        macro_args = (self.name, self.offset, self.stride)
        print("#define _%s(i0) (0x%08x + 0x%x*(i0))\n" % macro_args)

    def dump_pack_struct(self):
        """Intentionally a no-op: arrays emit no pack struct."""
        pass

    def dump_regpair_builder(self):
        """Intentionally a no-op: arrays emit no reg-pair builder."""
        pass
|
||||||
|
|
||||||
|
class Reg(object):
    """A single register, optionally a member of an `Array`.

    The `bitset` attribute is not set by the constructor; it must be
    assigned before any of the dump methods are called.
    """

    def __init__(self, attrs, domain, array, bit_size):
        own_name = attrs["name"]
        self.domain = domain
        self.array = array
        self.offset = int(attrs["offset"], 0)
        self.type = None
        self.bit_size = bit_size
        # Array members are named "<array>_<register>".
        self.name = (array.name + "_" + own_name) if array else own_name
        self.full_name = self.name

    def dump(self):
        """Print the register offset definition and any inline bitfields."""
        if not self.array:
            tab_to("#define %s" % self.full_name, "0x%08x" % self.offset)
        else:
            # Offset within the array element plus the array base.
            base = self.array.offset + self.offset
            print("static inline uint32_t %s(uint32_t i0) { return 0x%08x + 0x%x*i0; }" % (self.full_name, base, self.array.stride))

        if self.bitset.inline:
            self.bitset.dump(self.full_name)

    def dump_pack_struct(self):
        """Print the pack struct, delegated to the bitset when it is inline."""
        if not self.bitset.inline:
            return
        self.bitset.dump_pack_struct(self)

    def dump_regpair_builder(self):
        """Print the reg-pair builder, delegated to the bitset when inline."""
        if not self.bitset.inline:
            return
        self.bitset.dump_regpair_builder(self)

    def dump_py(self):
        """Print a Python constant assignment for the register offset."""
        print("\tREG_%s = 0x%08x" % (self.full_name, self.offset))
|
||||||
|
|
||||||
|
|
||||||
|
class Parser(object):
    """Expat-driven parser for rules-ng register XML plus header dumpers.

    `parse()` reads an XML file (following `<import>` elements) and fills
    `enums`, `bitsets`, `file` (top-level items of the main file only),
    `variant_regs` and `usage_regs`. The `dump*` methods print generated
    C / Python text to stdout.
    """

    def __init__(self):
        self.current_array = None
        self.current_domain = None
        self.current_prefix = None
        self.current_prefix_type = None
        self.current_stripe = None
        self.current_bitset = None
        self.current_bitsize = 32
        # The varset attribute on the domain specifies the enum which
        # specifies all possible hw variants:
        self.current_varset = None
        # Regs that have multiple variants.. we only generated the C++
        # template based struct-packers for these
        self.variant_regs = {}
        # Information in which contexts regs are used, to be used in
        # debug options
        self.usage_regs = collections.defaultdict(list)
        self.bitsets = {}
        self.enums = {}
        self.variants = set()
        self.file = []
        self.xml_files = []
        self.copyright_year = None
        self.authors = []
        self.license = None
        # Stack of (expat parser, filename) for the files being parsed;
        # created here so error()/do_parse() work before parse() is called.
        self.stack = []

    def error(self, message):
        """Build (not raise) an Error carrying the current file position."""
        parser, filename = self.stack[-1]
        return Error("%s:%d:%d: %s" % (filename, parser.CurrentLineNumber, parser.CurrentColumnNumber, message))

    def prefix(self, variant=None):
        """Return the name prefix for items declared at the current position."""
        if self.current_prefix_type == "variant" and variant:
            return variant
        elif self.current_stripe:
            return self.current_stripe + "_" + self.current_domain
        elif self.current_prefix:
            return self.current_prefix + "_" + self.current_domain
        else:
            return self.current_domain

    def parse_field(self, name, attrs):
        """Create a Field from XML attributes and add it to the current bitset.

        The bit range comes from `pos`, or from `high`+`low`, else it spans
        the whole register. ValueError from number parsing is re-raised as
        a positioned parser error.
        """
        try:
            if "pos" in attrs:
                high = low = int(attrs["pos"], 0)
            elif "high" in attrs and "low" in attrs:
                high = int(attrs["high"], 0)
                low = int(attrs["low"], 0)
            else:
                low = 0
                high = self.current_bitsize - 1

            if "type" in attrs:
                type = attrs["type"]
            else:
                type = None

            if "shr" in attrs:
                shr = int(attrs["shr"], 0)
            else:
                shr = 0

            b = Field(name, low, high, shr, type, self)

            if type == "fixed" or type == "ufixed":
                b.radix = int(attrs["radix"], 0)

            self.current_bitset.fields.append(b)
        except ValueError as e:
            raise self.error(e)

    def parse_varset(self, attrs):
        """Return the variant-set enum in effect for an element."""
        # Inherit the varset from the enclosing domain if not overriden:
        varset = self.current_varset
        if "varset" in attrs:
            varset = self.enums[attrs["varset"]]
        return varset

    def parse_variants(self, attrs):
        """Return the first variant named in `variants`, or None if absent.

        For a range such as "A-B" only the part before the dash is used.
        The name must exist in the applicable varset enum.
        """
        if "variants" not in attrs:
            return None
        variant = attrs["variants"].split(",")[0]
        if "-" in variant:
            variant = variant[:variant.index("-")]

        varset = self.parse_varset(attrs)

        assert varset.has_name(variant)

        return variant

    def add_all_variants(self, reg, attrs, parent_variant):
        """Record `reg` under its variant in `variant_regs`."""
        # TODO this should really handle *all* variants, including dealing
        # with open ended ranges (ie. "A2XX,A4XX-") (we have the varset
        # enum now to make that possible)
        variant = self.parse_variants(attrs)
        if not variant:
            variant = parent_variant

        if reg.name not in self.variant_regs:
            self.variant_regs[reg.name] = {}
        else:
            # All variants must be same size:
            v = next(iter(self.variant_regs[reg.name]))
            assert self.variant_regs[reg.name][v].bit_size == reg.bit_size

        self.variant_regs[reg.name][variant] = reg

    def add_all_usages(self, reg, usages):
        """Record `reg` under each usage name; no-op for an empty list."""
        if not usages:
            return

        for usage in usages:
            self.usage_regs[usage].append(reg)

        self.variants.add(reg.domain)

    def do_validate(self, schemafile):
        """Validate the file being parsed against its XML schema.

        Requires lxml; when it is not installed a note is printed to
        stderr and validation is skipped.
        """
        try:
            from lxml import etree

            parser, filename = self.stack[-1]
            dirname = os.path.dirname(filename)

            # we expect this to look like <namespace url> schema.xsd.. I think
            # technically it is supposed to be just a URL, but that doesn't
            # quite match up to what we do.. Just skip over everything up to
            # and including the last space character:
            schemafile = schemafile[schemafile.rindex(" ")+1:]

            # this is a bit cheezy, but the xml file to validate could be
            # in a child directory, ie. we don't really know where the schema
            # file is, the way the rnn C code does. So if it doesn't exist
            # just look one level up
            if not os.path.exists(dirname + "/" + schemafile):
                schemafile = "../" + schemafile

            if not os.path.exists(dirname + "/" + schemafile):
                raise self.error("Cannot find schema for: " + filename)

            xmlschema_doc = etree.parse(dirname + "/" + schemafile)
            xmlschema = etree.XMLSchema(xmlschema_doc)

            xml_doc = etree.parse(filename)
            if not xmlschema.validate(xml_doc):
                error_str = str(xmlschema.error_log.filter_from_errors()[0])
                raise self.error("Schema validation failed for: " + filename + "\n" + error_str)
        except ImportError:
            print("lxml not found, skipping validation", file=sys.stderr)

    def do_parse(self, filename):
        """Parse one XML file; files already parsed are skipped."""
        filepath = os.path.abspath(filename)
        if filepath in self.xml_files:
            return
        self.xml_files.append(filepath)
        # The context manager closes the file even when parsing raises.
        with open(filename, "rb") as file:
            parser = xml.parsers.expat.ParserCreate()
            self.stack.append((parser, filename))
            parser.StartElementHandler = self.start_element
            parser.EndElementHandler = self.end_element
            parser.CharacterDataHandler = self.character_data
            parser.buffer_text = True
            parser.ParseFile(file)
            self.stack.pop()

    def parse(self, rnn_path, filename):
        """Parse `filename`; `<import>` paths are resolved against `rnn_path`."""
        self.path = rnn_path
        self.stack = []
        self.do_parse(filename)

    def parse_reg(self, attrs, bit_size):
        """Handle a `<reg32>`/`<reg64>` start tag."""
        self.current_bitsize = bit_size
        if "type" in attrs and attrs["type"] in self.bitsets:
            bitset = self.bitsets[attrs["type"]]
            if bitset.inline:
                # Inline bitsets are copied so the register can extend them.
                self.current_bitset = Bitset(attrs["name"], bitset)
                self.current_bitset.inline = True
            else:
                self.current_bitset = bitset
        else:
            self.current_bitset = Bitset(attrs["name"], None)
            self.current_bitset.inline = True
            if "type" in attrs:
                # The register itself acts as a single unnamed field.
                self.parse_field(None, attrs)

        variant = self.parse_variants(attrs)
        if not variant and self.current_array:
            variant = self.current_array.variant

        self.current_reg = Reg(attrs, self.prefix(variant), self.current_array, bit_size)
        self.current_reg.bitset = self.current_bitset

        # Only items of the top-level file (not imports) are dumped.
        if len(self.stack) == 1:
            self.file.append(self.current_reg)

        if variant is not None:
            self.add_all_variants(self.current_reg, attrs, variant)

        usages = None
        if "usage" in attrs:
            usages = attrs["usage"].split(',')
        elif self.current_array:
            usages = self.current_array.usages

        self.add_all_usages(self.current_reg, usages)

    def start_element(self, name, attrs):
        """Expat start-tag handler: dispatch on the element name."""
        self.cdata = ""
        if name == "import":
            filename = attrs["file"]
            self.do_parse(os.path.join(self.path, filename))
        elif name == "domain":
            self.current_domain = attrs["name"]
            if "prefix" in attrs:
                self.current_prefix = self.parse_variants(attrs)
                self.current_prefix_type = attrs["prefix"]
            else:
                self.current_prefix = None
                self.current_prefix_type = None
            if "varset" in attrs:
                self.current_varset = self.enums[attrs["varset"]]
        elif name == "stripe":
            self.current_stripe = self.parse_variants(attrs)
        elif name == "enum":
            self.current_enum_value = 0
            self.current_enum = Enum(attrs["name"])
            self.enums[attrs["name"]] = self.current_enum
            if len(self.stack) == 1:
                self.file.append(self.current_enum)
        elif name == "value":
            if "value" in attrs:
                value = int(attrs["value"], 0)
            else:
                value = self.current_enum_value
            self.current_enum.values.append((attrs["name"], value))
        elif name == "reg32":
            self.parse_reg(attrs, 32)
        elif name == "reg64":
            self.parse_reg(attrs, 64)
        elif name == "array":
            self.current_bitsize = 32
            variant = self.parse_variants(attrs)
            self.current_array = Array(attrs, self.prefix(variant), variant)
            if len(self.stack) == 1:
                self.file.append(self.current_array)
        elif name == "bitset":
            self.current_bitset = Bitset(attrs["name"], None)
            if "inline" in attrs and attrs["inline"] == "yes":
                self.current_bitset.inline = True
            self.bitsets[self.current_bitset.name] = self.current_bitset
            if len(self.stack) == 1 and not self.current_bitset.inline:
                self.file.append(self.current_bitset)
        elif name == "bitfield" and self.current_bitset:
            self.parse_field(attrs["name"], attrs)
        elif name == "database":
            self.do_validate(attrs["xsi:schemaLocation"])
        elif name == "copyright":
            self.copyright_year = attrs["year"]
        elif name == "author":
            self.authors.append(attrs["name"] + " <" + attrs["email"] + "> " + attrs["name"])

    def end_element(self, name):
        """Expat end-tag handler: reset the state the element established."""
        if name == "domain":
            self.current_domain = None
            self.current_prefix = None
            self.current_prefix_type = None
        elif name == "stripe":
            self.current_stripe = None
        elif name == "bitset":
            self.current_bitset = None
        elif name in ("reg32", "reg64"):
            # Both register widths set current_reg in parse_reg().
            self.current_reg = None
        elif name == "array":
            self.current_array = None
        elif name == "enum":
            self.current_enum = None
        elif name == "license":
            self.license = self.cdata

    def character_data(self, data):
        """Expat text handler: accumulate text since the last start tag."""
        self.cdata += data

    def dump_reg_usages(self):
        """Print per-usage, per-variant register offset tables (C++ only)."""
        d = collections.defaultdict(list)
        for usage, regs in self.usage_regs.items():
            for reg in regs:
                variants = self.variant_regs.get(reg.name)
                if variants:
                    for variant, vreg in variants.items():
                        if reg == vreg:
                            d[(usage, variant)].append(reg)
                else:
                    # Not variant specific: listed for every known variant.
                    for variant in self.variants:
                        d[(usage, variant)].append(reg)

        print("#ifdef __cplusplus")

        for usage, regs in self.usage_regs.items():
            print("template<chip CHIP> constexpr inline uint16_t %s_REGS[] = {};" % (usage.upper()))

        for (usage, variant), regs in d.items():
            offsets = []

            for reg in regs:
                if reg.array:
                    for i in range(reg.array.length):
                        offsets.append(reg.array.offset + reg.offset + i * reg.array.stride)
                        # A 64-bit register also occupies the next offset.
                        if reg.bit_size == 64:
                            offsets.append(offsets[-1] + 1)
                else:
                    offsets.append(reg.offset)
                    if reg.bit_size == 64:
                        offsets.append(offsets[-1] + 1)

            offsets.sort()

            print("template<> constexpr inline uint16_t %s_REGS<%s>[] = {" % (usage.upper(), variant))
            for offset in offsets:
                print("\t%s," % hex(offset))
            print("};")

        print("#endif")

    def dump(self):
        """Print the C header body: enums, bitsets, registers, lookup helper."""
        enums = []
        bitsets = []
        regs = []
        for e in self.file:
            if isinstance(e, Enum):
                enums.append(e)
            elif isinstance(e, Bitset):
                bitsets.append(e)
            else:
                regs.append(e)

        for e in enums + bitsets + regs:
            e.dump()

        self.dump_reg_usages()

        print("static inline char *ethosu_get_cmd_name(unsigned domain, uint32_t cmd) {")
        for e in regs:
            # `regs` also holds Array entries, which have neither `.array`
            # nor `.full_name`; only plain registers get a name entry.
            if not isinstance(e, Reg) or e.array:
                continue
            domain = 0 if e.domain == "CMD0" else 1
            print(" if (domain == %d && cmd == 0x%08x) return \"%s\";" % (domain, e.offset, e.full_name))
        print(" return NULL;")
        print("}\n")

    def dump_regs_py(self):
        """Print Python constants for every top-level register."""
        regs = []
        for e in self.file:
            if isinstance(e, Reg):
                regs.append(e)

        for e in regs:
            e.dump_py()

    def dump_reg_variants(self, regname, variants):
        """Print the C++ template struct-packer for a multi-variant register."""
        # Don't bother for things that only have a single variant:
        if len(variants) == 1:
            return
        print("#ifdef __cplusplus")
        print("struct __%s {" % regname)
        # TODO be more clever.. we should probably figure out which
        # fields have the same type in all variants (in which they
        # appear) and stuff everything else in a variant specific
        # sub-structure.
        seen_fields = []
        bit_size = 32
        array = False
        address = None
        for variant in variants.keys():
            print(" /* %s fields: */" % variant)
            reg = variants[variant]
            bit_size = reg.bit_size
            array = reg.array
            for f in reg.bitset.fields:
                fld_name = field_name(reg, f)
                if fld_name in seen_fields:
                    continue
                seen_fields.append(fld_name)
                name = fld_name.lower()
                if f.type in [ "address", "waddress" ]:
                    # Only the first address field contributes bo members.
                    if address:
                        continue
                    address = f
                    tab_to(" __bo_type", "bo;")
                    tab_to(" uint32_t", "bo_offset;")
                    continue
                type, val = f.ctype("var")
                tab_to(" %s" %type, "%s;" %name)
        print(" /* fallback fields: */")
        if bit_size == 64:
            tab_to(" uint64_t", "unknown;")
            tab_to(" uint64_t", "qword;")
        else:
            tab_to(" uint32_t", "unknown;")
            tab_to(" uint32_t", "dword;")
        print("};")
        # TODO don't hardcode the varset enum name
        varenum = "chip"
        print("template <%s %s>" % (varenum, varenum.upper()))
        print("static inline struct fd_reg_pair")
        xtra = ""
        xtravar = ""
        if array:
            xtra = "int __i, "
            xtravar = "__i, "
        print("__%s(%sstruct __%s fields) {" % (regname, xtra, regname))
        for variant in variants.keys():
            print(" if (%s == %s) {" % (varenum.upper(), variant))
            reg = variants[variant]
            reg.dump_regpair_builder()
            print(" } else")
        print(" assert(!\"invalid variant\");")
        print("}")

        if bit_size == 64:
            skip = ", { .reg = 0 }"
        else:
            skip = ""

        print("#define %s(VARIANT, %s...) __%s<VARIANT>(%s{__VA_ARGS__})%s" % (regname, xtravar, regname, xtravar, skip))
        print("#endif /* __cplusplus */")

    def dump_structs(self):
        """Print pack structs for every top-level item and variant register."""
        for e in self.file:
            e.dump_pack_struct()

        for regname in self.variant_regs:
            self.dump_reg_variants(regname, self.variant_regs[regname])
|
||||||
33
src/gallium/drivers/ethosu/meson.build
Normal file
33
src/gallium/drivers/ethosu/meson.build
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
# Copyright 2019 Google, Inc
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
ethosu_registers = custom_target(
|
||||||
|
'ethosu_registers.h',
|
||||||
|
input : ['gen_parser.py', 'gen_header.py', 'registers.xml'],
|
||||||
|
output : 'ethosu_registers.h',
|
||||||
|
command : [prog_python, '@INPUT1@', '--rnn', '.', '--xml', '@INPUT2@', 'c-defines'],
|
||||||
|
capture : true,
|
||||||
|
)
|
||||||
|
|
||||||
|
files_ethosu = files(
|
||||||
|
'ethosu_cmd.c',
|
||||||
|
'ethosu_coefs.c',
|
||||||
|
'ethosu_device.c',
|
||||||
|
'ethosu_lower.c',
|
||||||
|
'ethosu_ml.c',
|
||||||
|
'ethosu_sched.c',
|
||||||
|
'mlw_codec/mlw_encode.c',
|
||||||
|
)
|
||||||
|
|
||||||
|
libethosu = static_library(
|
||||||
|
'ethosu',
|
||||||
|
[files_ethosu, ethosu_registers],
|
||||||
|
include_directories : [inc_gallium_aux, inc_gallium, inc_include, inc_src],
|
||||||
|
gnu_symbol_visibility : 'hidden',
|
||||||
|
dependencies : [idep_mesautil, dep_libdrm],
|
||||||
|
)
|
||||||
|
|
||||||
|
driver_ethosu = declare_dependency(
|
||||||
|
compile_args : '-DGALLIUM_ETHOSU',
|
||||||
|
link_with : [libethosuwinsys, libethosu]
|
||||||
|
)
|
||||||
29
src/gallium/drivers/ethosu/mlw_codec/mlw_common.h
Normal file
29
src/gallium/drivers/ethosu/mlw_codec/mlw_common.h
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: Copyright 2020, 2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifndef MLW_COMMON_H
|
||||||
|
#define MLW_COMMON_H
|
||||||
|
|
||||||
|
#define ZDIV_DISABLE 6 // not alternating mode
|
||||||
|
#define ZDIV_EOS 7 // indicates end of stream
|
||||||
|
|
||||||
|
#define WDIV_UNCOMPRESSED 7 // indicates uncompressed weights
|
||||||
|
|
||||||
|
#endif
|
||||||
1186
src/gallium/drivers/ethosu/mlw_codec/mlw_encode.c
Normal file
1186
src/gallium/drivers/ethosu/mlw_codec/mlw_encode.c
Normal file
File diff suppressed because it is too large
Load diff
65
src/gallium/drivers/ethosu/mlw_codec/mlw_encode.h
Normal file
65
src/gallium/drivers/ethosu/mlw_codec/mlw_encode.h
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: Copyright 2020-2022 Arm Limited and/or its affiliates <open-source-office@arm.com>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#ifndef MLW_ENCODE_H
|
||||||
|
#define MLW_ENCODE_H
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#define MLW_ENCODE_EXPORTED __declspec(dllexport)
|
||||||
|
#else
|
||||||
|
#define MLW_ENCODE_EXPORTED __attribute__((visibility("default")))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if __cplusplus
|
||||||
|
extern "C"
|
||||||
|
{
|
||||||
|
#endif
|
||||||
|
|
||||||
|
MLW_ENCODE_EXPORTED
|
||||||
|
int mlw_encode(int16_t *inbuf, int inbuf_size, uint8_t **outbuf, int verbose);
|
||||||
|
|
||||||
|
MLW_ENCODE_EXPORTED
|
||||||
|
void mlw_free_outbuf(uint8_t *outbuf);
|
||||||
|
|
||||||
|
MLW_ENCODE_EXPORTED
|
||||||
|
int mlw_reorder_encode(
|
||||||
|
int ifm_ublock_depth,
|
||||||
|
int ofm_ublock_depth,
|
||||||
|
int ofm_depth,
|
||||||
|
int kernel_height,
|
||||||
|
int kernel_width,
|
||||||
|
int ifm_depth,
|
||||||
|
int* brick_strides,
|
||||||
|
int16_t* inbuf,
|
||||||
|
int ofm_block_depth,
|
||||||
|
int is_depthwise,
|
||||||
|
int is_partkernel,
|
||||||
|
int ifm_bitdepth,
|
||||||
|
int decomp_h,
|
||||||
|
int decomp_w,
|
||||||
|
uint8_t **outbuf,
|
||||||
|
int64_t* padded_length,
|
||||||
|
int verbose);
|
||||||
|
|
||||||
|
#if __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
399
src/gallium/drivers/ethosu/registers.xml
Normal file
399
src/gallium/drivers/ethosu/registers.xml
Normal file
|
|
@ -0,0 +1,399 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<database xmlns="http://nouveau.freedesktop.org/"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://nouveau.freedesktop.org/ rules-ng.xsd">
|
||||||
|
|
||||||
|
<copyright year="2024">
|
||||||
|
|
||||||
|
<author name="Tomeu Vizoso" email="tomeu@tomeuvizoso.net"><nick name="tomeu"/>
|
||||||
|
Initial Author.
|
||||||
|
</author>
|
||||||
|
|
||||||
|
<license>
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice (including the
|
||||||
|
next paragraph) shall be included in all copies or substantial
|
||||||
|
portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
|
||||||
|
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||||
|
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
</license>
|
||||||
|
|
||||||
|
</copyright>
|
||||||
|
|
||||||
|
<domain name="CMD0" width="32">
|
||||||
|
<reg32 offset="0x0" name="NPU_OP_STOP">
|
||||||
|
<bitfield name="mask" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x1" name="NPU_OP_IRQ">
|
||||||
|
<bitfield name="mask" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x2" name="NPU_OP_CONV">
|
||||||
|
<bitfield name="reserved0" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x3" name="NPU_OP_DEPTHWISE">
|
||||||
|
<bitfield name="reserved0" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x5" name="NPU_OP_POOL">
|
||||||
|
<bitfield name="mode" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x6" name="NPU_OP_ELEMENTWISE">
|
||||||
|
<bitfield name="mode" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x10" name="NPU_OP_DMA_START">
|
||||||
|
<bitfield name="channel_mode" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x11" name="NPU_OP_DMA_WAIT">
|
||||||
|
<bitfield name="reserved0" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x12" name="NPU_OP_KERNEL_WAIT">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x13" name="NPU_OP_PMU_MASK">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x100" name="NPU_SET_IFM_PAD_TOP">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x101" name="NPU_SET_IFM_PAD_LEFT">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x102" name="NPU_SET_IFM_PAD_RIGHT">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x103" name="NPU_SET_IFM_PAD_BOTTOM">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x104" name="NPU_SET_IFM_DEPTH_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x105" name="NPU_SET_IFM_PRECISION">
|
||||||
|
<bitfield name="round_mode" low="14" high="15" type="uint"/>
|
||||||
|
<bitfield name="reserved2" low="10" high="13" type="uint"/>
|
||||||
|
<bitfield name="scale_mode" low="8" high="9" type="uint"/>
|
||||||
|
<bitfield name="format" low="6" high="7" type="uint"/>
|
||||||
|
<bitfield name="reserved1" low="4" high="5" type="uint"/>
|
||||||
|
<bitfield name="precision" low="2" high="3" type="uint"/>
|
||||||
|
<bitfield name="reserved0" low="1" high="1" type="uint"/>
|
||||||
|
<bitfield name="activation" low="0" high="0" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x107" name="NPU_SET_IFM_UPSCALE">
|
||||||
|
<bitfield name="reserved0" low="2" high="15" type="uint"/>
|
||||||
|
<bitfield name="mode" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x109" name="NPU_SET_IFM_ZERO_POINT">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x10a" name="NPU_SET_IFM_WIDTH0_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x10b" name="NPU_SET_IFM_HEIGHT0_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x10c" name="NPU_SET_IFM_HEIGHT1_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x10d" name="NPU_SET_IFM_IB_END">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x10f" name="NPU_SET_IFM_REGION">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x111" name="NPU_SET_OFM_WIDTH_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x112" name="NPU_SET_OFM_HEIGHT_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x113" name="NPU_SET_OFM_DEPTH_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x114" name="NPU_SET_OFM_PRECISION">
|
||||||
|
<bitfield name="round_mode" low="14" high="15" type="uint"/>
|
||||||
|
<bitfield name="reserved1" low="9" high="13" type="uint"/>
|
||||||
|
<bitfield name="scale_mode" low="8" high="8" type="uint"/>
|
||||||
|
<bitfield name="format" low="6" high="7" type="uint"/>
|
||||||
|
<bitfield name="reserved0" low="3" high="5" type="uint"/>
|
||||||
|
<bitfield name="precision" low="1" high="2" type="uint"/>
|
||||||
|
<bitfield name="activation" low="0" high="0" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x115" name="NPU_SET_OFM_BLK_WIDTH_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x116" name="NPU_SET_OFM_BLK_HEIGHT_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x117" name="NPU_SET_OFM_BLK_DEPTH_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x118" name="NPU_SET_OFM_ZERO_POINT">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x11a" name="NPU_SET_OFM_WIDTH0_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x11b" name="NPU_SET_OFM_HEIGHT0_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x11c" name="NPU_SET_OFM_HEIGHT1_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x11f" name="NPU_SET_OFM_REGION">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x120" name="NPU_SET_KERNEL_WIDTH_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x121" name="NPU_SET_KERNEL_HEIGHT_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x122" name="NPU_SET_KERNEL_STRIDE">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x123" name="NPU_SET_PARALLEL_MODE">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x124" name="NPU_SET_ACC_FORMAT">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x125" name="NPU_SET_ACTIVATION">
|
||||||
|
<bitfield name="act_clip_range" low="12" high="15" type="uint"/>
|
||||||
|
<bitfield name="type" low="0" high="11" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x126" name="NPU_SET_ACTIVATION_MIN">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x127" name="NPU_SET_ACTIVATION_MAX">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x128" name="NPU_SET_WEIGHT_REGION">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x129" name="NPU_SET_SCALE_REGION">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x12d" name="NPU_SET_AB_START">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x12f" name="NPU_SET_BLOCKDEP">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x130" name="NPU_SET_DMA0_SRC_REGION">
|
||||||
|
<bitfield name="reserved0" low="11" high="15" type="uint"/>
|
||||||
|
<bitfield name="stride_mode" low="9" high="10" type="uint"/>
|
||||||
|
<bitfield name="internal" low="8" high="8" type="uint"/>
|
||||||
|
<bitfield name="region" low="0" high="7" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x131" name="NPU_SET_DMA0_DST_REGION">
|
||||||
|
<bitfield name="reserved0" low="11" high="15" type="uint"/>
|
||||||
|
<bitfield name="stride_mode" low="9" high="10" type="uint"/>
|
||||||
|
<bitfield name="internal" low="8" high="8" type="uint"/>
|
||||||
|
<bitfield name="region" low="0" high="7" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x132" name="NPU_SET_DMA0_SIZE0">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x133" name="NPU_SET_DMA0_SIZE1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x180" name="NPU_SET_IFM2_BROADCAST">
|
||||||
|
<bitfield name="reserved1" low="8" high="15" type="uint"/>
|
||||||
|
<bitfield name="broadcast_scalar" low="7" high="7" type="uint"/>
|
||||||
|
<bitfield name="operand_order" low="6" high="6" type="uint"/>
|
||||||
|
<bitfield name="reserved0" low="3" high="5" type="uint"/>
|
||||||
|
<bitfield name="broadcast_depth" low="2" high="2" type="uint"/>
|
||||||
|
<bitfield name="broadcast_width" low="1" high="1" type="uint"/>
|
||||||
|
<bitfield name="broadcast_height" low="0" high="0" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x181" name="NPU_SET_IFM2_SCALAR">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x185" name="NPU_SET_IFM2_PRECISION">
|
||||||
|
<bitfield name="reserved1" low="8" high="15" type="uint"/>
|
||||||
|
<bitfield name="format" low="6" high="7" type="uint"/>
|
||||||
|
<bitfield name="reserved0" low="4" high="5" type="uint"/>
|
||||||
|
<bitfield name="precision" low="0" high="3" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x189" name="NPU_SET_IFM2_ZERO_POINT">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x18a" name="NPU_SET_IFM2_WIDTH0_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x18b" name="NPU_SET_IFM2_HEIGHT0_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x18c" name="NPU_SET_IFM2_HEIGHT1_M1">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x18d" name="NPU_SET_IFM2_IB_START">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x18f" name="NPU_SET_IFM2_REGION">
|
||||||
|
<bitfield name="param" low="0" high="15" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
</domain>
|
||||||
|
<domain name="CMD1" width="32">
|
||||||
|
<reg32 offset="0x0" name="NPU_SET_IFM_BASE0">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x1" name="NPU_SET_IFM_BASE1">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x2" name="NPU_SET_IFM_BASE2">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x3" name="NPU_SET_IFM_BASE3">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x4" name="NPU_SET_IFM_STRIDE_X">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x5" name="NPU_SET_IFM_STRIDE_Y">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x6" name="NPU_SET_IFM_STRIDE_C">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x10" name="NPU_SET_OFM_BASE0">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x11" name="NPU_SET_OFM_BASE1">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x12" name="NPU_SET_OFM_BASE2">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x13" name="NPU_SET_OFM_BASE3">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x14" name="NPU_SET_OFM_STRIDE_X">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x15" name="NPU_SET_OFM_STRIDE_Y">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x16" name="NPU_SET_OFM_STRIDE_C">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x20" name="NPU_SET_WEIGHT_BASE">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x21" name="NPU_SET_WEIGHT_LENGTH">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x22" name="NPU_SET_SCALE_BASE">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x23" name="NPU_SET_SCALE_LENGTH">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x24" name="NPU_SET_OFM_SCALE">
|
||||||
|
<bitfield name="shift" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x25" name="NPU_SET_OPA_SCALE">
|
||||||
|
<bitfield name="shift" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x26" name="NPU_SET_OPB_SCALE">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x30" name="NPU_SET_DMA0_SRC">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x31" name="NPU_SET_DMA0_DST">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x32" name="NPU_SET_DMA0_LEN">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x33" name="NPU_SET_DMA0_SKIP0">
|
||||||
|
<bitfield name="param" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x34" name="NPU_SET_DMA0_SKIP1">
|
||||||
|
<bitfield name="param" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x80" name="NPU_SET_IFM2_BASE0">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x81" name="NPU_SET_IFM2_BASE1">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x82" name="NPU_SET_IFM2_BASE2">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x83" name="NPU_SET_IFM2_BASE3">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x84" name="NPU_SET_IFM2_STRIDE_X">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x85" name="NPU_SET_IFM2_STRIDE_Y">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x86" name="NPU_SET_IFM2_STRIDE_C">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x90" name="NPU_SET_WEIGHT1_BASE">
|
||||||
|
<bitfield name="param" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x91" name="NPU_SET_WEIGHT1_LENGTH">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x92" name="NPU_SET_SCALE1_BASE">
|
||||||
|
<bitfield name="param" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
<reg32 offset="0x93" name="NPU_SET_SCALE1_LENGTH">
|
||||||
|
<bitfield name="reserved0" low="2" high="17" type="uint"/>
|
||||||
|
<bitfield name="payload_size" low="0" high="1" type="uint"/>
|
||||||
|
</reg32>
|
||||||
|
</domain>
|
||||||
|
|
||||||
|
</database>
|
||||||
457
src/gallium/drivers/ethosu/rules-ng.xsd
Normal file
457
src/gallium/drivers/ethosu/rules-ng.xsd
Normal file
|
|
@ -0,0 +1,457 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<schema xmlns="http://www.w3.org/2001/XMLSchema"
|
||||||
|
targetNamespace="http://nouveau.freedesktop.org/"
|
||||||
|
xmlns:rng="http://nouveau.freedesktop.org/"
|
||||||
|
elementFormDefault="qualified">
|
||||||
|
|
||||||
|
<annotation>
|
||||||
|
<documentation>
|
||||||
|
An updated version of the old rules.xml file from the
|
||||||
|
RivaTV project. Specifications by Pekka Paalanen,
|
||||||
|
preliminary attempt by KoalaBR,
|
||||||
|
first working version by Jakob Bornecrantz.
|
||||||
|
For specifications, see the file rules-ng-format.txt
|
||||||
|
in Nouveau CVS module 'rules-ng'.
|
||||||
|
</documentation>
|
||||||
|
<documentation>Version 0.1</documentation>
|
||||||
|
</annotation>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Elements -->
|
||||||
|
|
||||||
|
<element name="database" type="rng:databaseType" />
|
||||||
|
<element name="import" type="rng:importType" />
|
||||||
|
<element name="copyright" type="rng:copyrightType" />
|
||||||
|
<element name="domain" type="rng:domainType" />
|
||||||
|
<element name="group" type="rng:groupType" />
|
||||||
|
<element name="use-group" type="rng:refType" />
|
||||||
|
<element name="array" type="rng:arrayType" />
|
||||||
|
<element name="stripe" type="rng:stripeType" />
|
||||||
|
<element name="reg64" type="rng:registerType" />
|
||||||
|
<element name="reg32" type="rng:registerType" />
|
||||||
|
<element name="reg16" type="rng:registerType" />
|
||||||
|
<element name="reg8" type="rng:registerType" />
|
||||||
|
<element name="bitset" type="rng:bitsetType" />
|
||||||
|
<element name="bitfield" type="rng:bitfieldType" />
|
||||||
|
<element name="enum" type="rng:enumType" />
|
||||||
|
<element name="value" type="rng:valueType" />
|
||||||
|
|
||||||
|
<!-- Copyright elements -->
|
||||||
|
<element name="author" type="rng:authorType" />
|
||||||
|
<element name="nick" type="rng:nickType" />
|
||||||
|
<element name="license" type="rng:docType" />
|
||||||
|
|
||||||
|
<!-- Documentation elements -->
|
||||||
|
|
||||||
|
<!-- FIXME: allowed only one per parent element -->
|
||||||
|
<element name="brief" type="rng:briefType" />
|
||||||
|
|
||||||
|
<element name="doc" type="rng:docType" />
|
||||||
|
<element name="b" type="rng:textformatType" />
|
||||||
|
<element name="i" type="rng:textformatType" />
|
||||||
|
<element name="u" type="rng:textformatType" />
|
||||||
|
<element name="code" type="rng:textcodeType" />
|
||||||
|
<element name="ul" type="rng:listType" />
|
||||||
|
<element name="ol" type="rng:listType" />
|
||||||
|
<element name="li" type="rng:listitemType" />
|
||||||
|
|
||||||
|
<!-- Copyright element types -->
|
||||||
|
|
||||||
|
<complexType name="authorType" mixed="true">
|
||||||
|
<annotation>
|
||||||
|
<documentation>
|
||||||
|
register database author
|
||||||
|
</documentation>
|
||||||
|
</annotation>
|
||||||
|
<choice minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<element ref="rng:nick" />
|
||||||
|
</choice>
|
||||||
|
<attribute name="name" type="string" use="required" />
|
||||||
|
<attribute name="email" type="string" use="required" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<complexType name="nickType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>nickType</documentation>
|
||||||
|
</annotation>
|
||||||
|
<attribute name="name" type="string" use="required" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<!-- Database element types -->
|
||||||
|
|
||||||
|
<complexType name="databaseType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>databaseType</documentation>
|
||||||
|
</annotation>
|
||||||
|
<choice minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<group ref="rng:docGroup" />
|
||||||
|
<group ref="rng:topGroup" />
|
||||||
|
</choice>
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<complexType name="importType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>importType</documentation>
|
||||||
|
</annotation>
|
||||||
|
<attribute name="file" type="string" use="required" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<complexType name="copyrightType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>copyrightType</documentation>
|
||||||
|
</annotation>
|
||||||
|
<choice minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<group ref="rng:docGroup" />
|
||||||
|
<group ref="rng:topGroup" />
|
||||||
|
<element ref="rng:author" />
|
||||||
|
<element ref="rng:license" />
|
||||||
|
</choice>
|
||||||
|
<attribute name="year" type="nonNegativeInteger" use="optional" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<complexType name="domainType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>domainType</documentation>
|
||||||
|
</annotation>
|
||||||
|
<choice minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<group ref="rng:docGroup" />
|
||||||
|
<group ref="rng:topGroup" />
|
||||||
|
<group ref="rng:regarrayGroup" />
|
||||||
|
</choice>
|
||||||
|
<attribute name="name" type="NMTOKEN" use="required" />
|
||||||
|
<attribute name="bare" type="rng:Boolean" use="optional" />
|
||||||
|
<attribute name="prefix" type="NMTOKENS" use="optional" />
|
||||||
|
<attribute name="width" type="rng:DomainWidth" use="optional" />
|
||||||
|
<attribute name="size" type="rng:HexOrNumber" use="optional" />
|
||||||
|
<attribute name="varset" type="NMTOKEN" use="optional" />
|
||||||
|
<attribute name="variants" type="string" use="optional" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<complexType name="groupType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>groupType</documentation>
|
||||||
|
</annotation>
|
||||||
|
<choice minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<group ref="rng:docGroup" />
|
||||||
|
<group ref="rng:topGroup" />
|
||||||
|
<group ref="rng:regarrayGroup" />
|
||||||
|
</choice>
|
||||||
|
<attribute name="name" type="NMTOKEN" use="required" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<complexType name="arrayType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>arrayType</documentation>
|
||||||
|
</annotation>
|
||||||
|
<choice minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<group ref="rng:docGroup" />
|
||||||
|
<group ref="rng:topGroup" />
|
||||||
|
<group ref="rng:regarrayGroup" />
|
||||||
|
</choice>
|
||||||
|
<attribute name="name" type="NMTOKEN" use="optional" />
|
||||||
|
<attribute name="offset" type="rng:HexOrNumber" use="optional" />
|
||||||
|
<attribute name="offsets" type="string" use="optional"/>
|
||||||
|
<attribute name="doffsets" type="string" use="optional"/>
|
||||||
|
<attribute name="index" type="NMTOKENS" use="optional"/>
|
||||||
|
<attribute name="stride" type="rng:HexOrNumber" use="required" />
|
||||||
|
<attribute name="length" type="rng:HexOrNumber" use="required" />
|
||||||
|
<attribute name="varset" type="NMTOKEN" use="optional" />
|
||||||
|
<attribute name="variants" type="string" use="optional" />
|
||||||
|
<attribute name="usage" type="string" use="optional" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<complexType name="stripeType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>stripeType</documentation>
|
||||||
|
</annotation>
|
||||||
|
<choice minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<group ref="rng:docGroup" />
|
||||||
|
<group ref="rng:topGroup" />
|
||||||
|
<group ref="rng:regarrayGroup" minOccurs="0" />
|
||||||
|
</choice>
|
||||||
|
<attribute name="name" type="NMTOKEN" use="optional" />
|
||||||
|
<attribute name="offset" type="rng:HexOrNumber" use="optional" />
|
||||||
|
<attribute name="stride" type="rng:HexOrNumber" use="optional" />
|
||||||
|
<attribute name="length" type="rng:HexOrNumber" use="optional" />
|
||||||
|
<attribute name="varset" type="NMTOKEN" use="optional" />
|
||||||
|
<attribute name="variants" type="string" use="optional" />
|
||||||
|
<attribute name="prefix" type="NMTOKENS" use="optional" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<complexType name="registerType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>
|
||||||
|
registerType used by reg8, reg16, reg32, reg64
|
||||||
|
</documentation>
|
||||||
|
</annotation>
|
||||||
|
<choice minOccurs="0" maxOccurs="unbounded">
|
||||||
|
<group ref="rng:docGroup" />
|
||||||
|
<group ref="rng:topGroup" />
|
||||||
|
<element ref="rng:value" />
|
||||||
|
<element ref="rng:bitfield" />
|
||||||
|
</choice>
|
||||||
|
<attribute name="name" type="NMTOKEN" use="required" />
|
||||||
|
<attribute name="offset" type="rng:HexOrNumber" use="required" />
|
||||||
|
<attribute name="access" type="rng:Access" default="rw" use="optional" />
|
||||||
|
<attribute name="type" type="NMTOKENS" use="optional" />
|
||||||
|
<attribute name="shr" type="nonNegativeInteger" use="optional" />
|
||||||
|
<attribute name="varset" type="NMTOKEN" use="optional" />
|
||||||
|
<attribute name="variants" type="string" use="optional" />
|
||||||
|
<attribute name="stride" type="rng:HexOrNumber" use="optional" />
|
||||||
|
<attribute name="length" type="rng:HexOrNumber" use="optional" />
|
||||||
|
<attribute name="high" type="nonNegativeInteger" use="optional" />
|
||||||
|
<attribute name="low" type="nonNegativeInteger" use="optional" />
|
||||||
|
<attribute name="pos" type="nonNegativeInteger" use="optional" />
|
||||||
|
<attribute name="align" type="nonNegativeInteger" use="optional" />
|
||||||
|
<attribute name="radix" type="nonNegativeInteger" use="optional" />
|
||||||
|
<attribute name="usage" type="string" use="optional" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<complexType name="bitsetType">
|
||||||
|
<annotation>
|
||||||
|
<documentation>bitsetType</documentation>
|
||||||
|
</annotation>
|
||||||
|
<choice maxOccurs="unbounded">
|
||||||
|
<element ref="rng:bitfield" />
|
||||||
|
<group ref="rng:docGroup" />
|
||||||
|
<group ref="rng:topGroup" />
|
||||||
|
</choice>
|
||||||
|
<attribute name="name" type="NMTOKEN" use="required" />
|
||||||
|
<attribute name="inline" type="rng:Boolean" use="optional" />
|
||||||
|
<attribute name="bare" type="rng:Boolean" use="optional" />
|
||||||
|
<attribute name="prefix" type="NMTOKENS" use="optional" />
|
||||||
|
<attribute name="varset" type="NMTOKEN" use="optional" />
|
||||||
|
</complexType>
|
||||||
|
|
||||||
|
<!-- bitfieldType: zero or more children, in any order, each being a
     rng:value element, a docGroup member or a topGroup member.
     Only "name" is mandatory; the bit position attributes (high/low/pos)
     and all remaining attributes are optional.
     NOTE(review): maxOccurs on rng:value looks redundant inside an
     unbounded choice; kept as-is. -->
<complexType name="bitfieldType">
  <annotation>
    <documentation>bitfieldType</documentation>
  </annotation>
  <choice minOccurs="0" maxOccurs="unbounded">
    <element ref="rng:value" maxOccurs="unbounded" />
    <group ref="rng:docGroup" />
    <group ref="rng:topGroup" />
  </choice>
  <attribute name="name" type="NMTOKEN" use="required" />
  <attribute name="high" type="nonNegativeInteger" use="optional" />
  <attribute name="low" type="nonNegativeInteger" use="optional" />
  <attribute name="pos" type="nonNegativeInteger" use="optional" />
  <attribute name="radix" type="nonNegativeInteger" use="optional" />
  <attribute name="align" type="nonNegativeInteger" use="optional" />
  <attribute name="type" type="NMTOKENS" use="optional" />
  <attribute name="varset" type="NMTOKEN" use="optional" />
  <attribute name="variants" type="string" use="optional" />
  <attribute name="addvariant" type="rng:Boolean" use="optional" />
  <attribute name="shr" type="nonNegativeInteger" use="optional" />
</complexType>
|
||||||
|
|
||||||
|
<!-- enumType: one or more children, in any order, each being a
     rng:value element, a docGroup member or a topGroup member.
     Only the "name" attribute is mandatory. -->
<complexType name="enumType">
  <annotation>
    <documentation>enumType</documentation>
  </annotation>
  <choice maxOccurs="unbounded">
    <element ref="rng:value" />
    <group ref="rng:docGroup" />
    <group ref="rng:topGroup" />
  </choice>
  <attribute name="name" type="NMTOKEN" use="required" />
  <attribute name="inline" type="rng:Boolean" use="optional" />
  <attribute name="bare" type="rng:Boolean" use="optional" />
  <attribute name="prefix" type="NMTOKENS" use="optional" />
  <attribute name="varset" type="NMTOKEN" use="optional" />
</complexType>
|
||||||
|
|
||||||
|
<!-- valueType: optional children drawn from docGroup and topGroup.
     "name" is mandatory; "value", "varset" and "variants" are optional. -->
<complexType name="valueType">
  <annotation>
    <documentation>valueType</documentation>
  </annotation>
  <choice minOccurs="0" maxOccurs="unbounded">
    <group ref="rng:docGroup" />
    <group ref="rng:topGroup" />
  </choice>
  <attribute name="name" type="NMTOKEN" use="required" />
  <attribute name="value" type="string" use="optional" />
  <attribute name="varset" type="NMTOKEN" use="optional" />
  <attribute name="variants" type="string" use="optional" />
</complexType>
|
||||||
|
|
||||||
|
<!-- refType: no child content; carries a single mandatory "ref"
     attribute holding one name token. -->
<complexType name="refType">
  <annotation>
    <documentation>refType</documentation>
  </annotation>
  <attribute name="ref" type="NMTOKEN" use="required" />
</complexType>
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Documentation element types -->
|
||||||
|
|
||||||
|
<!-- briefType: plain string content, with no child elements and no
     attributes. -->
<complexType name="briefType">
  <annotation>
    <documentation>
      brief documentation, no markup
    </documentation>
  </annotation>
  <simpleContent>
    <extension base="string" />
  </simpleContent>
</complexType>
|
||||||
|
|
||||||
|
<!-- docType: mixed content, i.e. free text interleaved with any number
     of text-format elements (textformatGroup), lists (listGroup) and
     rng:code elements. -->
<complexType name="docType" mixed="true">
  <annotation>
    <documentation>
      root element of documentation sub-tree
    </documentation>
  </annotation>
  <choice minOccurs="0" maxOccurs="unbounded">
    <group ref="rng:textformatGroup" />
    <group ref="rng:listGroup" />
    <element ref="rng:code" />
  </choice>
</complexType>
|
||||||
|
|
||||||
|
<!-- textformatType: mixed content; free text that may contain further
     textformatGroup elements, so formatting can nest. -->
<complexType name="textformatType" mixed="true">
  <annotation>
    <documentation>
      for bold, underline, italics
    </documentation>
  </annotation>
  <choice minOccurs="0" maxOccurs="unbounded">
    <group ref="rng:textformatGroup" />
  </choice>
</complexType>
|
||||||
|
|
||||||
|
<!-- textcodeType: plain string content plus a "title" attribute.
     No "use" is given for "title", so the schema default applies. -->
<complexType name="textcodeType">
  <simpleContent>
    <extension base="string">
      <attribute name="title" type="string" />
    </extension>
  </simpleContent>
</complexType>
|
||||||
|
|
||||||
|
<!-- listType: zero or more rng:li children and nothing else. -->
<complexType name="listType">
  <annotation>
    <documentation>
      definition of a list, ordered or unordered
    </documentation>
  </annotation>
  <choice minOccurs="0" maxOccurs="unbounded">
    <element ref="rng:li" />
  </choice>
</complexType>
|
||||||
|
|
||||||
|
<!-- listitemType: mixed content; free text interleaved with text-format
     elements, nested lists (listGroup) and rng:code elements. -->
<complexType name="listitemType" mixed="true">
  <annotation>
    <documentation>
      items of a list
    </documentation>
  </annotation>
  <choice minOccurs="0" maxOccurs="unbounded">
    <group ref="rng:textformatGroup" />
    <group ref="rng:listGroup" />
    <element ref="rng:code" />
  </choice>
</complexType>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Attribute value types -->
|
||||||
|
|
||||||
|
<!-- Hexadecimal: a string matching any one of the patterns below:
     "0x" followed by lower-case hex digits, "0x" followed by upper-case
     hex digits, or a single decimal digit.
     NOTE(review): a mixed-case literal such as 0xAb matches none of the
     patterns, and the last pattern admits one digit only; confirm both
     are intended. -->
<simpleType name="Hexadecimal">
  <restriction base="string">
    <pattern value="0x[0-9a-f]+" />
    <pattern value="0x[0-9A-F]+" />
    <pattern value="[0-9]" />
  </restriction>
</simpleType>
|
||||||
|
|
||||||
|
<!-- HexOrNumber: accepts either an rng:Hexadecimal value or a
     nonNegativeInteger. -->
<simpleType name="HexOrNumber">
  <annotation>
    <documentation>HexOrNumber</documentation>
  </annotation>
  <union memberTypes="rng:Hexadecimal nonNegativeInteger" />
</simpleType>
|
||||||
|
|
||||||
|
<!-- Boolean: a string limited to the six literals listed below
     (three spellings for each truth value). -->
<simpleType name="Boolean">
  <restriction base="string">
    <enumeration value="true" />
    <enumeration value="1" />
    <enumeration value="yes" />
    <enumeration value="false" />
    <enumeration value="0" />
    <enumeration value="no" />
  </restriction>
</simpleType>
|
||||||
|
|
||||||
|
<!-- Access: a string limited to "r", "w" or "rw". -->
<simpleType name="Access">
  <annotation>
    <documentation>Access</documentation>
  </annotation>
  <restriction base="string">
    <enumeration value="r" />
    <enumeration value="w" />
    <enumeration value="rw" />
  </restriction>
</simpleType>
|
||||||
|
|
||||||
|
<!-- DomainWidth: a string limited to "8", "16", "32" or "64". -->
<simpleType name="DomainWidth">
  <annotation>
    <documentation>DomainWidth</documentation>
  </annotation>
  <restriction base="string">
    <enumeration value="8" />
    <enumeration value="16" />
    <enumeration value="32" />
    <enumeration value="64" />
  </restriction>
</simpleType>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<!-- Element groups -->
|
||||||
|
|
||||||
|
<!-- topGroup: exactly one of copyright, domain, enum, group, bitset or
     import. Content models that reference this group decide how often
     it may repeat. -->
<group name="topGroup">
  <choice>
    <element ref="rng:copyright" />
    <element ref="rng:domain" />
    <element ref="rng:enum" />
    <element ref="rng:group" />
    <element ref="rng:bitset" />
    <element ref="rng:import" />
  </choice>
</group>
|
||||||
|
|
||||||
|
<!-- regarrayGroup: exactly one of reg64, reg32, reg16, reg8, array,
     stripe or use-group. -->
<group name="regarrayGroup">
  <choice>
    <element ref="rng:reg64" />
    <element ref="rng:reg32" />
    <element ref="rng:reg16" />
    <element ref="rng:reg8" />
    <element ref="rng:array" />
    <element ref="rng:stripe" />
    <element ref="rng:use-group" />
  </choice>
</group>
|
||||||
|
|
||||||
|
<!-- docGroup: exactly one documentation element, either brief or doc. -->
<group name="docGroup">
  <choice>
    <element ref="rng:brief" />
    <element ref="rng:doc" />
  </choice>
</group>
|
||||||
|
|
||||||
|
<!-- textformatGroup: exactly one inline formatting element: b, i or u. -->
<group name="textformatGroup">
  <choice>
    <element ref="rng:b" />
    <element ref="rng:i" />
    <element ref="rng:u" />
  </choice>
</group>
|
||||||
|
|
||||||
|
<!-- listGroup: exactly one list element, either ul or ol. -->
<group name="listGroup">
  <choice>
    <element ref="rng:ul" />
    <element ref="rng:ol" />
  </choice>
</group>
|
||||||
|
|
||||||
|
</schema>
|
||||||
|
|
@ -190,6 +190,12 @@ if with_gallium_rocket
|
||||||
else
|
else
|
||||||
driver_rocket = declare_dependency()
|
driver_rocket = declare_dependency()
|
||||||
endif
|
endif
|
||||||
|
# Ethos-U: descend into the winsys and driver directories when the driver is
# enabled; otherwise define driver_ethosu as an empty dependency so that
# targets listing it still evaluate.
if with_gallium_ethosu
  subdir('winsys/ethosu/drm')
  subdir('drivers/ethosu')
else
  driver_ethosu = declare_dependency()
endif
|
||||||
if with_gallium_zink
|
if with_gallium_zink
|
||||||
subdir('drivers/zink')
|
subdir('drivers/zink')
|
||||||
else
|
else
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ libgallium_dri = shared_library(
|
||||||
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
|
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
|
||||||
driver_tegra, driver_i915, driver_svga, driver_virgl,
|
driver_tegra, driver_i915, driver_svga, driver_virgl,
|
||||||
driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12,
|
driver_panfrost, driver_iris, driver_lima, driver_zink, driver_d3d12,
|
||||||
driver_asahi, driver_crocus, driver_rocket
|
driver_asahi, driver_crocus, driver_rocket, driver_ethosu
|
||||||
],
|
],
|
||||||
install : true,
|
install : true,
|
||||||
name_suffix : libname_suffix,
|
name_suffix : libname_suffix,
|
||||||
|
|
|
||||||
17
src/gallium/winsys/ethosu/drm/ethosu_drm_public.h
Normal file
17
src/gallium/winsys/ethosu/drm/ethosu_drm_public.h
Normal file
|
|
@ -0,0 +1,17 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2014 Broadcom
|
||||||
|
* Copyright 2018 Alyssa Rosenzweig
|
||||||
|
* Copyright 2025 Tomeu Vizoso
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __ETHOSU_DRM_PUBLIC_H__
#define __ETHOSU_DRM_PUBLIC_H__

/* Only pointers to these types are used here, so forward declarations
 * are sufficient. */
struct pipe_screen;
struct pipe_screen_config;

/**
 * Winsys entry point for the Ethos-U driver.
 *
 * @param drmFD   file descriptor to create the screen for
 * @param config  screen configuration, passed through unchanged
 * @return the pipe_screen produced for the descriptor
 */
struct pipe_screen *
ethosu_drm_screen_create(int drmFD, const struct pipe_screen_config *config);

#endif /* __ETHOSU_DRM_PUBLIC_H__ */
|
||||||
19
src/gallium/winsys/ethosu/drm/ethosu_drm_winsys.c
Normal file
19
src/gallium/winsys/ethosu/drm/ethosu_drm_winsys.c
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
/*
|
||||||
|
* Copyright 2014 Broadcom
|
||||||
|
* Copyright 2018 Alyssa Rosenzweig
|
||||||
|
* Copyright 2025 Tomeu Vizoso
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "util/os_file.h"
|
||||||
|
#include "util/u_screen.h"
|
||||||
|
|
||||||
|
#include "ethosu/ethosu_device.h"
|
||||||
|
#include "ethosu_drm_public.h"
|
||||||
|
|
||||||
|
struct pipe_screen *
|
||||||
|
ethosu_drm_screen_create(int fd, const struct pipe_screen_config *config)
|
||||||
|
{
|
||||||
|
return u_pipe_screen_lookup_or_create(os_dupfd_cloexec(fd), config, NULL,
|
||||||
|
ethosu_screen_create);
|
||||||
|
}
|
||||||
13
src/gallium/winsys/ethosu/drm/meson.build
Normal file
13
src/gallium/winsys/ethosu/drm/meson.build
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
# Copyright 2017 Broadcom
|
||||||
|
# SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
# Static library built from the Ethos-U DRM winsys source
# (ethosu_drm_winsys.c), with hidden default symbol visibility.
libethosuwinsys = static_library(
  'ethosuwinsys',
  files('ethosu_drm_winsys.c'),
  include_directories : [
    inc_src, inc_include,
    inc_gallium, inc_gallium_aux, inc_gallium_drivers,
  ],
  gnu_symbol_visibility : 'hidden',
  dependencies: [dep_libdrm, idep_mesautil],
)
|
||||||
Loading…
Add table
Reference in a new issue