mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 09:28:07 +02:00
gallium: add pipe_ml_device, pipe_screen::get_ml_device()
For compiling models, we don't really need a context for a real device. To support ML frameworks in which model compilation happens ahead-of-time (AoT), add an API for compilation that doesn't require a pipe_context. Add struct pipe_ml_device with function pointers for: - ml_operation_supported: query operation support - ml_subgraph_create: compile a subgraph - ml_subgraph_serialize: serialize a compiled subgraph - ml_subgraph_destroy: free subgraph resources Move ml_operation_supported, ml_subgraph_create, and ml_subgraph_destroy from pipe_context to pipe_ml_device. Add pipe_screen::get_ml_device() to obtain the device. Change pipe_ml_subgraph.context (pipe_context*) to pipe_ml_subgraph.device (pipe_ml_device*). Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40167>
This commit is contained in:
parent
1d4d1fc61d
commit
16e15ee205
30 changed files with 491 additions and 278 deletions
|
|
@ -192,7 +192,7 @@ emit_ifm_precision(struct ethosu_subgraph *subgraph,
|
|||
if (feature_map->is_signed)
|
||||
prec |= NPU_SET_IFM_PRECISION_ACTIVATION(1); // signed activation
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
prec |= NPU_SET_IFM_PRECISION_SCALE_MODE(op_to_scale);
|
||||
|
||||
EMIT0(precision_cmd, prec);
|
||||
|
|
@ -222,13 +222,13 @@ emit_ofm(struct ethosu_subgraph *subgraph, struct ethosu_feature_map *feature_ma
|
|||
EMIT0(NPU_SET_OFM_HEIGHT_M1, feature_map->shape.height - 1);
|
||||
EMIT0(NPU_SET_OFM_WIDTH_M1, feature_map->shape.width - 1);
|
||||
|
||||
if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
EMIT0(NPU_SET_OFM_DEPTH_M1, feature_map->shape.depth - 1);
|
||||
|
||||
emit_tiles(
|
||||
subgraph, feature_map, NPU_SET_OFM_HEIGHT0_M1, NPU_SET_OFM_HEIGHT1_M1, NPU_SET_OFM_WIDTH0_M1);
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
EMIT0(NPU_SET_OFM_DEPTH_M1, feature_map->shape.depth - 1);
|
||||
|
||||
emit_strides(subgraph, feature_map, NPU_SET_OFM_STRIDE_C, NPU_SET_OFM_STRIDE_Y, NPU_SET_OFM_STRIDE_X);
|
||||
|
|
@ -277,7 +277,7 @@ emit_kernel(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation
|
|||
static void
|
||||
emit_weights(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||
{
|
||||
if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
EMIT0(NPU_SET_WEIGHT_FORMAT, 0x0);
|
||||
|
||||
EMIT0(NPU_SET_WEIGHT_REGION, operation->conv.weights.region);
|
||||
|
|
@ -378,22 +378,22 @@ emit_acc_format(struct ethosu_subgraph *subgraph, struct ethosu_operation *opera
|
|||
static void
|
||||
emit_common(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, enum ethosu_op_to_scale op_to_scale)
|
||||
{
|
||||
if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
emit_ifm_precision(subgraph, &operation->ifm, op_to_scale, NPU_SET_IFM_PRECISION);
|
||||
emit_ifm(subgraph, &operation->ifm);
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
emit_ifm_precision(subgraph, &operation->ifm, op_to_scale, NPU_SET_IFM_PRECISION);
|
||||
EMIT0(NPU_SET_IFM_UPSCALE, operation->upscale);
|
||||
|
||||
if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE)
|
||||
emit_padding(subgraph, operation);
|
||||
|
||||
if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
emit_ofm_precision(subgraph, operation);
|
||||
|
||||
emit_ofm(subgraph, &operation->ofm);
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
emit_ofm_precision(subgraph, operation);
|
||||
|
||||
if (operation->type != ETHOSU_OPERATION_TYPE_ELTWISE)
|
||||
|
|
@ -410,7 +410,7 @@ emit_common(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation
|
|||
static void
|
||||
emit_convolution(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||
{
|
||||
if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_SHIFT(operation->conv.shift), operation->conv.scale);
|
||||
|
||||
operation->ifm.tiles.addresses[0] = ethosu_allocate_feature_map(subgraph, operation->ifm.tensor_idx);
|
||||
|
|
@ -426,7 +426,7 @@ emit_convolution(struct ethosu_subgraph *subgraph, struct ethosu_operation *oper
|
|||
emit_common(subgraph, operation, false);
|
||||
|
||||
emit_block_config(subgraph, operation);
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
emit_shram_registers(subgraph, operation);
|
||||
else
|
||||
emit_acc_format(subgraph, operation);
|
||||
|
|
@ -500,7 +500,7 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
|
||||
switch (operation->pooling.type) {
|
||||
case ETHOSU_POOLING_TYPE_MAX: {
|
||||
if (!ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) {
|
||||
if (!ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) {
|
||||
EMIT1(NPU_SET_OFM_SCALE, NPU_SET_OFM_SCALE_ROUND_MODE(1), 1);
|
||||
break;
|
||||
} else
|
||||
|
|
@ -533,7 +533,7 @@ emit_pooling(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
}
|
||||
|
||||
emit_block_config(subgraph, operation);
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
emit_shram_registers(subgraph, operation);
|
||||
else
|
||||
emit_acc_format(subgraph, operation);
|
||||
|
|
@ -564,7 +564,7 @@ static void
|
|||
emit_ifm2(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, bool has_scalar)
|
||||
{
|
||||
if (has_scalar) {
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
EMIT0(NPU_SET_IFM2_SCALAR, operation->ifm2.scalar);
|
||||
else {
|
||||
emit_ifm2_precision(subgraph, operation, true);
|
||||
|
|
@ -612,7 +612,7 @@ emit_ifm2_broadcast(struct ethosu_subgraph *subgraph, struct ethosu_operation *o
|
|||
{
|
||||
unsigned ifm2_broadcast = 0;
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) {
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) {
|
||||
ifm2_broadcast |= NPU_SET_IFM2_BROADCAST_OPERAND_ORDER(operation->eltwise.ifm_reversed);
|
||||
|
||||
if (has_scalar) {
|
||||
|
|
@ -787,7 +787,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
bool has_scalar = operation->ifm2.scalar != 0;
|
||||
enum ethosu_op_to_scale op_to_scale;
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) {
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) {
|
||||
op_to_scale = eltwise_emit_ofm_scaling(
|
||||
subgraph,
|
||||
operation->ifm.scale,
|
||||
|
|
@ -812,7 +812,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
|
||||
emit_ifm2(subgraph, operation, has_scalar);
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
emit_ifm_precision(subgraph, &operation->ifm2, OP_NONE, NPU_SET_IFM2_PRECISION);
|
||||
else
|
||||
emit_ifm2_precision(subgraph, operation, has_scalar);
|
||||
|
|
@ -820,7 +820,7 @@ emit_eltwise(struct ethosu_subgraph *subgraph, struct ethosu_operation *operatio
|
|||
emit_ifm2_broadcast(subgraph, operation, has_scalar);
|
||||
|
||||
emit_block_config(subgraph, operation);
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
emit_shram_registers(subgraph, operation);
|
||||
else
|
||||
emit_acc_format(subgraph, operation);
|
||||
|
|
@ -1090,7 +1090,7 @@ get_jobs(const struct ethosu_block *area,
|
|||
static unsigned
|
||||
calc_blockdep(struct ethosu_subgraph *subgraph, struct ethosu_operation *prev_op, struct ethosu_operation *operation)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen);
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
|
||||
if (!prev_op)
|
||||
return 0;
|
||||
|
|
@ -1187,7 +1187,7 @@ ethosu_emit_cmdstream(struct ethosu_subgraph *subgraph)
|
|||
|
||||
/* Compile */
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
EMIT0(NPU_SET_PARALLEL_MODE, 0x0);
|
||||
|
||||
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
|
|||
/* U65 packs 10-byte bias/scale entries contiguously then aligns to 16.
|
||||
* U85 scales are read in groups of 16 channels, so pad depth to a
|
||||
* 16-channel boundary first, then multiply by 10 bytes per entry. */
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
*scales_size = align(operation->ofm.shape.depth * 10, 16);
|
||||
else
|
||||
*scales_size = align(operation->ofm.shape.depth, 16) * 10;
|
||||
|
|
@ -87,7 +87,7 @@ fill_scale_and_biases(struct ethosu_subgraph *subgraph, struct ethosu_operation
|
|||
uint32_t shift;
|
||||
int scale = ethosu_quantize_scale(conv_scale, &shift);
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen)))
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device)))
|
||||
encode_bias_scale_u65(
|
||||
biases[i], scale, shift, &(*scales)[idx]);
|
||||
else
|
||||
|
|
|
|||
|
|
@ -5,10 +5,12 @@
|
|||
|
||||
#include "ethosu_device.h"
|
||||
#include "ethosu_ml.h"
|
||||
#include "ethosu_public.h"
|
||||
|
||||
#include "drm-uapi/ethosu_accel.h"
|
||||
|
||||
#include <xf86drm.h>
|
||||
#include <string.h>
|
||||
#include "util/os_mman.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_surface.h"
|
||||
|
|
@ -113,11 +115,8 @@ ethosu_create_context(struct pipe_screen *screen,
|
|||
pctx->buffer_subdata = u_default_buffer_subdata;
|
||||
pctx->clear_buffer = u_default_clear_buffer;
|
||||
|
||||
pctx->ml_operation_supported = ethosu_ml_operation_supported;
|
||||
pctx->ml_subgraph_create = ethosu_ml_subgraph_create;
|
||||
pctx->ml_subgraph_invoke = ethosu_ml_subgraph_invoke;
|
||||
pctx->ml_subgraph_read_output = ethosu_ml_subgraph_read_outputs;
|
||||
pctx->ml_subgraph_destroy = ethosu_ml_subgraph_destroy;
|
||||
|
||||
return pctx;
|
||||
}
|
||||
|
|
@ -217,6 +216,23 @@ dev_query(struct ethosu_screen *screen)
|
|||
assert(ret != -1);
|
||||
}
|
||||
|
||||
static struct pipe_ml_device *
|
||||
ethosu_ml_device_create_accel(struct pipe_screen *pscreen)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_screen(pscreen);
|
||||
|
||||
return &screen->ml_device.base;
|
||||
}
|
||||
|
||||
static void
|
||||
set_device_callbacks(struct ethosu_ml_device *device)
|
||||
{
|
||||
device->base.ml_operation_supported = ethosu_ml_operation_supported;
|
||||
device->base.ml_subgraph_create = ethosu_ml_subgraph_create;
|
||||
device->base.ml_subgraph_serialize = ethosu_ml_subgraph_serialize;
|
||||
device->base.ml_subgraph_destroy = ethosu_ml_subgraph_destroy;
|
||||
}
|
||||
|
||||
struct pipe_screen *
|
||||
ethosu_screen_create(int fd,
|
||||
const struct pipe_screen_config *config,
|
||||
|
|
@ -266,6 +282,26 @@ ethosu_screen_create(int fd,
|
|||
screen->context_create = ethosu_create_context;
|
||||
screen->resource_create = ethosu_resource_create;
|
||||
screen->resource_destroy = ethosu_resource_destroy;
|
||||
screen->get_ml_device = ethosu_ml_device_create_accel;
|
||||
|
||||
ethosu_screen->ml_device.base.id = "ethosu-65-256";
|
||||
set_device_callbacks(&ethosu_screen->ml_device);
|
||||
|
||||
return screen;
|
||||
}
|
||||
|
||||
struct pipe_ml_device *
|
||||
ethosu_ml_device_create(const char *spec)
|
||||
{
|
||||
struct ethosu_ml_device *device = NULL;
|
||||
|
||||
if (strcmp(spec, "65-256") != 0)
|
||||
return NULL;
|
||||
|
||||
ethosu_debug = debug_get_option_ethosu_debug();
|
||||
|
||||
device = rzalloc(NULL, struct ethosu_ml_device);
|
||||
set_device_callbacks(device);
|
||||
|
||||
return &device->base;
|
||||
}
|
||||
|
|
@ -40,8 +40,13 @@ struct ethosu_block {
|
|||
unsigned depth;
|
||||
};
|
||||
|
||||
/* Ethos-U ML device. The generic pipe_ml_device is its only (and therefore
 * first) member, which is what lets ethosu_device_screen() cast a
 * pipe_ml_device pointer back to this type. */
struct ethosu_ml_device {
|
||||
struct pipe_ml_device base;
|
||||
};
|
||||
|
||||
struct ethosu_screen {
|
||||
struct pipe_screen pscreen;
|
||||
struct ethosu_ml_device ml_device;
|
||||
|
||||
int fd;
|
||||
struct drm_ethosu_npu_info info;
|
||||
|
|
@ -65,6 +70,13 @@ ethosu_is_u65(struct ethosu_screen *e)
|
|||
return DRM_ETHOSU_ARCH_MAJOR(e->info.id) == 1;
|
||||
}
|
||||
|
||||
static inline struct ethosu_screen *
|
||||
ethosu_device_screen(struct pipe_ml_device *pdevice)
|
||||
{
|
||||
struct ethosu_ml_device *dev = (struct ethosu_ml_device *)pdevice;
|
||||
return container_of(dev, struct ethosu_screen, ml_device);
|
||||
}
|
||||
|
||||
struct ethosu_context {
|
||||
struct pipe_context base;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ ml_reorder_encode_weights(struct ethosu_subgraph *subgraph,
|
|||
uint8_t **weights,
|
||||
long *weights_size)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen);
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
int bit_depth = 8;
|
||||
bool is_sparse = false;
|
||||
EthosUTraversal traversal;
|
||||
|
|
|
|||
|
|
@ -201,7 +201,7 @@ ethosu_lower_concatenation(struct ethosu_subgraph *subgraph,
|
|||
{
|
||||
operation->type = ETHOSU_OPERATION_TYPE_POOLING;
|
||||
|
||||
if (ethosu_is_u65(ethosu_screen(subgraph->base.context->screen))) {
|
||||
if (ethosu_is_u65(ethosu_device_screen(subgraph->base.device))) {
|
||||
operation->pooling.type = ETHOSU_POOLING_TYPE_AVG;
|
||||
operation->round_mode = ETHOSU_ROUNDING_NATURAL;
|
||||
} else
|
||||
|
|
@ -432,7 +432,7 @@ ethosu_lower_graph(struct ethosu_subgraph *subgraph,
|
|||
}
|
||||
|
||||
if (operation.conv.scales.size + operation.conv.weights.size <=
|
||||
ethosu_screen(subgraph->base.context->screen)->info.sram_size) {
|
||||
ethosu_device_screen(subgraph->base.device)->info.sram_size) {
|
||||
struct ethosu_operation dma_operation = {0};
|
||||
ethosu_lower_dma(subgraph, &poperations[i], &operation, &dma_operation);
|
||||
|
||||
|
|
|
|||
|
|
@ -136,7 +136,7 @@ ethosu_quantize_scale(double scale, uint32_t *shift)
|
|||
}
|
||||
|
||||
bool
|
||||
ethosu_ml_operation_supported(struct pipe_context *pcontext,
|
||||
ethosu_ml_operation_supported(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *operation)
|
||||
{
|
||||
bool supported = false;
|
||||
|
|
@ -183,16 +183,14 @@ ethosu_ml_operation_supported(struct pipe_context *pcontext,
|
|||
}
|
||||
|
||||
struct pipe_ml_subgraph *
|
||||
ethosu_ml_subgraph_create(struct pipe_context *pcontext,
|
||||
ethosu_ml_subgraph_create(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *poperations,
|
||||
unsigned count)
|
||||
{
|
||||
struct pipe_screen *pscreen = pcontext->screen;
|
||||
struct ethosu_screen *screen = ethosu_screen(pscreen);
|
||||
struct ethosu_subgraph *subgraph;
|
||||
|
||||
subgraph = calloc(1, sizeof(*subgraph));
|
||||
subgraph->base.context = pcontext;
|
||||
subgraph->base.device = pdevice;
|
||||
|
||||
subgraph->tensors = UTIL_DYNARRAY_INIT;
|
||||
subgraph->operations = UTIL_DYNARRAY_INIT;
|
||||
|
|
@ -216,42 +214,120 @@ ethosu_ml_subgraph_create(struct pipe_context *pcontext,
|
|||
|
||||
ethosu_emit_cmdstream(subgraph);
|
||||
|
||||
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
|
||||
free(operation->kernel.scales);
|
||||
free(operation->kernel.zero_points);
|
||||
}
|
||||
util_dynarray_fini(&subgraph->operations);
|
||||
|
||||
free(subgraph->cmd0_state);
|
||||
free(subgraph->cmd1_state);
|
||||
free(subgraph->cmd0_valid);
|
||||
free(subgraph->cmd1_valid);
|
||||
|
||||
return &subgraph->base;
|
||||
}
|
||||
|
||||
uint8_t *
|
||||
ethosu_ml_subgraph_serialize(struct pipe_ml_device *pdevice,
|
||||
struct pipe_ml_subgraph *psubgraph,
|
||||
size_t *size)
|
||||
{
|
||||
struct ethosu_subgraph *subgraph = (struct ethosu_subgraph *)(psubgraph);
|
||||
uint64_t header_size = NUM_HEADER_FIELDS * sizeof(uint64_t);
|
||||
uint64_t tensors_size = util_dynarray_num_elements(&subgraph->tensors,
|
||||
struct ethosu_tensor) * NUM_TENSOR_FIELDS * sizeof(uint32_t);
|
||||
uint64_t cmdstream_size = (subgraph->cursor - subgraph->cmdstream) *
|
||||
sizeof(*subgraph->cursor);
|
||||
uint64_t coefs_size = subgraph->coefs_used * sizeof(*subgraph->coefs);
|
||||
uint64_t io_size = subgraph->io_used;
|
||||
uint64_t total_size = header_size + cmdstream_size + coefs_size +
|
||||
tensors_size;
|
||||
uint8_t *buffer, *cursor;
|
||||
|
||||
buffer = malloc(total_size);
|
||||
if (!buffer)
|
||||
return NULL;
|
||||
|
||||
cursor = buffer;
|
||||
|
||||
uint64_t *header = (uint64_t *)cursor;
|
||||
header[0] = cmdstream_size;
|
||||
header[1] = coefs_size;
|
||||
header[2] = io_size;
|
||||
header[3] = tensors_size;
|
||||
cursor += header_size;
|
||||
|
||||
uint32_t *tensors = (uint32_t *)cursor;
|
||||
util_dynarray_foreach(&subgraph->tensors, struct ethosu_tensor, tensor) {
|
||||
tensors[0] = tensor->index;
|
||||
tensors[1] = tensor->offset;
|
||||
tensors[2] = tensor->size;
|
||||
tensors += NUM_TENSOR_FIELDS;
|
||||
}
|
||||
cursor += tensors_size;
|
||||
|
||||
memcpy(cursor, subgraph->cmdstream, cmdstream_size);
|
||||
cursor += cmdstream_size;
|
||||
|
||||
if (coefs_size > 0)
|
||||
memcpy(cursor, subgraph->coefs, coefs_size);
|
||||
|
||||
*size = total_size;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static void
|
||||
prepare_for_submission(struct ethosu_subgraph *subgraph,
|
||||
struct pipe_context *pcontext)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_screen(pcontext->screen);
|
||||
uint64_t cmdstream_size = (subgraph->cursor - subgraph->cmdstream) *
|
||||
sizeof(*subgraph->cursor);
|
||||
|
||||
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS))
|
||||
ethosu_dump_buffer((uint8_t *)subgraph->cmdstream, "cmdstream", 0, 0, 0,
|
||||
cmdstream_size);
|
||||
|
||||
struct drm_ethosu_cmdstream_bo_create cmd_bo_create = {
|
||||
.size = (subgraph->cursor - subgraph->cmdstream) * sizeof(*subgraph->cursor),
|
||||
.size = cmdstream_size,
|
||||
.data = (uintptr_t)subgraph->cmdstream,
|
||||
};
|
||||
|
||||
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS))
|
||||
ethosu_dump_buffer((uint8_t *)subgraph->cmdstream, "cmdstream", 0, 0, 0, (subgraph->cursor - subgraph->cmdstream) * sizeof(*subgraph->cursor));
|
||||
|
||||
int ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE, &cmd_bo_create);
|
||||
int ret = drmIoctl(screen->fd, DRM_IOCTL_ETHOSU_CMDSTREAM_BO_CREATE,
|
||||
&cmd_bo_create);
|
||||
assert(ret == 0);
|
||||
|
||||
free(subgraph->cmdstream);
|
||||
subgraph->cmdstream = NULL;
|
||||
|
||||
subgraph->cmdstream_bo = cmd_bo_create.handle;
|
||||
|
||||
DBG("subgraph->coefs_used %d\n", subgraph->coefs_used);
|
||||
if (subgraph->coefs_used > 0) {
|
||||
subgraph->coefs_rsrc = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, subgraph->coefs_used);
|
||||
assert(subgraph->coefs_rsrc != NULL);
|
||||
pipe_buffer_write(subgraph->base.context, subgraph->coefs_rsrc, 0, subgraph->coefs_used, subgraph->coefs);
|
||||
subgraph->coefs_rsrc = pipe_buffer_create(pcontext->screen, 0,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
subgraph->coefs_used);
|
||||
pipe_buffer_write(pcontext, subgraph->coefs_rsrc, 0,
|
||||
subgraph->coefs_used, subgraph->coefs);
|
||||
|
||||
free(subgraph->coefs);
|
||||
subgraph->coefs = NULL;
|
||||
|
||||
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) {
|
||||
struct pipe_transfer *transfer_in;
|
||||
uint8_t *buf = pipe_buffer_map(subgraph->base.context, subgraph->coefs_rsrc,
|
||||
uint8_t *buf = pipe_buffer_map(pcontext, subgraph->coefs_rsrc,
|
||||
PIPE_MAP_READ, &transfer_in);
|
||||
ethosu_dump_buffer(buf, "coefs", 0, 0, 0, pipe_buffer_size(subgraph->coefs_rsrc));
|
||||
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||
ethosu_dump_buffer(buf, "coefs", 0, 0, 0,
|
||||
pipe_buffer_size(subgraph->coefs_rsrc));
|
||||
pipe_buffer_unmap(pcontext, transfer_in);
|
||||
}
|
||||
}
|
||||
|
||||
subgraph->io_rsrc = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, subgraph->io_used);
|
||||
assert(subgraph->io_rsrc != NULL);
|
||||
|
||||
return &subgraph->base;
|
||||
DBG("subgraph->io_used %d\n", subgraph->io_used);
|
||||
subgraph->io_rsrc = pipe_buffer_create(pcontext->screen, 0,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
subgraph->io_used);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -267,6 +343,9 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
|
|||
struct timespec start, end;
|
||||
int ret;
|
||||
|
||||
if (subgraph->io_rsrc == NULL)
|
||||
prepare_for_submission(subgraph, pcontext);
|
||||
|
||||
for (unsigned i = 0; i < inputs_count; i++) {
|
||||
struct ethosu_tensor *input = ethosu_find_tensor(subgraph, input_idxs[i]);
|
||||
assert(input);
|
||||
|
|
@ -279,10 +358,10 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
|
|||
|
||||
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) {
|
||||
struct pipe_transfer *transfer_in;
|
||||
uint8_t *buf = pipe_buffer_map(subgraph->base.context, subgraph->io_rsrc,
|
||||
uint8_t *buf = pipe_buffer_map(pcontext, subgraph->io_rsrc,
|
||||
PIPE_MAP_READ, &transfer_in);
|
||||
ethosu_dump_buffer(buf, "io-before", 0, 0, 0, pipe_buffer_size(subgraph->io_rsrc));
|
||||
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||
pipe_buffer_unmap(pcontext, transfer_in);
|
||||
}
|
||||
|
||||
job.cmd_bo = subgraph->cmdstream_bo;
|
||||
|
|
@ -313,8 +392,8 @@ ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
|
|||
|
||||
/* Force a sync */
|
||||
struct pipe_transfer *transfer_in;
|
||||
pipe_buffer_map(subgraph->base.context, subgraph->io_rsrc, PIPE_MAP_READ, &transfer_in);
|
||||
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||
pipe_buffer_map(pcontext, subgraph->io_rsrc, PIPE_MAP_READ, &transfer_in);
|
||||
pipe_buffer_unmap(pcontext, transfer_in);
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &end);
|
||||
duration_ns = (long long)(end.tv_sec - start.tv_sec) * 1000000000LL + (end.tv_nsec - start.tv_nsec);
|
||||
|
|
@ -337,10 +416,10 @@ ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext,
|
|||
|
||||
if (DBG_ENABLED(ETHOSU_DBG_DUMP_BOS)) {
|
||||
struct pipe_transfer *transfer_in;
|
||||
uint8_t *buf = pipe_buffer_map(subgraph->base.context, subgraph->io_rsrc,
|
||||
uint8_t *buf = pipe_buffer_map(pcontext, subgraph->io_rsrc,
|
||||
PIPE_MAP_READ, &transfer_in);
|
||||
ethosu_dump_buffer(buf, "io-after", 0, 0, 0, pipe_buffer_size(subgraph->io_rsrc));
|
||||
pipe_buffer_unmap(subgraph->base.context, transfer_in);
|
||||
pipe_buffer_unmap(pcontext, transfer_in);
|
||||
}
|
||||
|
||||
pipe_buffer_read(pcontext, subgraph->io_rsrc, output->offset, output->size, outputs[i]);
|
||||
|
|
@ -348,33 +427,30 @@ ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext,
|
|||
}
|
||||
|
||||
void
|
||||
ethosu_ml_subgraph_destroy(struct pipe_context *pcontext,
|
||||
ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
|
||||
struct pipe_ml_subgraph *psubgraph)
|
||||
{
|
||||
int ret;
|
||||
struct drm_gem_close arg = {0};
|
||||
struct ethosu_screen *screen = ethosu_screen(pcontext->screen);
|
||||
struct ethosu_subgraph *subgraph = (struct ethosu_subgraph *)(psubgraph);
|
||||
|
||||
pipe_resource_reference(&subgraph->io_rsrc, NULL);
|
||||
pipe_resource_reference(&subgraph->coefs_rsrc, NULL);
|
||||
if (subgraph->io_rsrc) {
|
||||
/* Post-submission state: cleanup DRM resources */
|
||||
struct ethosu_screen *screen = ethosu_device_screen(pdevice);
|
||||
struct drm_gem_close arg = {0};
|
||||
int ret;
|
||||
|
||||
arg.handle = subgraph->cmdstream_bo;
|
||||
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
|
||||
assert(ret >= 0);
|
||||
pipe_resource_reference(&subgraph->io_rsrc, NULL);
|
||||
pipe_resource_reference(&subgraph->coefs_rsrc, NULL);
|
||||
|
||||
util_dynarray_foreach (&subgraph->operations, struct ethosu_operation, operation) {
|
||||
free(operation->kernel.scales);
|
||||
free(operation->kernel.zero_points);
|
||||
arg.handle = subgraph->cmdstream_bo;
|
||||
ret = drmIoctl(screen->fd, DRM_IOCTL_GEM_CLOSE, &arg);
|
||||
assert(ret >= 0);
|
||||
} else {
|
||||
/* Pre-submission state: cleanup raw buffers */
|
||||
free(subgraph->cmdstream);
|
||||
free(subgraph->coefs);
|
||||
}
|
||||
util_dynarray_fini(&subgraph->operations);
|
||||
|
||||
util_dynarray_fini(&subgraph->tensors);
|
||||
|
||||
free(subgraph->cmd0_state);
|
||||
free(subgraph->cmd1_state);
|
||||
free(subgraph->cmd0_valid);
|
||||
free(subgraph->cmd1_valid);
|
||||
|
||||
free(subgraph);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -195,11 +195,14 @@ struct ethosu_tensor {
|
|||
enum ethosu_layout layout;
|
||||
};
|
||||
|
||||
/* Number of uint64_t fields at the start of a serialized subgraph, as written
 * by ethosu_ml_subgraph_serialize(): cmdstream size, coefs size, IO size and
 * tensor table size. */
#define NUM_HEADER_FIELDS 4
|
||||
/* Number of uint32_t fields stored per tensor in a serialized subgraph:
 * index, offset and size. */
#define NUM_TENSOR_FIELDS 3
|
||||
|
||||
struct ethosu_subgraph {
|
||||
struct pipe_ml_subgraph base;
|
||||
|
||||
struct util_dynarray operations; /* ethosu_operation */
|
||||
struct util_dynarray tensors; /* ethosu_tensor* */
|
||||
struct util_dynarray tensors; /* ethosu_tensor */
|
||||
|
||||
unsigned cmdstream_used;
|
||||
uint32_t *cmdstream;
|
||||
|
|
@ -221,13 +224,19 @@ struct ethosu_subgraph {
|
|||
};
|
||||
|
||||
bool
|
||||
ethosu_ml_operation_supported(struct pipe_context *pcontext, const struct pipe_ml_operation *operation);
|
||||
ethosu_ml_operation_supported(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *operation);
|
||||
|
||||
struct pipe_ml_subgraph *
|
||||
ethosu_ml_subgraph_create(struct pipe_context *pcontext,
|
||||
ethosu_ml_subgraph_create(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *poperations,
|
||||
unsigned count);
|
||||
|
||||
uint8_t *
|
||||
ethosu_ml_subgraph_serialize(struct pipe_ml_device *pdevice,
|
||||
struct pipe_ml_subgraph *psubgraph,
|
||||
size_t *size);
|
||||
|
||||
void ethosu_ml_subgraph_invoke(struct pipe_context *pcontext,
|
||||
struct pipe_ml_subgraph *psubgraph,
|
||||
unsigned inputs_count, unsigned input_idxs[],
|
||||
|
|
@ -239,7 +248,7 @@ void ethosu_ml_subgraph_read_outputs(struct pipe_context *pcontext,
|
|||
unsigned output_idxs[], void *outputs[],
|
||||
bool is_signed[]);
|
||||
|
||||
void ethosu_ml_subgraph_destroy(struct pipe_context *context,
|
||||
void ethosu_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
|
||||
struct pipe_ml_subgraph *psubgraph);
|
||||
|
||||
unsigned ethosu_allocate_feature_map(struct ethosu_subgraph *subgraph, unsigned tensor_idx);
|
||||
|
|
|
|||
13
src/gallium/drivers/ethosu/ethosu_public.h
Normal file
13
src/gallium/drivers/ethosu/ethosu_public.h
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
/*
|
||||
* Copyright (c) 2024 Tomeu Vizoso <tomeu@tomeuvizoso.net>
|
||||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#ifndef ETHOSU_PUBLIC_H
|
||||
#define ETHOSU_PUBLIC_H
|
||||
|
||||
struct pipe_ml_device;
|
||||
|
||||
/* Create an Ethos-U ML device from a hardware spec string, without a
 * pipe_screen. The implementation only accepts "65-256" and returns NULL for
 * any other spec. */
struct pipe_ml_device *ethosu_ml_device_create(const char *spec);
|
||||
|
||||
#endif /* ETHOSU_PUBLIC_H */
|
||||
|
|
@ -15,7 +15,7 @@ required_input_size(int value, int stride, int border)
|
|||
static struct ethosu_block
|
||||
_get_ifm_blocksize(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation, struct ethosu_block ofm_block)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen);
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
struct ethosu_block ifm_block = {0};
|
||||
|
||||
// IFM block height
|
||||
|
|
@ -73,7 +73,7 @@ try_block_config(struct ethosu_operation *operation, struct ethosu_block ofm_blo
|
|||
static struct ethosu_block_config
|
||||
find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen);
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
struct ethosu_block_config config = {};
|
||||
struct ethosu_block search_space = ARCH_OFM_BLOCK_MAX;
|
||||
float ofm_elements = operation->ofm.shape.width * operation->ofm.shape.height * operation->ofm.shape.depth;
|
||||
|
|
@ -205,7 +205,7 @@ find_block_config(struct ethosu_subgraph *subgraph, struct ethosu_operation *ope
|
|||
void
|
||||
ethosu_sched_operation(struct ethosu_subgraph *subgraph, struct ethosu_operation *operation)
|
||||
{
|
||||
struct ethosu_screen *screen = ethosu_screen(subgraph->base.context->screen);
|
||||
struct ethosu_screen *screen = ethosu_device_screen(subgraph->base.device);
|
||||
|
||||
if (ethosu_is_u65(screen))
|
||||
operation->block_config = find_block_config(subgraph, operation);
|
||||
|
|
|
|||
|
|
@ -723,11 +723,8 @@ etna_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
|
|||
|
||||
pctx->destroy = etna_context_destroy;
|
||||
pctx->draw_vbo = etna_draw_vbo;
|
||||
pctx->ml_operation_supported = etna_ml_operation_supported;
|
||||
pctx->ml_subgraph_create = etna_ml_subgraph_create;
|
||||
pctx->ml_subgraph_invoke = etna_ml_subgraph_invoke;
|
||||
pctx->ml_subgraph_read_output = etna_ml_subgraph_read_outputs;
|
||||
pctx->ml_subgraph_destroy = etna_ml_subgraph_destroy;
|
||||
pctx->flush = etna_context_flush;
|
||||
pctx->set_debug_callback = etna_set_debug_callback;
|
||||
pctx->create_fence_fd = etna_create_fence_fd;
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
#include "etnaviv_ml_nn.h"
|
||||
#include "etnaviv_ml_tp.h"
|
||||
#include "etnaviv_ml.h"
|
||||
#include "etnaviv_screen.h"
|
||||
|
||||
struct etna_ml_tensor *
|
||||
etna_ml_get_tensor(struct etna_ml_subgraph *subgraph, unsigned idx)
|
||||
|
|
@ -68,7 +69,6 @@ etna_ml_allocate_tensor(struct etna_ml_subgraph *subgraph)
|
|||
void
|
||||
etna_ml_create_tensor(struct etna_ml_subgraph *subgraph, unsigned idx, unsigned size)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_ml_tensor *tensor = etna_ml_get_tensor(subgraph, idx);
|
||||
|
||||
assert(idx < util_dynarray_num_elements(&subgraph->tensors, struct etna_ml_tensor *));
|
||||
|
|
@ -81,7 +81,7 @@ etna_ml_create_tensor(struct etna_ml_subgraph *subgraph, unsigned idx, unsigned
|
|||
return;
|
||||
}
|
||||
|
||||
res = etna_ml_create_resource(context, size);
|
||||
res = etna_ml_create_resource(&subgraph->screen->base, size);
|
||||
tensor->resource = res;
|
||||
tensor->size = size;
|
||||
|
||||
|
|
@ -99,10 +99,9 @@ etna_ml_destroy_tensor(struct etna_ml_subgraph *subgraph, unsigned idx)
|
|||
}
|
||||
|
||||
struct etna_bo *
|
||||
etna_ml_create_bo(struct pipe_context *pctx, size_t size)
|
||||
etna_ml_create_bo(struct etna_screen *screen, size_t size)
|
||||
{
|
||||
struct etna_context *ctx = etna_context(pctx);
|
||||
struct etna_bo *bo = etna_bo_new(ctx->screen->dev,
|
||||
struct etna_bo *bo = etna_bo_new(screen->dev,
|
||||
size,
|
||||
DRM_ETNA_GEM_CACHE_WC);
|
||||
|
||||
|
|
@ -115,9 +114,9 @@ etna_ml_create_bo(struct pipe_context *pctx, size_t size)
|
|||
}
|
||||
|
||||
struct pipe_resource *
|
||||
etna_ml_create_resource(struct pipe_context *pctx, size_t size)
|
||||
etna_ml_create_resource(struct pipe_screen *pscreen, size_t size)
|
||||
{
|
||||
struct pipe_resource *res = pipe_buffer_create(pctx->screen, 0, PIPE_USAGE_DEFAULT, size);
|
||||
struct pipe_resource *res = pipe_buffer_create(pscreen, 0, PIPE_USAGE_DEFAULT, size);
|
||||
void *ptr = etna_bo_map(etna_buffer_resource(res)->bo);
|
||||
memset(ptr, 0, pipe_buffer_size(res));
|
||||
|
||||
|
|
@ -125,9 +124,8 @@ etna_ml_create_resource(struct pipe_context *pctx, size_t size)
|
|||
}
|
||||
|
||||
struct etna_core_npu_info *
|
||||
etna_ml_get_core_info(struct etna_context *context)
|
||||
etna_ml_get_core_info(struct etna_screen *screen)
|
||||
{
|
||||
struct etna_screen *screen = context->screen;
|
||||
struct etna_core_info *info = etna_gpu_get_core_info(screen->npu);
|
||||
return &info->npu;
|
||||
}
|
||||
|
|
@ -135,9 +133,7 @@ etna_ml_get_core_info(struct etna_context *context)
|
|||
static bool
|
||||
needs_reshuffle(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operation *poperation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned nn_core_version = ctx->screen->specs.nn_core_version;
|
||||
unsigned nn_core_version = subgraph->screen->specs.nn_core_version;
|
||||
bool has_stride = poperation->conv.stride_x > 1 || poperation->conv.stride_y > 1;
|
||||
bool pointwise = poperation->conv.pointwise;
|
||||
bool has_padding = poperation->conv.padding_top > 0 ||
|
||||
|
|
@ -651,7 +647,7 @@ tensor_quantization_supported(struct pipe_tensor *tensor)
|
|||
}
|
||||
|
||||
bool
|
||||
etna_ml_operation_supported(struct pipe_context *pcontext,
|
||||
etna_ml_operation_supported(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *operation)
|
||||
{
|
||||
bool supported = false;
|
||||
|
|
@ -743,12 +739,12 @@ etna_ml_operation_supported(struct pipe_context *pcontext,
|
|||
}
|
||||
|
||||
struct pipe_ml_subgraph *
|
||||
etna_ml_subgraph_create(struct pipe_context *pcontext,
|
||||
etna_ml_subgraph_create(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *poperations,
|
||||
unsigned count)
|
||||
{
|
||||
struct etna_context *ctx = etna_context(pcontext);
|
||||
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
|
||||
struct etna_screen *screen = etna_ml_device_screen(pdevice);
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
struct etna_ml_subgraph *subgraph;
|
||||
struct list_head operations;
|
||||
unsigned tensor_count;
|
||||
|
|
@ -763,7 +759,8 @@ etna_ml_subgraph_create(struct pipe_context *pcontext,
|
|||
|
||||
list_inithead(&operations);
|
||||
|
||||
subgraph->base.context = pcontext;
|
||||
subgraph->base.device = pdevice;
|
||||
subgraph->screen = screen;
|
||||
subgraph->operations = UTIL_DYNARRAY_INIT;
|
||||
|
||||
subgraph->tensors = UTIL_DYNARRAY_INIT;
|
||||
|
|
@ -880,7 +877,7 @@ etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psub
|
|||
bool is_signed[])
|
||||
{
|
||||
struct etna_context *ctx = etna_context(pctx);
|
||||
unsigned tp_core_count = etna_ml_get_core_info(ctx)->tp_core_count;
|
||||
unsigned tp_core_count = etna_ml_get_core_info(ctx->screen)->tp_core_count;
|
||||
struct etna_ml_subgraph *subgraph = (struct etna_ml_subgraph *)(psubgraph);
|
||||
struct etna_cmd_stream *stream = ctx->stream;
|
||||
static bool is_initialized = false;
|
||||
|
|
@ -968,10 +965,10 @@ etna_ml_subgraph_invoke(struct pipe_context *pctx, struct pipe_ml_subgraph *psub
|
|||
|
||||
switch (operation->type) {
|
||||
case ETNA_JOB_TYPE_TP:
|
||||
etna_ml_emit_operation_tp(subgraph, operation, i);
|
||||
etna_ml_emit_operation_tp(pctx, subgraph, operation, i);
|
||||
break;
|
||||
case ETNA_JOB_TYPE_NN:
|
||||
etna_ml_emit_operation_nn(subgraph, operation, i);
|
||||
etna_ml_emit_operation_nn(pctx, subgraph, operation, i);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE("Unsupported ML operation type");
|
||||
|
|
@ -1063,7 +1060,7 @@ etna_ml_subgraph_read_outputs(struct pipe_context *context, struct pipe_ml_subgr
|
|||
}
|
||||
|
||||
void
|
||||
etna_ml_subgraph_destroy(struct pipe_context *context, struct pipe_ml_subgraph *psubgraph)
|
||||
etna_ml_subgraph_destroy(struct pipe_ml_device *pdevice, struct pipe_ml_subgraph *psubgraph)
|
||||
{
|
||||
struct etna_ml_subgraph *subgraph = (struct etna_ml_subgraph *)(psubgraph);
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,16 @@
|
|||
|
||||
#define MAX_CONFIG_BOS 4
|
||||
|
||||
/* etnaviv's ML device object. As declared here it carries nothing beyond
 * the pipe_ml_device it wraps. */
struct etna_ml_device {
|
||||
/* Wrapped pipe_ml_device; sole member, so it sits at offset 0. */
struct pipe_ml_device base;
|
||||
};
|
||||
|
||||
/* Downcast a pipe_ml_device pointer to the etnaviv wrapper type.
 *
 * This is a plain pointer cast with no run-time check: the caller must pass
 * a device that is really the `base` member of a struct etna_ml_device.
 * A NULL argument is returned as NULL. */
static inline struct etna_ml_device *
|
||||
etna_ml_device(struct pipe_ml_device *dev)
|
||||
{
|
||||
return (struct etna_ml_device *)dev;
|
||||
}
|
||||
|
||||
enum etna_job_type {
|
||||
ETNA_JOB_TYPE_NN,
|
||||
ETNA_JOB_TYPE_TP,
|
||||
|
|
@ -47,6 +57,7 @@ struct etna_ml_tensor {
|
|||
|
||||
struct etna_ml_subgraph {
|
||||
struct pipe_ml_subgraph base;
|
||||
struct etna_screen *screen;
|
||||
|
||||
struct util_dynarray operations;
|
||||
|
||||
|
|
@ -142,18 +153,18 @@ struct pipe_resource *etna_ml_get_resource(struct etna_ml_subgraph *subgraph, un
|
|||
unsigned etna_ml_get_offset(struct etna_ml_subgraph *subgraph, unsigned idx);
|
||||
unsigned etna_ml_get_size(struct etna_ml_subgraph *subgraph, unsigned idx);
|
||||
|
||||
struct etna_bo *etna_ml_create_bo(struct pipe_context *pctx, size_t size);
|
||||
struct etna_bo *etna_ml_create_bo(struct etna_screen *screen, size_t size);
|
||||
|
||||
struct pipe_resource *etna_ml_create_resource(struct pipe_context *pctx, size_t size);
|
||||
struct pipe_resource *etna_ml_create_resource(struct pipe_screen *pscreen, size_t size);
|
||||
|
||||
struct etna_core_npu_info *etna_ml_get_core_info(struct etna_context *context);
|
||||
struct etna_core_npu_info *etna_ml_get_core_info(struct etna_screen *screen);
|
||||
|
||||
bool
|
||||
etna_ml_operation_supported(struct pipe_context *pcontext,
|
||||
etna_ml_operation_supported(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *operation);
|
||||
|
||||
struct pipe_ml_subgraph *
|
||||
etna_ml_subgraph_create(struct pipe_context *context,
|
||||
etna_ml_subgraph_create(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *operations,
|
||||
unsigned count);
|
||||
|
||||
|
|
@ -167,6 +178,6 @@ etna_ml_subgraph_read_outputs(struct pipe_context *context, struct pipe_ml_subgr
|
|||
bool is_signed[]);
|
||||
|
||||
void
|
||||
etna_ml_subgraph_destroy(struct pipe_context *context, struct pipe_ml_subgraph *subgraph);
|
||||
etna_ml_subgraph_destroy(struct pipe_ml_device *pdevice, struct pipe_ml_subgraph *subgraph);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -376,8 +376,7 @@ reorder_for_hw_depthwise(struct etna_ml_subgraph *subgraph, struct etna_operatio
|
|||
static void
|
||||
transpose(struct etna_ml_subgraph *subgraph, struct etna_operation *operation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
unsigned nn_core_version = etna_context(context)->screen->specs.nn_core_version;
|
||||
unsigned nn_core_version = subgraph->screen->specs.nn_core_version;
|
||||
void *map = operation->weight_tensor;
|
||||
unsigned new_size;
|
||||
uint8_t *output;
|
||||
|
|
@ -503,8 +502,7 @@ static bool
|
|||
calc_pooling_first_pixel(struct etna_ml_subgraph *subgraph,
|
||||
const struct pipe_ml_operation *poperation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
unsigned nn_core_version = etna_context(context)->screen->specs.nn_core_version;
|
||||
unsigned nn_core_version = subgraph->screen->specs.nn_core_version;
|
||||
unsigned input_width = poperation->input_tensors[0]->dims[1];
|
||||
unsigned input_channels = poperation->input_tensors[0]->dims[3];
|
||||
|
||||
|
|
@ -554,9 +552,7 @@ etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph,
|
|||
const struct pipe_ml_operation *poperation,
|
||||
struct etna_operation *operation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned nn_core_version = ctx->screen->specs.nn_core_version;
|
||||
unsigned nn_core_version = subgraph->screen->specs.nn_core_version;
|
||||
|
||||
/* TODO: Support stride_x != stride_y */
|
||||
assert(poperation->conv.stride_x == poperation->conv.stride_y);
|
||||
|
|
@ -886,9 +882,7 @@ etna_ml_lower_add(struct etna_ml_subgraph *subgraph,
|
|||
const struct pipe_ml_operation *poperation,
|
||||
struct etna_operation *operation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned nn_core_version = ctx->screen->specs.nn_core_version;
|
||||
unsigned nn_core_version = subgraph->screen->specs.nn_core_version;
|
||||
|
||||
if (nn_core_version < 8)
|
||||
etna_ml_lower_add_v7(subgraph, poperation, operation);
|
||||
|
|
@ -988,24 +982,23 @@ etna_ml_calc_addition_sizes(unsigned *input_width, unsigned *input_height, unsig
|
|||
}
|
||||
|
||||
static unsigned
|
||||
etna_ml_calculate_tiling(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out)
|
||||
etna_ml_calculate_tiling(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out)
|
||||
{
|
||||
unsigned nn_core_version = ctx->screen->specs.nn_core_version;
|
||||
unsigned nn_core_version = screen->specs.nn_core_version;
|
||||
if (nn_core_version == 7)
|
||||
return etna_ml_calculate_tiling_v7(ctx, operation, tile_width_out, tile_height_out);
|
||||
return etna_ml_calculate_tiling_v7(screen, operation, tile_width_out, tile_height_out);
|
||||
else
|
||||
return etna_ml_calculate_tiling_v8(ctx, operation, tile_width_out, tile_height_out);
|
||||
return etna_ml_calculate_tiling_v8(screen, operation, tile_width_out, tile_height_out);
|
||||
}
|
||||
|
||||
static struct etna_bo *
|
||||
create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct etna_bo *coefficients, unsigned coef_cache_size)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
|
||||
unsigned nn_core_version = ctx->screen->specs.nn_core_version;
|
||||
unsigned oc_sram_size = etna_ml_get_core_info(ctx)->on_chip_sram_size;
|
||||
struct etna_bo *bo = etna_ml_create_bo(context, sizeof(struct etna_nn_params));
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned nn_core_version = screen->specs.nn_core_version;
|
||||
unsigned oc_sram_size = etna_ml_get_core_info(screen)->on_chip_sram_size;
|
||||
struct etna_bo *bo = etna_ml_create_bo(screen, sizeof(struct etna_nn_params));
|
||||
unsigned input_width = operation->input_width;
|
||||
unsigned input_height = operation->input_height;
|
||||
unsigned input_channels = operation->input_channels;
|
||||
|
|
@ -1162,7 +1155,7 @@ create_nn_config(struct etna_ml_subgraph *subgraph, const struct etna_operation
|
|||
}
|
||||
|
||||
unsigned tile_x, tile_y;
|
||||
unsigned superblocks = etna_ml_calculate_tiling(ctx, operation, &tile_x, &tile_y);
|
||||
unsigned superblocks = etna_ml_calculate_tiling(screen, operation, &tile_x, &tile_y);
|
||||
map->out_image_tile_x_size = tile_x;
|
||||
map->out_image_tile_y_size = tile_y;
|
||||
|
||||
|
|
@ -1294,9 +1287,7 @@ void
|
|||
etna_ml_compile_operation_nn(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation,
|
||||
struct etna_vip_instruction *instruction)
|
||||
{
|
||||
struct pipe_context *pctx = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(pctx);
|
||||
unsigned nn_core_version = ctx->screen->specs.nn_core_version;
|
||||
unsigned nn_core_version = subgraph->screen->specs.nn_core_version;
|
||||
unsigned coef_cache_size;
|
||||
|
||||
instruction->type = ETNA_JOB_TYPE_NN;
|
||||
|
|
@ -1320,11 +1311,11 @@ etna_ml_compile_operation_nn(struct etna_ml_subgraph *subgraph, const struct etn
|
|||
}
|
||||
|
||||
void
|
||||
etna_ml_emit_operation_nn(struct etna_ml_subgraph *subgraph,
|
||||
etna_ml_emit_operation_nn(struct pipe_context *pctx,
|
||||
struct etna_ml_subgraph *subgraph,
|
||||
struct etna_vip_instruction *operation,
|
||||
unsigned idx)
|
||||
{
|
||||
struct pipe_context *pctx = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(pctx);
|
||||
struct etna_cmd_stream *stream = ctx->stream;
|
||||
unsigned offset = idx + 1;
|
||||
|
|
|
|||
|
|
@ -11,13 +11,13 @@ etna_ml_calc_addition_sizes(unsigned *input_width, unsigned *input_height, unsig
|
|||
unsigned *output_width, unsigned *output_height, unsigned *output_channels);
|
||||
|
||||
unsigned
|
||||
etna_ml_calculate_tiling_v7(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out);
|
||||
etna_ml_calculate_tiling_v7(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out);
|
||||
|
||||
struct etna_bo *
|
||||
etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, unsigned *cache_size);
|
||||
|
||||
unsigned
|
||||
etna_ml_calculate_tiling_v8(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out);
|
||||
etna_ml_calculate_tiling_v8(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out);
|
||||
|
||||
struct etna_bo *
|
||||
etna_ml_create_coeffs_v8(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, unsigned *cache_size);
|
||||
|
|
@ -43,6 +43,7 @@ etna_ml_compile_operation_nn(struct etna_ml_subgraph *subgraph,
|
|||
struct etna_vip_instruction *instruction);
|
||||
|
||||
void
|
||||
etna_ml_emit_operation_nn(struct etna_ml_subgraph *subgraph,
|
||||
etna_ml_emit_operation_nn(struct pipe_context *pctx,
|
||||
struct etna_ml_subgraph *subgraph,
|
||||
struct etna_vip_instruction *operation,
|
||||
unsigned idx);
|
||||
|
|
|
|||
|
|
@ -17,10 +17,10 @@ map_resource(struct pipe_resource *resource)
|
|||
#define MAX_TILE_WIDTH 64
|
||||
|
||||
static unsigned
|
||||
calc_superblocks(struct etna_context *ctx, const struct etna_operation *operation, unsigned tile_y, unsigned interleave_mode)
|
||||
calc_superblocks(struct etna_screen *screen, const struct etna_operation *operation, unsigned tile_y, unsigned interleave_mode)
|
||||
{
|
||||
unsigned nn_core_count = ctx->screen->info->npu.nn_core_count;
|
||||
unsigned nn_accum_buffer_depth = ctx->screen->info->npu.nn_accum_buffer_depth;
|
||||
unsigned nn_core_count = screen->info->npu.nn_core_count;
|
||||
unsigned nn_accum_buffer_depth = screen->info->npu.nn_accum_buffer_depth;
|
||||
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
|
||||
unsigned kernels_per_core = DIV_ROUND_UP(output_channels, nn_core_count);
|
||||
unsigned foo = (nn_accum_buffer_depth * interleave_mode) / tile_y;
|
||||
|
|
@ -60,10 +60,10 @@ calc_interleave_mode(unsigned tile_width, unsigned weight_height)
|
|||
}
|
||||
|
||||
unsigned
|
||||
etna_ml_calculate_tiling_v7(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out)
|
||||
etna_ml_calculate_tiling_v7(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out)
|
||||
{
|
||||
unsigned nn_input_buffer_depth = ctx->screen->info->npu.nn_input_buffer_depth;
|
||||
unsigned nn_accum_buffer_depth = ctx->screen->info->npu.nn_accum_buffer_depth;
|
||||
unsigned nn_input_buffer_depth = screen->info->npu.nn_input_buffer_depth;
|
||||
unsigned nn_accum_buffer_depth = screen->info->npu.nn_accum_buffer_depth;
|
||||
unsigned input_width = operation->input_width;
|
||||
unsigned input_height = operation->input_height;
|
||||
unsigned input_channels = operation->input_channels;
|
||||
|
|
@ -95,7 +95,7 @@ etna_ml_calculate_tiling_v7(struct etna_context *ctx, const struct etna_operatio
|
|||
tile_height -= 1;
|
||||
|
||||
tile_height = MAX2(tile_height, 1);
|
||||
superblocks = calc_superblocks(ctx, operation, tile_height, interleave_mode);
|
||||
superblocks = calc_superblocks(screen, operation, tile_height, interleave_mode);
|
||||
|
||||
if (tile_width_out)
|
||||
*tile_width_out = tile_width;
|
||||
|
|
@ -185,8 +185,8 @@ wb_stream_write(struct wb_stream *wb_stream, unsigned value)
|
|||
static unsigned
|
||||
write_core_6(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, const struct etna_operation *operation, unsigned zrl_bits)
|
||||
{
|
||||
struct pipe_context *pctx = subgraph->base.context;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(etna_context(pctx))->nn_core_count;
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned input_channels = operation->addition ? 1 : operation->input_channels;
|
||||
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
|
||||
unsigned cores_used = MIN2(output_channels, nn_core_count);
|
||||
|
|
@ -195,7 +195,7 @@ write_core_6(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, co
|
|||
uint32_t *biases = (uint32_t *)operation->bias_tensor;
|
||||
unsigned out_values_per_channel = operation->output_width * operation->output_height;
|
||||
unsigned stride = MIN2(input_channels, 6);
|
||||
unsigned superblocks = etna_ml_calculate_tiling_v7(etna_context(pctx), operation, NULL, NULL);
|
||||
unsigned superblocks = etna_ml_calculate_tiling_v7(screen, operation, NULL, NULL);
|
||||
uint8_t *weights_maps[DIV_ROUND_UP(kernels_per_core, superblocks)];
|
||||
uint32_t *initial_ptr = map;
|
||||
bool do_write = initial_ptr != NULL;
|
||||
|
|
@ -265,8 +265,8 @@ write_core_6(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, co
|
|||
static unsigned
|
||||
write_core_interleaved(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, const struct etna_operation *operation, unsigned zrl_bits)
|
||||
{
|
||||
struct pipe_context *pctx = subgraph->base.context;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(etna_context(pctx))->nn_core_count;
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned input_channels = operation->addition ? 1 : operation->input_channels;
|
||||
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
|
||||
unsigned cores_used = MIN2(output_channels, nn_core_count);
|
||||
|
|
@ -274,7 +274,7 @@ write_core_interleaved(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigne
|
|||
uint8_t *input = operation->weight_tensor;
|
||||
uint32_t *biases = (uint32_t *)operation->bias_tensor;
|
||||
unsigned out_values_per_channel = operation->output_width * operation->output_height;
|
||||
unsigned superblocks = etna_ml_calculate_tiling_v7(etna_context(pctx), operation, NULL, NULL);
|
||||
unsigned superblocks = etna_ml_calculate_tiling_v7(screen, operation, NULL, NULL);
|
||||
uint8_t (*weights_map)[input_channels][operation->weight_width][operation->weight_height] = (void *)input;
|
||||
uint32_t *initial_ptr = map;
|
||||
bool do_write = initial_ptr != NULL;
|
||||
|
|
@ -352,15 +352,15 @@ write_core_interleaved(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigne
|
|||
static unsigned
|
||||
write_core_sequential(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned core, const struct etna_operation *operation, unsigned zrl_bits)
|
||||
{
|
||||
struct pipe_context *pctx = subgraph->base.context;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(etna_context(pctx))->nn_core_count;
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
|
||||
unsigned cores_used = MIN2(output_channels, nn_core_count);
|
||||
unsigned kernels_per_core = DIV_ROUND_UP(output_channels, cores_used);
|
||||
uint8_t *input = operation->weight_tensor;
|
||||
uint32_t *biases = (uint32_t *)operation->bias_tensor;
|
||||
unsigned out_values_per_channel = operation->output_width * operation->output_height;
|
||||
unsigned superblocks = etna_ml_calculate_tiling_v7(etna_context(pctx), operation, NULL, NULL);
|
||||
unsigned superblocks = etna_ml_calculate_tiling_v7(screen, operation, NULL, NULL);
|
||||
uint32_t *initial_ptr = map;
|
||||
bool do_write = initial_ptr != NULL;
|
||||
uint64_t buffer = 0;
|
||||
|
|
@ -438,9 +438,8 @@ write_core_sequential(struct etna_ml_subgraph *subgraph, uint32_t *map, unsigned
|
|||
static unsigned
|
||||
calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned header_size = align(nn_core_count * 4, 64);
|
||||
unsigned input_channels = operation->addition ? 1 : operation->input_channels;
|
||||
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
|
||||
|
|
@ -462,10 +461,9 @@ calculate_weight_bo_size(struct etna_ml_subgraph *subgraph, const struct etna_op
|
|||
static unsigned
|
||||
calculate_zrl_bits(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
|
||||
unsigned max_zrl_bits = etna_ml_get_core_info(ctx)->nn_zrl_bits;
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned max_zrl_bits = etna_ml_get_core_info(screen)->nn_zrl_bits;
|
||||
unsigned header_size = align(nn_core_count * 4, 64);
|
||||
unsigned input_channels = operation->addition ? 1 : operation->input_channels;
|
||||
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
|
||||
|
|
@ -515,9 +513,8 @@ calculate_zrl_bits(struct etna_ml_subgraph *subgraph, const struct etna_operatio
|
|||
struct etna_bo *
|
||||
etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, unsigned *cache_size)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned header_size = align(nn_core_count * 4, 64);
|
||||
unsigned input_channels = operation->addition ? 1 : operation->input_channels;
|
||||
unsigned output_channels = operation->addition ? 1 : operation->output_channels;
|
||||
|
|
@ -529,7 +526,7 @@ etna_ml_create_coeffs_v7(struct etna_ml_subgraph *subgraph, const struct etna_op
|
|||
bo_size = calculate_weight_bo_size(subgraph, operation);
|
||||
zrl_bits = calculate_zrl_bits(subgraph, operation);
|
||||
|
||||
struct etna_bo *compressed = etna_ml_create_bo(context, bo_size);
|
||||
struct etna_bo *compressed = etna_ml_create_bo(screen, bo_size);
|
||||
|
||||
etna_bo_cpu_prep(compressed, DRM_ETNA_PREP_WRITE);
|
||||
|
||||
|
|
|
|||
|
|
@ -39,10 +39,10 @@ struct etna_nn_header_v8 {
|
|||
#define MAX_TILE_WIDTH 64
|
||||
|
||||
static unsigned
|
||||
calc_superblocks(struct etna_context *ctx, const struct etna_operation *operation, unsigned tile_x, unsigned tile_y, unsigned interleave_mode)
|
||||
calc_superblocks(struct etna_screen *screen, const struct etna_operation *operation, unsigned tile_x, unsigned tile_y, unsigned interleave_mode)
|
||||
{
|
||||
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
|
||||
struct etna_core_info *info = etna_gpu_get_core_info(ctx->screen->npu);
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
struct etna_core_info *info = etna_gpu_get_core_info(screen->npu);
|
||||
unsigned nn_accum_buffer_depth = info->npu.nn_accum_buffer_depth;
|
||||
unsigned output_channels = operation->output_channels;
|
||||
unsigned kernels_per_core = DIV_ROUND_UP(output_channels, nn_core_count);
|
||||
|
|
@ -67,7 +67,7 @@ calc_superblocks(struct etna_context *ctx, const struct etna_operation *operatio
|
|||
}
|
||||
|
||||
static unsigned
|
||||
calc_interleave_mode(struct etna_context *ctx, unsigned tile_width, unsigned weight_height)
|
||||
calc_interleave_mode(unsigned tile_width, unsigned weight_height)
|
||||
{
|
||||
unsigned mode;
|
||||
|
||||
|
|
@ -103,10 +103,10 @@ calc_interleave_mode(struct etna_context *ctx, unsigned tile_width, unsigned wei
|
|||
}
|
||||
|
||||
unsigned
|
||||
etna_ml_calculate_tiling_v8(struct etna_context *ctx, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out)
|
||||
etna_ml_calculate_tiling_v8(struct etna_screen *screen, const struct etna_operation *operation, unsigned *tile_width_out, unsigned *tile_height_out)
|
||||
{
|
||||
unsigned nn_input_buffer_depth = etna_ml_get_core_info(ctx)->nn_input_buffer_depth;
|
||||
unsigned nn_accum_buffer_depth = etna_ml_get_core_info(ctx)->nn_accum_buffer_depth;
|
||||
unsigned nn_input_buffer_depth = etna_ml_get_core_info(screen)->nn_input_buffer_depth;
|
||||
unsigned nn_accum_buffer_depth = etna_ml_get_core_info(screen)->nn_accum_buffer_depth;
|
||||
unsigned input_width = operation->input_width;
|
||||
unsigned input_height = operation->input_height;
|
||||
unsigned input_channels = operation->input_channels;
|
||||
|
|
@ -128,7 +128,7 @@ etna_ml_calculate_tiling_v8(struct etna_context *ctx, const struct etna_operatio
|
|||
}
|
||||
|
||||
tile_width = MIN2(output_width, 64);
|
||||
interleave_mode = calc_interleave_mode(ctx, tile_width, operation->weight_height);
|
||||
interleave_mode = calc_interleave_mode(tile_width, operation->weight_height);
|
||||
|
||||
tile_height = nn_input_buffer_depth * interleave_mode - operation->weight_height + 1;
|
||||
tile_height = MIN2(tile_height, interleave_mode * nn_accum_buffer_depth);
|
||||
|
|
@ -143,7 +143,7 @@ etna_ml_calculate_tiling_v8(struct etna_context *ctx, const struct etna_operatio
|
|||
|
||||
tile_height = MAX2(tile_height, 1);
|
||||
|
||||
superblocks = calc_superblocks(ctx, operation, tile_width, tile_height, interleave_mode);
|
||||
superblocks = calc_superblocks(screen, operation, tile_width, tile_height, interleave_mode);
|
||||
|
||||
if (tile_width_out)
|
||||
*tile_width_out = tile_width;
|
||||
|
|
@ -437,9 +437,7 @@ static void encode_byte(struct encoder *encoder, uint8_t byte)
|
|||
static void
|
||||
encode_value(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct encoder *encoder, uint8_t value)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned customer_id = ctx->screen->info->customer_id;
|
||||
unsigned customer_id = subgraph->screen->info->customer_id;
|
||||
uint8_t zero_point = operation->weight_zero_point;
|
||||
|
||||
value -= encoder->avg_bias;
|
||||
|
|
@ -600,9 +598,8 @@ static void encoder_reset(struct etna_ml_subgraph *subgraph, const struct etna_o
|
|||
|
||||
static void encode_superblock(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct encoder *encoder, unsigned kernels_in_superblock, unsigned first_channel)
|
||||
{
|
||||
struct pipe_context *pctx = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(pctx);
|
||||
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned input_channels = operation->input_channels;
|
||||
unsigned output_channels = operation->output_channels;
|
||||
unsigned kernel_size;
|
||||
|
|
@ -671,9 +668,8 @@ static uint32_t pack_symbol_map(uint8_t map[8])
|
|||
static struct etna_bo *
|
||||
create_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned input_channels = operation->input_channels;
|
||||
unsigned output_channels = operation->output_channels;
|
||||
unsigned cores_used = MIN2(output_channels, nn_core_count);
|
||||
|
|
@ -689,7 +685,7 @@ create_bo(struct etna_ml_subgraph *subgraph, const struct etna_operation *operat
|
|||
unsigned tail_size = 64;
|
||||
max_size = header_size + cores_used * body_size + tail_size;
|
||||
|
||||
return etna_ml_create_bo(context, max_size);
|
||||
return etna_ml_create_bo(screen, max_size);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -718,12 +714,11 @@ calculate_symbol_map(struct etna_ml_subgraph *subgraph, const struct etna_operat
|
|||
static void
|
||||
fill_weights(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation, struct encoder *encoder, struct etna_nn_header_v8 *header)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
struct etna_screen *screen = subgraph->screen;
|
||||
unsigned output_channels = operation->output_channels;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(ctx)->nn_core_count;
|
||||
unsigned nn_core_count = etna_ml_get_core_info(screen)->nn_core_count;
|
||||
unsigned cores_used = MIN2(output_channels, nn_core_count);
|
||||
unsigned superblocks = etna_ml_calculate_tiling_v8(ctx, operation, NULL, NULL);
|
||||
unsigned superblocks = etna_ml_calculate_tiling_v8(screen, operation, NULL, NULL);
|
||||
unsigned full_superblock = DIV_ROUND_UP(output_channels, nn_core_count * superblocks);
|
||||
|
||||
unsigned channel_per_superblock[superblocks];
|
||||
|
|
|
|||
|
|
@ -247,7 +247,7 @@ set_default_tp_config(struct etna_tp_params *map)
|
|||
static struct etna_bo *
|
||||
create_transpose_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation)
|
||||
{
|
||||
struct etna_bo *bo = etna_ml_create_bo(subgraph->base.context, sizeof(struct etna_tp_params));
|
||||
struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params));
|
||||
|
||||
etna_bo_cpu_prep(bo, DRM_ETNA_PREP_WRITE);
|
||||
|
||||
|
|
@ -297,7 +297,7 @@ create_detranspose_config(struct etna_ml_subgraph *subgraph, const struct etna_o
|
|||
unsigned input_width = operation->input_width;
|
||||
unsigned input_height = operation->input_height;
|
||||
unsigned input_channels = operation->input_channels;
|
||||
struct etna_bo *bo = etna_ml_create_bo(subgraph->base.context, sizeof(struct etna_tp_params));
|
||||
struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params));
|
||||
|
||||
etna_bo_cpu_prep(bo, DRM_ETNA_PREP_WRITE);
|
||||
|
||||
|
|
@ -418,7 +418,7 @@ static struct etna_bo *
|
|||
create_reshuffle_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation,
|
||||
unsigned tp_core, unsigned tp_cores_used)
|
||||
{
|
||||
struct etna_bo *bo = etna_ml_create_bo(subgraph->base.context, sizeof(struct etna_tp_params));
|
||||
struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params));
|
||||
unsigned input_width = operation->input_width;
|
||||
unsigned input_height = operation->input_height;
|
||||
unsigned output_width = operation->output_width;
|
||||
|
|
@ -572,8 +572,7 @@ static struct etna_bo *
|
|||
create_pad_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation,
|
||||
unsigned tp_core, unsigned tp_cores_used)
|
||||
{
|
||||
struct pipe_context *pctx = subgraph->base.context;
|
||||
struct etna_bo *bo = etna_ml_create_bo(pctx, sizeof(struct etna_tp_params));
|
||||
struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params));
|
||||
unsigned input_width = operation->input_width;
|
||||
unsigned input_height = operation->input_height;
|
||||
unsigned input_channels = operation->input_channels;
|
||||
|
|
@ -747,8 +746,7 @@ static struct etna_bo *
|
|||
create_pwl_lut_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation,
|
||||
unsigned tp_core, unsigned tp_cores_used, struct etna_bo *pwl_lut)
|
||||
{
|
||||
struct pipe_context *pctx = subgraph->base.context;
|
||||
struct etna_bo *bo = etna_ml_create_bo(pctx, sizeof(struct etna_tp_params));
|
||||
struct etna_bo *bo = etna_ml_create_bo(subgraph->screen, sizeof(struct etna_tp_params));
|
||||
unsigned input_width = operation->input_width;
|
||||
unsigned input_height = operation->input_height;
|
||||
unsigned input_channels = operation->input_channels;
|
||||
|
|
@ -1124,10 +1122,8 @@ static struct etna_bo *
|
|||
create_relu_lut_bo(struct etna_ml_subgraph *subgraph,
|
||||
const struct etna_operation *operation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
const unsigned lut_length = 1024;
|
||||
struct etna_bo *pwl_lut = etna_bo_new(ctx->screen->dev,
|
||||
struct etna_bo *pwl_lut = etna_bo_new(subgraph->screen->dev,
|
||||
lut_length * sizeof(uint32_t),
|
||||
DRM_ETNA_GEM_CACHE_WC);
|
||||
|
||||
|
|
@ -1155,10 +1151,8 @@ static struct etna_bo *
|
|||
create_abs_lut_bo(struct etna_ml_subgraph *subgraph,
|
||||
const struct etna_operation *operation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned lut_length = 1024;
|
||||
struct etna_bo *pwl_lut = etna_bo_new(ctx->screen->dev,
|
||||
struct etna_bo *pwl_lut = etna_bo_new(subgraph->screen->dev,
|
||||
lut_length * sizeof(uint32_t),
|
||||
DRM_ETNA_GEM_CACHE_WC);
|
||||
|
||||
|
|
@ -1274,11 +1268,9 @@ static struct etna_bo *
|
|||
create_log_lut_bo(struct etna_ml_subgraph *subgraph,
|
||||
const struct etna_operation *operation)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct etna_context *ctx = etna_context(context);
|
||||
unsigned lut_table_len = 1024;
|
||||
|
||||
struct etna_bo *pwl_lut = etna_bo_new(ctx->screen->dev,
|
||||
struct etna_bo *pwl_lut = etna_bo_new(subgraph->screen->dev,
|
||||
lut_table_len * sizeof(uint32_t),
|
||||
DRM_ETNA_GEM_CACHE_WC);
|
||||
|
||||
|
|
@ -1331,7 +1323,6 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
const struct etna_operation *operation,
|
||||
struct etna_vip_instruction *instruction)
|
||||
{
|
||||
struct etna_context *ctx = etna_context(subgraph->base.context);
|
||||
struct pipe_resource *input = etna_ml_get_resource(subgraph, operation->input_tensors[0]);
|
||||
assert(input);
|
||||
pipe_resource_reference(&instruction->input, input);
|
||||
|
|
@ -1351,7 +1342,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
instruction->configs[0] = create_detranspose_config(subgraph, operation);
|
||||
break;
|
||||
case ETNA_ML_TP_RESHUFFLE: {
|
||||
unsigned tp_core_count = etna_ml_get_core_info(ctx)->tp_core_count;
|
||||
unsigned tp_core_count = etna_ml_get_core_info(subgraph->screen)->tp_core_count;
|
||||
unsigned tp_cores_used;
|
||||
|
||||
tp_cores_used = (operation->input_width > 8 || operation->input_channels > 1) ? tp_core_count : 1;
|
||||
|
|
@ -1368,7 +1359,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
break;
|
||||
}
|
||||
case ETNA_ML_TP_PAD: {
|
||||
unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count;
|
||||
unsigned tp_cores_used = etna_ml_get_core_info(subgraph->screen)->tp_core_count;
|
||||
|
||||
if (operation->input_width == 1)
|
||||
tp_cores_used = 1;
|
||||
|
|
@ -1380,7 +1371,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
break;
|
||||
}
|
||||
case ETNA_ML_TP_RELU: {
|
||||
unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count;
|
||||
unsigned tp_cores_used = etna_ml_get_core_info(subgraph->screen)->tp_core_count;
|
||||
|
||||
if (operation->input_width < 6)
|
||||
tp_cores_used = 1;
|
||||
|
|
@ -1393,7 +1384,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
break;
|
||||
}
|
||||
case ETNA_ML_TP_ABSOLUTE: {
|
||||
unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count;
|
||||
unsigned tp_cores_used = etna_ml_get_core_info(subgraph->screen)->tp_core_count;
|
||||
|
||||
ML_DBG("absolute: input_width %d tp_cores_used %d\n", operation->input_width, tp_cores_used);
|
||||
instruction->pwl_lut = create_abs_lut_bo(subgraph, operation);
|
||||
|
|
@ -1403,7 +1394,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
break;
|
||||
}
|
||||
case ETNA_ML_TP_LOGISTIC: {
|
||||
unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count;
|
||||
unsigned tp_cores_used = etna_ml_get_core_info(subgraph->screen)->tp_core_count;
|
||||
|
||||
if (operation->input_width < 6)
|
||||
tp_cores_used = 1;
|
||||
|
|
@ -1421,12 +1412,13 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
}
|
||||
|
||||
void
|
||||
etna_ml_emit_operation_tp(struct etna_ml_subgraph *subgraph,
|
||||
etna_ml_emit_operation_tp(struct pipe_context *pctx,
|
||||
struct etna_ml_subgraph *subgraph,
|
||||
struct etna_vip_instruction *operation,
|
||||
unsigned idx)
|
||||
{
|
||||
struct etna_context *ctx = etna_context(subgraph->base.context);
|
||||
unsigned tp_core_count = etna_ml_get_core_info(ctx)->tp_core_count;
|
||||
struct etna_context *ctx = etna_context(pctx);
|
||||
unsigned tp_core_count = etna_ml_get_core_info(subgraph->screen)->tp_core_count;
|
||||
struct etna_cmd_stream *stream = ctx->stream;
|
||||
bool more_than_one_tp_job = operation->configs[1] != NULL;
|
||||
bool parallel = DBG_ENABLED(ETNA_DBG_NPU_PARALLEL);
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
struct etna_vip_instruction *instruction);
|
||||
|
||||
void
|
||||
etna_ml_emit_operation_tp(struct etna_ml_subgraph *subgraph,
|
||||
etna_ml_emit_operation_tp(struct pipe_context *pctx,
|
||||
struct etna_ml_subgraph *subgraph,
|
||||
struct etna_vip_instruction *operation,
|
||||
unsigned idx);
|
||||
|
|
@ -34,6 +34,7 @@
|
|||
#include "etnaviv_debug.h"
|
||||
#include "etnaviv_fence.h"
|
||||
#include "etnaviv_format.h"
|
||||
#include "etnaviv_ml.h"
|
||||
#include "etnaviv_query.h"
|
||||
#include "etnaviv_resource.h"
|
||||
#include "etnaviv_translate.h"
|
||||
|
|
@ -1014,6 +1015,14 @@ etna_screen_get_fd(struct pipe_screen *pscreen)
|
|||
return etna_device_fd(screen->dev);
|
||||
}
|
||||
|
||||
/**
 * pipe_screen::get_ml_device hook for etnaviv.
 *
 * Returns the address of the generic pipe_ml_device (the "base" member) of the
 * ml_device that is stored inside the etna_screen. Nothing is allocated here.
 * etna_screen_create() assigns this hook only inside its "if (npu)" branch.
 *
 * \param pscreen screen to query; converted with etna_screen()
 * \return pointer to the screen's embedded pipe_ml_device
 */
static struct pipe_ml_device *
|
||||
etna_get_ml_device(struct pipe_screen *pscreen)
|
||||
{
|
||||
struct etna_screen *screen = etna_screen(pscreen);
|
||||
|
||||
return &screen->ml_device.base;
|
||||
}
|
||||
|
||||
struct pipe_screen *
|
||||
etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
|
||||
struct etna_gpu *npu, struct renderonly *ro)
|
||||
|
|
@ -1089,6 +1098,13 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
|
|||
pscreen->is_dmabuf_modifier_supported = etna_screen_is_dmabuf_modifier_supported;
|
||||
pscreen->get_dmabuf_modifier_planes = etna_screen_get_dmabuf_modifier_planes;
|
||||
|
||||
if (npu) {
|
||||
screen->ml_device.base.ml_operation_supported = etna_ml_operation_supported;
|
||||
screen->ml_device.base.ml_subgraph_create = etna_ml_subgraph_create;
|
||||
screen->ml_device.base.ml_subgraph_destroy = etna_ml_subgraph_destroy;
|
||||
pscreen->get_ml_device = etna_get_ml_device;
|
||||
}
|
||||
|
||||
if (!etna_shader_screen_init(pscreen))
|
||||
goto fail;
|
||||
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@
|
|||
#include "util/u_helpers.h"
|
||||
#include "util/u_queue.h"
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "etnaviv_ml.h"
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
struct etna_bo;
|
||||
|
||||
|
|
@ -54,6 +56,7 @@ struct etna_screen {
|
|||
struct etna_pipe *pipe_nn;
|
||||
struct etna_perfmon *perfmon;
|
||||
struct renderonly *ro;
|
||||
struct etna_ml_device ml_device;
|
||||
|
||||
struct util_dynarray supported_pm_queries;
|
||||
struct slab_parent_pool transfer_pool;
|
||||
|
|
@ -89,6 +92,13 @@ etna_screen(struct pipe_screen *pscreen)
|
|||
return (struct etna_screen *)pscreen;
|
||||
}
|
||||
|
||||
/**
 * Maps a pipe_ml_device back to the etna_screen that contains it.
 *
 * struct etna_screen holds its etna_ml_device by value in the "ml_device"
 * member, so container_of() on that member yields the enclosing screen.
 * etna_ml_device() is defined outside this header.
 * NOTE(review): presumably it is a plain downcast from the base pointer —
 * confirm.
 *
 * \param pdevice ML device that is embedded in an etna_screen
 * \return the etna_screen embedding pdevice
 */
static inline struct etna_screen *
|
||||
etna_ml_device_screen(struct pipe_ml_device *pdevice)
|
||||
{
|
||||
struct etna_ml_device *dev = etna_ml_device(pdevice);
|
||||
/* Step from the embedded member out to the containing screen. */
return container_of(dev, struct etna_screen, ml_device);
|
||||
}
|
||||
|
||||
struct etna_bo *
|
||||
etna_screen_bo_from_handle(struct pipe_screen *pscreen,
|
||||
struct winsys_handle *whandle);
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ struct pipe_resource *
|
|||
rkt_fill_weights(struct rkt_ml_subgraph *subgraph,
|
||||
const struct pipe_ml_operation *poperation)
|
||||
{
|
||||
struct pipe_context *pcontext = subgraph->base.context;
|
||||
struct pipe_context *pcontext = subgraph->context;
|
||||
unsigned weights_width = poperation->conv.weight_tensor->dims[1];
|
||||
unsigned weights_height = poperation->conv.weight_tensor->dims[2];
|
||||
unsigned input_channels = poperation->input_tensors[0]->dims[3];
|
||||
|
|
@ -130,7 +130,7 @@ rkt_fill_biases(struct rkt_ml_subgraph *subgraph,
|
|||
const struct pipe_ml_operation *poperation,
|
||||
unsigned *truncate_bits)
|
||||
{
|
||||
struct pipe_context *pcontext = subgraph->base.context;
|
||||
struct pipe_context *pcontext = subgraph->context;
|
||||
unsigned output_channels = poperation->output_tensors[0]->dims[3];
|
||||
unsigned weights_size = poperation->conv.weight_tensor->dims[1];
|
||||
struct pipe_transfer *transfer_out;
|
||||
|
|
|
|||
|
|
@ -124,11 +124,8 @@ rkt_create_context(struct pipe_screen *screen,
|
|||
pctx->buffer_subdata = u_default_buffer_subdata;
|
||||
pctx->clear_buffer = u_default_clear_buffer;
|
||||
|
||||
pctx->ml_operation_supported = rkt_ml_operation_supported;
|
||||
pctx->ml_subgraph_create = rkt_ml_subgraph_create;
|
||||
pctx->ml_subgraph_invoke = rkt_ml_subgraph_invoke;
|
||||
pctx->ml_subgraph_read_output = rkt_ml_subgraph_read_outputs;
|
||||
pctx->ml_subgraph_destroy = rkt_ml_subgraph_destroy;
|
||||
|
||||
return pctx;
|
||||
}
|
||||
|
|
@ -204,6 +201,12 @@ rkt_screen_get_fd(struct pipe_screen *pscreen)
|
|||
return rkt_screen(pscreen)->fd;
|
||||
}
|
||||
|
||||
/**
 * pipe_screen::get_ml_device hook for the rocket driver.
 *
 * Returns the address of the generic pipe_ml_device (the "base" member) of the
 * rkt_ml_device stored inside the rkt_screen. Nothing is allocated here.
 *
 * \param pscreen screen to query; converted with rkt_screen()
 * \return pointer to the screen's embedded pipe_ml_device
 */
static struct pipe_ml_device *
|
||||
rkt_get_ml_device(struct pipe_screen *pscreen)
|
||||
{
|
||||
return &rkt_screen(pscreen)->ml_device.base;
|
||||
}
|
||||
|
||||
struct pipe_screen *
|
||||
rkt_screen_create(int fd,
|
||||
const struct pipe_screen_config *config,
|
||||
|
|
@ -228,5 +231,10 @@ rkt_screen_create(int fd,
|
|||
screen->resource_create = rkt_resource_create;
|
||||
screen->resource_destroy = rkt_resource_destroy;
|
||||
|
||||
rkt_screen->ml_device.base.ml_operation_supported = rkt_ml_operation_supported;
|
||||
rkt_screen->ml_device.base.ml_subgraph_create = rkt_ml_subgraph_create;
|
||||
rkt_screen->ml_device.base.ml_subgraph_destroy = rkt_ml_subgraph_destroy;
|
||||
screen->get_ml_device = rkt_get_ml_device;
|
||||
|
||||
return screen;
|
||||
}
|
||||
|
|
@ -8,6 +8,7 @@
|
|||
#include "pipe/p_state.h"
|
||||
#include "renderonly/renderonly.h"
|
||||
#include "util/log.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
#ifndef RKT_SCREEN_H
|
||||
#define RKT_SCREEN_H
|
||||
|
|
@ -29,11 +30,23 @@ extern int rocket_debug;
|
|||
##__VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
/**
 * Rocket implementation of pipe_ml_device.
 */
struct rkt_ml_device {
|
||||
/* Generic device; kept as the first member because rkt_ml_device() casts a
 * pipe_ml_device pointer directly to a rkt_ml_device pointer. */
struct pipe_ml_device base;
|
||||
/* Context used for subgraph compilation. rkt_ml_subgraph_create() creates it
 * on first use when it is still NULL and copies the pointer into every
 * subgraph it builds.
 * NOTE(review): where this context gets destroyed is not visible here. */
struct pipe_context *context;
|
||||
};
|
||||
|
||||
/**
 * Downcasts a generic pipe_ml_device pointer to the rocket device type.
 *
 * This is a plain pointer cast; it relies on "base" being the first member of
 * struct rkt_ml_device.
 *
 * \param dev generic ML device pointer
 * \return the same address, typed as struct rkt_ml_device
 */
static inline struct rkt_ml_device *
|
||||
rkt_ml_device(struct pipe_ml_device *dev)
|
||||
{
|
||||
return (struct rkt_ml_device *)dev;
|
||||
}
|
||||
|
||||
/**
 * Rocket driver screen.
 */
struct rkt_screen {
|
||||
/* Generic screen; kept as the first member because rkt_screen() casts a
 * pipe_screen pointer directly to a rkt_screen pointer. */
struct pipe_screen pscreen;
|
||||
|
||||
/* File descriptor returned by rkt_screen_get_fd(). */
int fd;
|
||||
struct renderonly *ro;
|
||||
/* ML device stored by value: rkt_get_ml_device() hands out &ml_device.base
 * and rkt_ml_device_screen() gets back to this screen with container_of(). */
struct rkt_ml_device ml_device;
|
||||
};
|
||||
|
||||
static inline struct rkt_screen *
|
||||
|
|
@ -42,6 +55,13 @@ rkt_screen(struct pipe_screen *p)
|
|||
return (struct rkt_screen *)p;
|
||||
}
|
||||
|
||||
/**
 * Maps a pipe_ml_device back to the rkt_screen that contains it.
 *
 * struct rkt_screen holds its rkt_ml_device by value in the "ml_device"
 * member, so container_of() on that member yields the enclosing screen.
 *
 * \param pdevice ML device that is embedded in a rkt_screen
 * \return the rkt_screen embedding pdevice
 */
static inline struct rkt_screen *
|
||||
rkt_ml_device_screen(struct pipe_ml_device *pdevice)
|
||||
{
|
||||
struct rkt_ml_device *dev = rkt_ml_device(pdevice);
|
||||
/* Step from the embedded member out to the containing screen. */
return container_of(dev, struct rkt_screen, ml_device);
|
||||
}
|
||||
|
||||
/**
 * Rocket driver context; it currently wraps only the generic pipe_context.
 */
struct rkt_context {
|
||||
struct pipe_context base;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ static void
|
|||
create_tensor(struct rkt_ml_subgraph *subgraph, unsigned idx,
|
||||
unsigned size)
|
||||
{
|
||||
struct pipe_context *context = subgraph->base.context;
|
||||
struct pipe_context *context = subgraph->context;
|
||||
struct pipe_resource **tensors = util_dynarray_begin(&subgraph->tensors);
|
||||
|
||||
assert(idx < util_dynarray_num_elements(&subgraph->tensors,
|
||||
|
|
@ -90,7 +90,7 @@ static void
|
|||
compile_operation(struct rkt_ml_subgraph *subgraph,
|
||||
struct rkt_operation *operation)
|
||||
{
|
||||
struct pipe_context *pcontext = subgraph->base.context;
|
||||
struct pipe_context *pcontext = subgraph->context;
|
||||
unsigned regcfg_total_size = 0;
|
||||
struct util_dynarray *regcfgs;
|
||||
struct pipe_transfer *transfer = NULL;
|
||||
|
|
@ -264,7 +264,7 @@ tensor_quantization_supported(struct pipe_tensor *tensor)
|
|||
}
|
||||
|
||||
bool
|
||||
rkt_ml_operation_supported(struct pipe_context *pcontext,
|
||||
rkt_ml_operation_supported(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *operation)
|
||||
{
|
||||
bool supported = false;
|
||||
|
|
@ -299,15 +299,21 @@ rkt_ml_operation_supported(struct pipe_context *pcontext,
|
|||
}
|
||||
|
||||
struct pipe_ml_subgraph *
|
||||
rkt_ml_subgraph_create(struct pipe_context *pcontext,
|
||||
rkt_ml_subgraph_create(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *poperations,
|
||||
unsigned count)
|
||||
{
|
||||
struct rkt_screen *screen = rkt_ml_device_screen(pdevice);
|
||||
struct rkt_ml_device *dev = rkt_ml_device(pdevice);
|
||||
struct rkt_ml_subgraph *subgraph;
|
||||
unsigned tensor_count;
|
||||
|
||||
if (!dev->context)
|
||||
dev->context = screen->pscreen.context_create(&screen->pscreen, NULL, 0);
|
||||
|
||||
subgraph = calloc(1, sizeof(*subgraph));
|
||||
subgraph->base.context = pcontext;
|
||||
subgraph->base.device = pdevice;
|
||||
subgraph->context = dev->context;
|
||||
|
||||
tensor_count = count_tensors(poperations, count);
|
||||
subgraph->tensors = UTIL_DYNARRAY_INIT;
|
||||
|
|
@ -614,7 +620,7 @@ free_operation(struct rkt_operation *operation)
|
|||
}
|
||||
|
||||
void
|
||||
rkt_ml_subgraph_destroy(struct pipe_context *context,
|
||||
rkt_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
|
||||
struct pipe_ml_subgraph *psubgraph)
|
||||
{
|
||||
struct rkt_ml_subgraph *subgraph = (struct rkt_ml_subgraph *)(psubgraph);
|
||||
|
|
|
|||
|
|
@ -117,15 +117,16 @@ struct rkt_operation {
|
|||
/**
 * Rocket implementation of pipe_ml_subgraph.
 */
struct rkt_ml_subgraph {
|
||||
/* Generic subgraph; rkt_ml_subgraph_create() sets base.device to the
 * pipe_ml_device it was called with. */
struct pipe_ml_subgraph base;
|
||||
|
||||
/* Context used while building the subgraph (tensor creation, operation
 * compilation, weight and bias upload). It is a copy of the pointer held by
 * the rkt_ml_device, not a context created per subgraph. */
struct pipe_context *context;
|
||||
struct util_dynarray operations; /* rkt_operation */
|
||||
struct util_dynarray tensors; /* pipe_resource* */
|
||||
};
|
||||
|
||||
bool
|
||||
rkt_ml_operation_supported(struct pipe_context *pcontext, const struct pipe_ml_operation *operation);
|
||||
rkt_ml_operation_supported(struct pipe_ml_device *pdevice, const struct pipe_ml_operation *operation);
|
||||
|
||||
struct pipe_ml_subgraph *
|
||||
rkt_ml_subgraph_create(struct pipe_context *pcontext,
|
||||
rkt_ml_subgraph_create(struct pipe_ml_device *pdevice,
|
||||
const struct pipe_ml_operation *poperations,
|
||||
unsigned count);
|
||||
|
||||
|
|
@ -140,7 +141,7 @@ void rkt_ml_subgraph_read_outputs(struct pipe_context *pcontext,
|
|||
unsigned output_idxs[], void *outputs[],
|
||||
bool is_signed[]);
|
||||
|
||||
void rkt_ml_subgraph_destroy(struct pipe_context *context,
|
||||
void rkt_ml_subgraph_destroy(struct pipe_ml_device *pdevice,
|
||||
struct pipe_ml_subgraph *psubgraph);
|
||||
|
||||
struct rkt_resource *rkt_get_tensor(struct rkt_ml_subgraph *subgraph,
|
||||
|
|
|
|||
|
|
@ -43,7 +43,8 @@ teflon_debug(const char *format, ...)
|
|||
struct teflon_delegate {
|
||||
TfLiteDelegate base;
|
||||
struct pipe_loader_device *dev;
|
||||
struct pipe_context *context;
|
||||
struct pipe_screen *screen;
|
||||
struct pipe_ml_device *ml_dev;
|
||||
struct pipe_tensor *tensors;
|
||||
unsigned tensor_count;
|
||||
};
|
||||
|
|
@ -526,7 +527,6 @@ partition_init(TfLiteContext *tf_context, const char *buffer, size_t length)
|
|||
{
|
||||
const TfLiteDelegateParams *params = (const TfLiteDelegateParams *)buffer;
|
||||
struct teflon_delegate *delegate = (struct teflon_delegate *)params->delegate;
|
||||
struct pipe_context *context = delegate->context;
|
||||
struct pipe_ml_operation operations[params->nodes_to_replace->size];
|
||||
long start = 0, end = 0;
|
||||
|
||||
|
|
@ -553,9 +553,9 @@ partition_init(TfLiteContext *tf_context, const char *buffer, size_t length)
|
|||
dump_graph(delegate->tensors, tf_context->tensors_size, operations, params->nodes_to_replace->size);
|
||||
|
||||
struct pipe_ml_subgraph *subgraph;
|
||||
subgraph = context->ml_subgraph_create(context,
|
||||
operations,
|
||||
params->nodes_to_replace->size);
|
||||
subgraph = delegate->ml_dev->ml_subgraph_create(delegate->ml_dev,
|
||||
operations,
|
||||
params->nodes_to_replace->size);
|
||||
|
||||
struct teflon_subgraph *tsubgraph = calloc(1, sizeof(*tsubgraph));
|
||||
tsubgraph->base = subgraph;
|
||||
|
|
@ -603,9 +603,8 @@ partition_free(TfLiteContext *tf_context, void *buffer)
|
|||
{
|
||||
struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)buffer;
|
||||
struct pipe_ml_subgraph *subgraph = tsubgraph->base;
|
||||
struct pipe_context *context = subgraph->context;
|
||||
|
||||
context->ml_subgraph_destroy(context, subgraph);
|
||||
subgraph->device->ml_subgraph_destroy(subgraph->device, subgraph);
|
||||
free(tsubgraph->input_tensors);
|
||||
free(tsubgraph->output_tensors);
|
||||
free(tsubgraph);
|
||||
|
|
@ -617,7 +616,7 @@ partition_invoke(TfLiteContext *tf_context, TfLiteNode *node)
|
|||
struct teflon_delegate *delegate = (struct teflon_delegate *)node->delegate;
|
||||
struct teflon_subgraph *tsubgraph = (struct teflon_subgraph *)node->user_data;
|
||||
struct pipe_ml_subgraph *subgraph = tsubgraph->base;
|
||||
struct pipe_context *context = delegate->context;
|
||||
struct pipe_context *context = delegate->screen->context_create(delegate->screen, NULL, PIPE_CONTEXT_COMPUTE_ONLY);
|
||||
long start = 0, end = 0;
|
||||
|
||||
if (unlikely(debug_get_option_debug_teflon() & TEFLON_DEBUG_VERBOSE)) {
|
||||
|
|
@ -663,6 +662,9 @@ partition_invoke(TfLiteContext *tf_context, TfLiteNode *node)
|
|||
teflon_debug("teflon: invoked graph, took %ld ms\n", (end - start));
|
||||
}
|
||||
|
||||
context->destroy(context);
|
||||
context = NULL;
|
||||
|
||||
return kTfLiteOk;
|
||||
}
|
||||
|
||||
|
|
@ -794,14 +796,13 @@ static bool
|
|||
check_op_support(TfLiteDelegate *tf_delegate, TfLiteContext *tf_context, TfLiteNode *node, TfLiteRegistration *registration)
|
||||
{
|
||||
struct teflon_delegate *delegate = (struct teflon_delegate *)tf_delegate;
|
||||
struct pipe_context *context = delegate->context;
|
||||
struct pipe_ml_operation operation = {0};
|
||||
bool supported = false;
|
||||
|
||||
if (!fill_operation(delegate, tf_context, node, registration, &operation))
|
||||
return false;
|
||||
|
||||
supported = context->ml_operation_supported(context, &operation);
|
||||
supported = delegate->ml_dev->ml_operation_supported(delegate->ml_dev, &operation);
|
||||
|
||||
free_operation(&operation);
|
||||
|
||||
|
|
@ -976,7 +977,6 @@ tflite_plugin_create_delegate(char **options_keys,
|
|||
void (*report_error)(const char *))
|
||||
{
|
||||
struct teflon_delegate *delegate = (struct teflon_delegate *)calloc(1, sizeof(*delegate));
|
||||
struct pipe_screen *screen;
|
||||
struct pipe_loader_device **devs;
|
||||
|
||||
delegate->base.flags = kTfLiteDelegateFlagsAllowDynamicTensors | kTfLiteDelegateFlagsRequirePropagatedShapes;
|
||||
|
|
@ -999,8 +999,8 @@ tflite_plugin_create_delegate(char **options_keys,
|
|||
|
||||
teflon_debug("Teflon delegate: loaded %s driver\n", delegate->dev->driver_name);
|
||||
|
||||
screen = pipe_loader_create_screen(delegate->dev, false);
|
||||
delegate->context = screen->context_create(screen, NULL, PIPE_CONTEXT_COMPUTE_ONLY);
|
||||
delegate->screen = pipe_loader_create_screen(delegate->dev, false);
|
||||
delegate->ml_dev = delegate->screen->get_ml_device(delegate->screen);
|
||||
|
||||
return &delegate->base;
|
||||
}
|
||||
|
|
@ -1009,7 +1009,6 @@ __attribute__((visibility("default"))) void
|
|||
tflite_plugin_destroy_delegate(TfLiteDelegate *tf_delegate)
|
||||
{
|
||||
struct teflon_delegate *delegate = (struct teflon_delegate *)tf_delegate;
|
||||
struct pipe_screen *screen;
|
||||
|
||||
if (tf_delegate == NULL) {
|
||||
fprintf(stderr, "tflite_plugin_destroy_delegate: NULL delegate!\n");
|
||||
|
|
@ -1028,9 +1027,7 @@ tflite_plugin_destroy_delegate(TfLiteDelegate *tf_delegate)
|
|||
}
|
||||
free(delegate->tensors);
|
||||
|
||||
screen = delegate->context->screen;
|
||||
delegate->context->destroy(delegate->context);
|
||||
screen->destroy(screen);
|
||||
delegate->screen->destroy(delegate->screen);
|
||||
pipe_loader_release(&delegate->dev, 1);
|
||||
free(delegate);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ struct u_upload_mgr;
|
|||
struct util_debug_callback;
|
||||
struct u_vbuf;
|
||||
struct pipe_context;
|
||||
struct pipe_ml_subgraph;
|
||||
|
||||
typedef void (*pipe_draw_func)(struct pipe_context *pipe,
|
||||
const struct pipe_draw_info *info,
|
||||
|
|
@ -1244,29 +1245,6 @@ struct pipe_context {
|
|||
struct winsys_handle *handle,
|
||||
unsigned usage );
|
||||
|
||||
/**
|
||||
* Checks whether an operation can be accelerated by this context.
|
||||
*
|
||||
* \param ctx pipe context
|
||||
* \param operation pipe_ml_operation to be checked
|
||||
* \return whether the context can accelerate this operation
|
||||
*/
|
||||
bool (*ml_operation_supported)(struct pipe_context *context, const struct pipe_ml_operation *operation);
|
||||
|
||||
/**
|
||||
* Compiles a ML subgraph, to be executed later. The returned pipe_ml_subgraph
|
||||
* should contain all information needed to execute the subgraph with as
|
||||
* little effort as strictly needed.
|
||||
*
|
||||
* \param ctx pipe context
|
||||
* \param operations array containing the definitions of the operations in the graph
|
||||
* \param count number of operations
|
||||
* \return a newly allocated pipe_ml_subgraph
|
||||
*/
|
||||
struct pipe_ml_subgraph *(*ml_subgraph_create)(struct pipe_context *context,
|
||||
const struct pipe_ml_operation *operations,
|
||||
unsigned count);
|
||||
|
||||
/**
|
||||
* Invokes a ML subgraph for a given input tensor.
|
||||
*
|
||||
|
|
@ -1298,15 +1276,6 @@ struct pipe_context {
|
|||
struct pipe_ml_subgraph *subgraph,
|
||||
unsigned outputs_count, unsigned output_idxs[],
|
||||
void *outputs[], bool is_signed[]);
|
||||
|
||||
/**
|
||||
* Release all resources allocated by the implementation of ml_subgraph_create
|
||||
*
|
||||
* \param ctx pipe context
|
||||
* \param subgraph subgraph to release
|
||||
*/
|
||||
void (*ml_subgraph_destroy)(struct pipe_context *context,
|
||||
struct pipe_ml_subgraph *subgraph);
|
||||
};
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -122,6 +122,11 @@ struct pipe_screen {
|
|||
|
||||
const char *(*get_vendor)(struct pipe_screen *);
|
||||
|
||||
/**
|
||||
* Returns the ML device for this screen, or NULL if ML is not supported.
|
||||
*/
|
||||
struct pipe_ml_device *(*get_ml_device)(struct pipe_screen *);
|
||||
|
||||
/**
|
||||
* Returns the device vendor.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -1275,9 +1275,62 @@ struct pipe_ml_operation
|
|||
struct pipe_ml_subgraph
|
||||
{
|
||||
/**
|
||||
* pipe_context that owns this subgraph.
|
||||
* pipe_ml_device that owns this subgraph.
|
||||
*/
|
||||
struct pipe_context *context;
|
||||
struct pipe_ml_device *device;
|
||||
};
|
||||
|
||||
/**
|
||||
* ML device providing ahead-of-time operations: operation support queries,
|
||||
* subgraph compilation/serialization, and subgraph destruction.
|
||||
*/
|
||||
struct pipe_ml_device {
|
||||
/* NOTE(review): nothing visible here assigns or reads id; presumably a
 * device identifier string — confirm. */
const char *id;
|
||||
|
||||
/**
|
||||
* Checks whether an operation can be accelerated by this device.
|
||||
*
|
||||
* \param device pipe_ml_device to be used
|
||||
* \param operation pipe_ml_operation to be checked
|
||||
* \return whether the device can accelerate this operation
|
||||
*/
|
||||
bool (*ml_operation_supported)(struct pipe_ml_device *device,
|
||||
const struct pipe_ml_operation *operation);
|
||||
|
||||
/**
|
||||
* Compiles an ML subgraph, to be executed later. The returned pipe_ml_subgraph
|
||||
* should contain all information needed to execute the subgraph with as
|
||||
* little effort as strictly needed.
|
||||
*
|
||||
* \param device pipe_ml_device to be used
|
||||
* \param operations array containing the definitions of the operations in the graph
|
||||
* \param count number of operations
|
||||
* \return a newly allocated pipe_ml_subgraph
|
||||
*/
|
||||
struct pipe_ml_subgraph *(*ml_subgraph_create)(struct pipe_ml_device *device,
|
||||
const struct pipe_ml_operation *operations,
|
||||
unsigned count);
|
||||
|
||||
/**
|
||||
* Serialize a compiled subgraph into a byte buffer.
|
||||
*
|
||||
* \param device pipe_ml_device to be used
|
||||
* \param subgraph previously-compiled subgraph
|
||||
* \param size output: size of the returned buffer
|
||||
* \return malloc'd buffer (caller frees), or NULL on failure
* NOTE(review): the etnaviv and rocket screen setup shown in this change
* assign only ml_operation_supported, ml_subgraph_create and
* ml_subgraph_destroy; confirm whether this hook may be left NULL and must
* be checked by callers before use.
|
||||
*/
|
||||
uint8_t *(*ml_subgraph_serialize)(struct pipe_ml_device *device,
|
||||
struct pipe_ml_subgraph *subgraph,
|
||||
size_t *size);
|
||||
|
||||
/**
|
||||
* Release all resources allocated by the implementation of ml_subgraph_create
|
||||
*
|
||||
* \param device pipe_ml_device to be used
|
||||
* \param subgraph subgraph to release
|
||||
*/
|
||||
void (*ml_subgraph_destroy)(struct pipe_ml_device *device,
|
||||
struct pipe_ml_subgraph *subgraph);
|
||||
};
|
||||
|
||||
struct pipe_compute_state
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue