From 3e7423445076f4bd7aeb78d2f1df717b6a1563b2 Mon Sep 17 00:00:00 2001
From: Tomeu Vizoso
Date: Thu, 24 Oct 2024 12:01:38 +0200
Subject: [PATCH] etnaviv/ml: Add support for tensor padding operations

Just one more TP operation, at least for the pad modes supported.

Reviewed-by: Philipp Zabel
Part-of:
---
 src/gallium/drivers/etnaviv/etnaviv_ml.c    |  25 +++
 src/gallium/drivers/etnaviv/etnaviv_ml.h    |   2 +
 src/gallium/drivers/etnaviv/etnaviv_ml_nn.c |   1 +
 src/gallium/drivers/etnaviv/etnaviv_ml_tp.c | 185 ++++++++++++++++++++
 src/gallium/drivers/etnaviv/etnaviv_ml_tp.h |   5 +
 5 files changed, 218 insertions(+)

diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.c b/src/gallium/drivers/etnaviv/etnaviv_ml.c
index 93be715c897..9f839038284 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml.c
@@ -389,6 +389,30 @@ lower_operations(struct etna_ml_subgraph *subgraph,

          break;
       }
+      case PIPE_ML_OPERATION_TYPE_PAD: { /* Pad becomes one TP job, optionally preceded by a transpose job and followed by a detranspose job. */
+         unsigned input_tensor = poperation->input_tensors[0]->index; /* Passed by address to etna_ml_lower_transpose() below. */
+
+         if (needs_transpose(poperations, count, poperation)) {
+            struct etna_operation *operation = calloc(1, sizeof(*operation));
+            etna_ml_lower_transpose(subgraph, poperation->input_tensors[0], operation, &input_tensor);
+            list_addtail(&operation->link, etna_operations);
+         }
+
+         ML_DBG("Adding pad operation.\n");
+         struct etna_operation *operation = calloc(1, sizeof(*operation));
+         etna_ml_lower_pad(subgraph, poperation, operation);
+         operation->input_tensors[0] = input_tensor; /* Replaces the index etna_ml_lower_pad() just stored. */
+         list_addtail(&operation->link, etna_operations);
+
+         if (needs_detranspose(poperations, count, poperation)) {
+            struct etna_operation *detranspose = calloc(1, sizeof(*operation)); /* NOTE(review): sized through *operation, which has the same type as *detranspose. */
+            etna_ml_lower_detranspose(subgraph, operation, detranspose);
+            operation->output_tensors[0] = detranspose->input_tensors[0]; /* The pad now writes to the tensor the detranspose reads from. */
+            list_addtail(&detranspose->link, etna_operations);
+         }
+
+         break;
+      }
       default:
          unreachable("Unsupported ML operation type");
       }
@@ -469,6 +493,7 @@ count_tensors(const struct pipe_ml_operation *poperations,
          tensor_count = MAX2(tensor_count, poperation->conv.weight_tensor->index);
          tensor_count = MAX2(tensor_count, poperation->conv.bias_tensor->index);
          break;
+      case PIPE_ML_OPERATION_TYPE_PAD: /* Falls through to the case labels that follow. */
       case PIPE_ML_OPERATION_TYPE_ADD:
       case PIPE_ML_OPERATION_TYPE_CONCATENATION:
       case PIPE_ML_OPERATION_TYPE_SPLIT:
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.h b/src/gallium/drivers/etnaviv/etnaviv_ml.h
index 46e786070c5..382ed1615d6 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml.h
@@ -33,6 +33,7 @@ enum etna_ml_tp_type {
    ETNA_ML_TP_TRANSPOSE,
    ETNA_ML_TP_DETRANSPOSE,
    ETNA_ML_TP_RESHUFFLE,
+   ETNA_ML_TP_PAD, /* TP job kind used for tensor padding. */
 };

 struct etna_ml_subgraph {
@@ -48,6 +49,7 @@ struct etna_ml_subgraph {

 struct etna_vip_instruction {
    enum etna_job_type type;
+   enum etna_ml_tp_type tp_type; /* Copied from the operation in etna_ml_compile_operation_tp(). */

    struct etna_bo *configs[MAX_CONFIG_BOS];
    struct etna_bo *coefficients;
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c
index ee55f6451a8..99250b5a6d8 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c
@@ -9,6 +9,7 @@
 #include "etnaviv_context.h"
 #include "etnaviv_debug.h"
 #include "etnaviv_emit.h"
+#include "etnaviv_ml.h"
 #include "etnaviv_ml_nn.h"

 #define ETNA_NN_INT8 0
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c
index 371de498531..a7754756124 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c
@@ -3,11 +3,13 @@
  * SPDX-License-Identifier: MIT
  */

+#include "hw/state.xml.h"
 #include "util/u_inlines.h"

 #include "etnaviv_context.h"
 #include "etnaviv_debug.h"
 #include "etnaviv_emit.h"
+#include "etnaviv_ml.h"
 #include "etnaviv_ml_tp.h"

 #define FIELD(field, bits) uint32_t field : bits;
@@ -538,6 +540,141 @@ create_reshuffle_config(struct etna_ml_subgraph *subgraph, const struct etna_ope
    return bo;
 }

+
+static void /* Narrows dimension 2 of in_dims/out_dims, in place, to the share of TP core tp_core. */
+split_pad(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation,
+          unsigned tp_core, unsigned tp_cores_used, unsigned *in_dims, unsigned *out_dims)
+{
+   unsigned remaining_in_size;
+   unsigned dim_to_split = 2; /* create_pad_config() stores the channel counts at index 2. */
+
+   remaining_in_size = in_dims[dim_to_split];
+
+   for (unsigned i = 0; i <= tp_core; i++) { /* Replays the split for cores 0..tp_core; the values of the last iteration are kept. */
+      unsigned size = DIV_ROUND_UP(remaining_in_size, (tp_cores_used - i)); /* Rounded-up even share of what is left for cores i and above. */
+
+      if (i < tp_cores_used - 1) {
+         in_dims[dim_to_split] = size;
+         remaining_in_size -= in_dims[dim_to_split];
+      } else
+         in_dims[dim_to_split] = remaining_in_size; /* The last core takes whatever remains. */
+
+      out_dims[dim_to_split] = size; /* Overwrites the caller's value with the share derived from the input. */
+   }
+}
+
+static struct etna_bo * /* Builds and returns the TP parameter BO describing core tp_core's part of a pad operation. */
+create_pad_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation,
+                  unsigned tp_core, unsigned tp_cores_used)
+{
+   struct pipe_context *pctx = subgraph->base.context;
+   struct etna_bo *bo = etna_ml_create_bo(pctx, sizeof(struct etna_tp_params)); /* Sized for exactly one struct etna_tp_params. */
+   unsigned input_width = operation->input_width;
+   unsigned input_height = operation->input_height;
+   unsigned input_channels = operation->input_channels;
+   unsigned output_width = operation->output_width;
+   unsigned output_height = operation->output_height;
+   unsigned output_channels = operation->output_channels;
+   unsigned in_dims[3];
+   unsigned out_dims[3];
+
+   SWAP(input_width, input_height); /* NOTE(review): width/height are exchanged before use; the reason is not visible in this patch. */
+   SWAP(output_width, output_height);
+
+   etna_bo_cpu_prep(bo, DRM_ETNA_PREP_WRITE); /* Paired with etna_bo_cpu_fini() before returning. */
+
+   struct etna_tp_params *map = etna_bo_map(bo);
+
+   set_default_tp_config(map);
+
+   in_dims[0] = input_width;
+   in_dims[1] = input_height;
+   in_dims[2] = input_channels;
+
+   out_dims[0] = output_width;
+   out_dims[1] = output_height;
+   out_dims[2] = output_channels;
+
+   split_pad(subgraph, operation, tp_core, tp_cores_used, in_dims, out_dims); /* Only index 2 of each array is changed. */
+
+   map->in_image_x_size = in_dims[0];
+   map->in_image_y_size = in_dims[1];
+   map->in_image_z_size = in_dims[2];
+
+   map->in_image_stride = input_width; /* Stride and slice use the (swapped) full dimensions, not the per-core split. */
+   map->in_image_slice = input_width * input_height;
+
+   map->in_window_x_start = 0xffff; /* NOTE(review): presumably -1 in a 16-bit field, i.e. the window starts one element early - confirm. */
+   map->in_window_y_start = 0xffff;
+
+   map->in_window_x_end = in_dims[0];
+   map->in_window_y_end = in_dims[1];
+   map->in_tile_x_size = out_dims[0]; /* Tile size and increment both equal the output dimensions. */
+   map->in_tile_x_inc = out_dims[0];
+   map->in_tile_y_size = out_dims[1];
+   map->in_tile_y_inc = out_dims[1];
+
+   struct pipe_resource *input = etna_ml_get_tensor(subgraph, operation->input_tensors[0]);
+   unsigned offset = etna_ml_get_offset(subgraph, operation->input_tensors[0]);
+   map->in_image_base_address = etna_bo_gpu_va(etna_resource(input)->bo) + offset;
+
+   struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensors[0]);
+   offset = etna_ml_get_offset(subgraph, operation->output_tensors[0]);
+   map->out_image_base_address = etna_bo_gpu_va(etna_resource(output)->bo) + offset;
+
+   for (unsigned i = 0; i < tp_core; i++) { /* Advances both base addresses past the shares of cores 0..tp_core-1. */
+      unsigned in_dims[3]; /* Shadows the outer in_dims; the outer array is not modified here. */
+      unsigned out_dims[3]; /* Shadows the outer out_dims as well. */
+      unsigned in_offset = 0;
+      unsigned out_offset = 0;
+
+      in_dims[0] = input_width;
+      in_dims[1] = input_height;
+      in_dims[2] = input_channels;
+
+      out_dims[0] = output_width;
+      out_dims[1] = output_height;
+      out_dims[2] = output_channels;
+
+      split_pad(subgraph, operation, i, tp_cores_used, in_dims, out_dims);
+
+      in_offset = map->in_image_slice * in_dims[2]; /* Slice size times the dimension-2 share of core i. */
+      out_offset = out_dims[2];
+      out_offset *= map->in_tile_x_size * map->in_tile_y_size; /* in_tile_{x,y}_size were set to out_dims[0] and out_dims[1] above. */
+
+      map->in_image_base_address += in_offset;
+      map->out_image_base_address += out_offset;
+   }
+
+   map->out_loop_1_reset = 0x0;
+   map->out_loop_2_reset = 0x0;
+   map->out_loop_3_reset = 0x0;
+   map->out_loop_0_inc = 0x0;
+   map->out_loop_1_inc = 0x1;
+   map->out_loop_0_count = 0x1;
+   map->out_loop_1_count = out_dims[0]; /* Outer out_dims again: the values computed for this core. */
+   map->out_loop_2_count = out_dims[1];
+   map->out_loop_3_count = 0x1;
+   map->out_loop_2_inc = out_dims[0];
+   map->out_loop_3_inc = 0x0;
+   map->out_loop_6_inc = out_dims[0] * out_dims[1];
+
+   map->in_zp = operation->input_zero_point;
+   map->out_zp = operation->output_zero_point;
+
+   if (tp_cores_used > 1) /* With a single core, no_flush keeps whatever set_default_tp_config() left in it. */
+      map->no_flush = tp_core < tp_cores_used - 1; /* Set for every core except the last one. */
+
+   map->in_image_circular_buf_size = 0x0;
+   map->in_image_circular_buf_end_address_plus_1 = 0xFFFFFFFF >> 6;
+   map->out_image_circular_buf_size = 0x0;
+   map->out_image_circular_buf_end_address_plus_1 = 0xFFFFFFFF >> 6;
+
+   etna_bo_cpu_fini(bo);
+
+   return bo;
+}
+
 static inline uint8_t
 etna_tensor_zero_point(const struct pipe_tensor *tensor)
 {
@@ -663,6 +800,37 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph,
    }
 }

+void /* Fills operation as a TP pad job from the first input and output tensors of pad. */
+etna_ml_lower_pad(struct etna_ml_subgraph *subgraph,
+                  const struct pipe_ml_operation *pad,
+                  struct etna_operation *operation)
+{
+   operation->type = ETNA_JOB_TYPE_TP;
+   operation->tp_type = ETNA_ML_TP_PAD;
+   operation->stride = 1;
+
+   operation->input_tensors[0] = pad->input_tensors[0]->index;
+   operation->input_count = 1;
+   operation->input_width = pad->input_tensors[0]->dims[1]; /* dims[1..3] are read as width, height and channels; dims[0] is not used. */
+   operation->input_height = pad->input_tensors[0]->dims[2];
+   operation->input_channels = pad->input_tensors[0]->dims[3];
+   operation->input_tensor_sizes[0] = operation->input_width * /* Product of width, height and channels. */
+                                      operation->input_height *
+                                      operation->input_channels;
+   operation->input_zero_point = pad->input_tensors[0]->zero_point;
+   operation->input_scale = pad->input_tensors[0]->scale;
+
+   operation->output_tensors[0] = pad->output_tensors[0]->index;
+   operation->output_width = pad->output_tensors[0]->dims[1];
+   operation->output_height = pad->output_tensors[0]->dims[2];
+   operation->output_channels = pad->output_tensors[0]->dims[3];
+   operation->output_zero_point = pad->output_tensors[0]->zero_point;
+   operation->output_scale = pad->output_tensors[0]->scale;
+   operation->output_tensor_sizes[0] = operation->output_width * /* Same product for the output tensor. */
+                                       operation->output_height *
+                                       operation->output_channels;
+}
+
 void
 etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
                              const struct etna_operation *operation,
@@ -704,8 +872,18 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
       }
       break;
    }
+   case ETNA_ML_TP_PAD: { /* Creates one config BO per TP core reported by etna_ml_get_core_info(). */
+      unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count;
+
+      ML_DBG("pad: input_width %d tp_cores_used %d\n", operation->input_width, tp_cores_used); /* NOTE(review): tp_cores_used is unsigned but printed with %d. */
+      for (unsigned i = 0; i < tp_cores_used; i++) {
+         instruction->configs[i] = create_pad_config(subgraph, operation, i, tp_cores_used);
+      }
+      break;
+   }
    }
    instruction->type = ETNA_JOB_TYPE_TP;
+   instruction->tp_type = operation->tp_type; /* Kept on the instruction; etna_ml_emit_operation_tp() tests tp_type for ETNA_ML_TP_PAD. */
 }

 void
@@ -728,6 +906,13 @@ etna_ml_emit_operation_tp(struct etna_ml_subgraph *subgraph,
       etna_set_state(stream, VIVS_GL_OCB_REMAP_START, 0x0);
       etna_set_state(stream, VIVS_GL_OCB_REMAP_END, 0x0);
       etna_set_state(stream, VIVS_GL_TP_CONFIG, 0x0);
+
+      if (operation->tp_type == ETNA_ML_TP_PAD) { /* Pad jobs write 0x8 to VIVS_GL_UNK03950 for every j except the last; all other jobs write 0. */
+         etna_set_state(stream, VIVS_GL_UNK03950, j < tp_core_count - 1 ? 0x8 : 0x0);
+      } else {
+         etna_set_state(stream, VIVS_GL_UNK03950, 0x0);
+      }
+
       etna_set_state_reloc(stream, VIVS_PS_TP_INST_ADDR, &(struct etna_reloc) {
          .bo = operation->configs[j],
          .flags = ETNA_RELOC_READ,
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h
index 1a4e3aef6b4..2bbda23252d 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h
@@ -22,6 +22,11 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph,
                         struct etna_operation *operation,
                         unsigned *output_tensor);

+void /* Describes the pad operation pad as a TP job in operation. */
+etna_ml_lower_pad(struct etna_ml_subgraph *subgraph,
+                  const struct pipe_ml_operation *pad,
+                  struct etna_operation *operation);
+
 void
 etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
                              const struct etna_operation *operation,