mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 15:38:09 +02:00
etnaviv/ml: Add support for tensor padding operations
This adds one more TP operation, covering the pad modes that are supported. Reviewed-by: Philipp Zabel <p.zabel@pengutronix.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32509>
This commit is contained in:
parent
02e92bbcea
commit
3e74234450
5 changed files with 218 additions and 0 deletions
|
|
@ -389,6 +389,30 @@ lower_operations(struct etna_ml_subgraph *subgraph,
|
|||
|
||||
break;
|
||||
}
|
||||
case PIPE_ML_OPERATION_TYPE_PAD: {
|
||||
unsigned input_tensor = poperation->input_tensors[0]->index;
|
||||
|
||||
if (needs_transpose(poperations, count, poperation)) {
|
||||
struct etna_operation *operation = calloc(1, sizeof(*operation));
|
||||
etna_ml_lower_transpose(subgraph, poperation->input_tensors[0], operation, &input_tensor);
|
||||
list_addtail(&operation->link, etna_operations);
|
||||
}
|
||||
|
||||
ML_DBG("Adding pad operation.\n");
|
||||
struct etna_operation *operation = calloc(1, sizeof(*operation));
|
||||
etna_ml_lower_pad(subgraph, poperation, operation);
|
||||
operation->input_tensors[0] = input_tensor;
|
||||
list_addtail(&operation->link, etna_operations);
|
||||
|
||||
if (needs_detranspose(poperations, count, poperation)) {
|
||||
struct etna_operation *detranspose = calloc(1, sizeof(*operation));
|
||||
etna_ml_lower_detranspose(subgraph, operation, detranspose);
|
||||
operation->output_tensors[0] = detranspose->input_tensors[0];
|
||||
list_addtail(&detranspose->link, etna_operations);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
unreachable("Unsupported ML operation type");
|
||||
}
|
||||
|
|
@ -469,6 +493,7 @@ count_tensors(const struct pipe_ml_operation *poperations,
|
|||
tensor_count = MAX2(tensor_count, poperation->conv.weight_tensor->index);
|
||||
tensor_count = MAX2(tensor_count, poperation->conv.bias_tensor->index);
|
||||
break;
|
||||
case PIPE_ML_OPERATION_TYPE_PAD:
|
||||
case PIPE_ML_OPERATION_TYPE_ADD:
|
||||
case PIPE_ML_OPERATION_TYPE_CONCATENATION:
|
||||
case PIPE_ML_OPERATION_TYPE_SPLIT:
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ enum etna_ml_tp_type {
|
|||
ETNA_ML_TP_TRANSPOSE,
|
||||
ETNA_ML_TP_DETRANSPOSE,
|
||||
ETNA_ML_TP_RESHUFFLE,
|
||||
ETNA_ML_TP_PAD,
|
||||
};
|
||||
|
||||
struct etna_ml_subgraph {
|
||||
|
|
@ -48,6 +49,7 @@ struct etna_ml_subgraph {
|
|||
|
||||
struct etna_vip_instruction {
|
||||
enum etna_job_type type;
|
||||
enum etna_ml_tp_type tp_type;
|
||||
|
||||
struct etna_bo *configs[MAX_CONFIG_BOS];
|
||||
struct etna_bo *coefficients;
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "etnaviv_context.h"
|
||||
#include "etnaviv_debug.h"
|
||||
#include "etnaviv_emit.h"
|
||||
#include "etnaviv_ml.h"
|
||||
#include "etnaviv_ml_nn.h"
|
||||
|
||||
#define ETNA_NN_INT8 0
|
||||
|
|
|
|||
|
|
@ -3,11 +3,13 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "hw/state.xml.h"
|
||||
#include "util/u_inlines.h"
|
||||
|
||||
#include "etnaviv_context.h"
|
||||
#include "etnaviv_debug.h"
|
||||
#include "etnaviv_emit.h"
|
||||
#include "etnaviv_ml.h"
|
||||
#include "etnaviv_ml_tp.h"
|
||||
|
||||
#define FIELD(field, bits) uint32_t field : bits;
|
||||
|
|
@ -538,6 +540,141 @@ create_reshuffle_config(struct etna_ml_subgraph *subgraph, const struct etna_ope
|
|||
return bo;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Partition a pad operation across the available TP cores.
 *
 * The split is done along dimension 2 (the channel dimension, as filled in
 * by create_pad_config()).  After the call, in_dims[2] and out_dims[2] hold
 * the share of the channel dimension assigned to core @tp_core; the other
 * dimensions are left untouched.  Earlier cores get a rounded-up even share,
 * the last core takes whatever remains.
 *
 * @subgraph and @operation are currently unused but kept for symmetry with
 * the other per-operation split helpers.
 */
static void
split_pad(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation,
          unsigned tp_core, unsigned tp_cores_used, unsigned *in_dims, unsigned *out_dims)
{
   const unsigned axis = 2;          /* Split along the channel dimension. */
   unsigned left = in_dims[axis];    /* Channels not yet handed to a core. */

   /* Walk the cores up to and including the requested one so that the
    * remainder bookkeeping matches what every earlier core was given.
    */
   for (unsigned core = 0; core <= tp_core; core++) {
      unsigned share = DIV_ROUND_UP(left, tp_cores_used - core);

      if (core + 1 < tp_cores_used) {
         in_dims[axis] = share;
         left -= share;
      } else {
         /* Last core: take everything that is left over. */
         in_dims[axis] = left;
      }

      out_dims[axis] = share;
   }
}
|
||||
|
||||
/*
 * Build the TP hardware configuration buffer for (this core's slice of) a
 * pad operation.
 *
 * Returns a freshly allocated BO containing an etna_tp_params block for TP
 * core @tp_core out of @tp_cores_used.  The work is split along the channel
 * dimension by split_pad(); each core's config gets its input/output base
 * addresses advanced past the slices owned by the lower-numbered cores.
 *
 * NOTE(review): most map-> field meanings below are inferred from the field
 * names of struct etna_tp_params; the hardware semantics come from
 * reverse-engineered blobs — treat the per-field comments as assumptions.
 */
static struct etna_bo *
create_pad_config(struct etna_ml_subgraph *subgraph, const struct etna_operation *operation,
                  unsigned tp_core, unsigned tp_cores_used)
{
   struct pipe_context *pctx = subgraph->base.context;
   struct etna_bo *bo = etna_ml_create_bo(pctx, sizeof(struct etna_tp_params));
   unsigned input_width = operation->input_width;
   unsigned input_height = operation->input_height;
   unsigned input_channels = operation->input_channels;
   unsigned output_width = operation->output_width;
   unsigned output_height = operation->output_height;
   unsigned output_channels = operation->output_channels;
   unsigned in_dims[3];
   unsigned out_dims[3];

   /* Width and height are deliberately exchanged for this operation —
    * presumably to match the TP unit's expected memory layout; TODO confirm
    * against the other TP config builders in this file.
    */
   SWAP(input_width, input_height);
   SWAP(output_width, output_height);

   etna_bo_cpu_prep(bo, DRM_ETNA_PREP_WRITE);

   struct etna_tp_params *map = etna_bo_map(bo);

   set_default_tp_config(map);

   /* Dimension order: [0] = width, [1] = height, [2] = channels (after the
    * SWAP above).  split_pad() only rewrites index 2.
    */
   in_dims[0] = input_width;
   in_dims[1] = input_height;
   in_dims[2] = input_channels;

   out_dims[0] = output_width;
   out_dims[1] = output_height;
   out_dims[2] = output_channels;

   /* Shrink the channel dimension to this core's share. */
   split_pad(subgraph, operation, tp_core, tp_cores_used, in_dims, out_dims);

   map->in_image_x_size = in_dims[0];
   map->in_image_y_size = in_dims[1];
   map->in_image_z_size = in_dims[2];

   /* Stride/slice are based on the full (unsplit) input plane. */
   map->in_image_stride = input_width;
   map->in_image_slice = input_width * input_height;

   /* 0xffff here reads as -1 in the 16-bit window-start fields, i.e. a
    * one-element border before the image — presumably where the padding is
    * inserted; TODO confirm against the pad parameters.
    */
   map->in_window_x_start = 0xffff;
   map->in_window_y_start = 0xffff;

   map->in_window_x_end = in_dims[0];
   map->in_window_y_end = in_dims[1];
   map->in_tile_x_size = out_dims[0];
   map->in_tile_x_inc = out_dims[0];
   map->in_tile_y_size = out_dims[1];
   map->in_tile_y_inc = out_dims[1];

   struct pipe_resource *input = etna_ml_get_tensor(subgraph, operation->input_tensors[0]);
   unsigned offset = etna_ml_get_offset(subgraph, operation->input_tensors[0]);
   map->in_image_base_address = etna_bo_gpu_va(etna_resource(input)->bo) + offset;

   struct pipe_resource *output = etna_ml_get_tensor(subgraph, operation->output_tensors[0]);
   offset = etna_ml_get_offset(subgraph, operation->output_tensors[0]);
   map->out_image_base_address = etna_bo_gpu_va(etna_resource(output)->bo) + offset;

   /* Advance both base addresses past the slices assigned to every core
    * before this one.  The inner arrays intentionally shadow the outer ones:
    * each iteration recomputes core i's split from the full dimensions.
    */
   for (unsigned i = 0; i < tp_core; i++) {
      unsigned in_dims[3];
      unsigned out_dims[3];
      unsigned in_offset = 0;
      unsigned out_offset = 0;

      in_dims[0] = input_width;
      in_dims[1] = input_height;
      in_dims[2] = input_channels;

      out_dims[0] = output_width;
      out_dims[1] = output_height;
      out_dims[2] = output_channels;

      split_pad(subgraph, operation, i, tp_cores_used, in_dims, out_dims);

      /* Core i consumed in_dims[2] input slices and produced out_dims[2]
       * output tiles of in_tile_x_size * in_tile_y_size elements each.
       */
      in_offset = map->in_image_slice * in_dims[2];
      out_offset = out_dims[2];
      out_offset *= map->in_tile_x_size * map->in_tile_y_size;

      map->in_image_base_address += in_offset;
      map->out_image_base_address += out_offset;
   }

   /* Output address-generation loops; values follow the pattern used by the
    * other TP ops in this file (loop 1 = x, loop 2 = y, loop 6 = plane).
    */
   map->out_loop_1_reset = 0x0;
   map->out_loop_2_reset = 0x0;
   map->out_loop_3_reset = 0x0;
   map->out_loop_0_inc = 0x0;
   map->out_loop_1_inc = 0x1;
   map->out_loop_0_count = 0x1;
   map->out_loop_1_count = out_dims[0];
   map->out_loop_2_count = out_dims[1];
   map->out_loop_3_count = 0x1;
   map->out_loop_2_inc = out_dims[0];
   map->out_loop_3_inc = 0x0;
   map->out_loop_6_inc = out_dims[0] * out_dims[1];

   /* Quantization zero points; pad fills with the zero-point value. */
   map->in_zp = operation->input_zero_point;
   map->out_zp = operation->output_zero_point;

   /* When split across cores, only the last core's job flushes. */
   if (tp_cores_used > 1)
      map->no_flush = tp_core < tp_cores_used - 1;

   /* Circular buffering disabled: end address set to the maximum encodable
    * value (addresses are stored shifted right by 6 bits).
    */
   map->in_image_circular_buf_size = 0x0;
   map->in_image_circular_buf_end_address_plus_1 = 0xFFFFFFFF >> 6;
   map->out_image_circular_buf_size = 0x0;
   map->out_image_circular_buf_end_address_plus_1 = 0xFFFFFFFF >> 6;

   etna_bo_cpu_fini(bo);

   return bo;
}
|
||||
|
||||
static inline uint8_t
|
||||
etna_tensor_zero_point(const struct pipe_tensor *tensor)
|
||||
{
|
||||
|
|
@ -663,6 +800,37 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph,
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
etna_ml_lower_pad(struct etna_ml_subgraph *subgraph,
|
||||
const struct pipe_ml_operation *pad,
|
||||
struct etna_operation *operation)
|
||||
{
|
||||
operation->type = ETNA_JOB_TYPE_TP;
|
||||
operation->tp_type = ETNA_ML_TP_PAD;
|
||||
operation->stride = 1;
|
||||
|
||||
operation->input_tensors[0] = pad->input_tensors[0]->index;
|
||||
operation->input_count = 1;
|
||||
operation->input_width = pad->input_tensors[0]->dims[1];
|
||||
operation->input_height = pad->input_tensors[0]->dims[2];
|
||||
operation->input_channels = pad->input_tensors[0]->dims[3];
|
||||
operation->input_tensor_sizes[0] = operation->input_width *
|
||||
operation->input_height *
|
||||
operation->input_channels;
|
||||
operation->input_zero_point = pad->input_tensors[0]->zero_point;
|
||||
operation->input_scale = pad->input_tensors[0]->scale;
|
||||
|
||||
operation->output_tensors[0] = pad->output_tensors[0]->index;
|
||||
operation->output_width = pad->output_tensors[0]->dims[1];
|
||||
operation->output_height = pad->output_tensors[0]->dims[2];
|
||||
operation->output_channels = pad->output_tensors[0]->dims[3];
|
||||
operation->output_zero_point = pad->output_tensors[0]->zero_point;
|
||||
operation->output_scale = pad->output_tensors[0]->scale;
|
||||
operation->output_tensor_sizes[0] = operation->output_width *
|
||||
operation->output_height *
|
||||
operation->output_channels;
|
||||
}
|
||||
|
||||
void
|
||||
etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
||||
const struct etna_operation *operation,
|
||||
|
|
@ -704,8 +872,18 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
}
|
||||
break;
|
||||
}
|
||||
case ETNA_ML_TP_PAD: {
|
||||
unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count;
|
||||
|
||||
ML_DBG("pad: input_width %d tp_cores_used %d\n", operation->input_width, tp_cores_used);
|
||||
for (unsigned i = 0; i < tp_cores_used; i++) {
|
||||
instruction->configs[i] = create_pad_config(subgraph, operation, i, tp_cores_used);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
instruction->type = ETNA_JOB_TYPE_TP;
|
||||
instruction->tp_type = operation->tp_type;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
@ -728,6 +906,13 @@ etna_ml_emit_operation_tp(struct etna_ml_subgraph *subgraph,
|
|||
etna_set_state(stream, VIVS_GL_OCB_REMAP_START, 0x0);
|
||||
etna_set_state(stream, VIVS_GL_OCB_REMAP_END, 0x0);
|
||||
etna_set_state(stream, VIVS_GL_TP_CONFIG, 0x0);
|
||||
|
||||
if (operation->tp_type == ETNA_ML_TP_PAD) {
|
||||
etna_set_state(stream, VIVS_GL_UNK03950, j < tp_core_count - 1 ? 0x8 : 0x0);
|
||||
} else {
|
||||
etna_set_state(stream, VIVS_GL_UNK03950, 0x0);
|
||||
}
|
||||
|
||||
etna_set_state_reloc(stream, VIVS_PS_TP_INST_ADDR, &(struct etna_reloc) {
|
||||
.bo = operation->configs[j],
|
||||
.flags = ETNA_RELOC_READ,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,11 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph,
|
|||
struct etna_operation *operation,
|
||||
unsigned *output_tensor);
|
||||
|
||||
void
|
||||
etna_ml_lower_pad(struct etna_ml_subgraph *subgraph,
|
||||
const struct pipe_ml_operation *pad,
|
||||
struct etna_operation *operation);
|
||||
|
||||
void
|
||||
etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
|
||||
const struct etna_operation *operation,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue