diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.c b/src/gallium/drivers/etnaviv/etnaviv_ml.c
index e722d518094..05cfdc0364d 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml.c
@@ -480,6 +480,13 @@ lower_operations(struct etna_ml_subgraph *subgraph,
          list_addtail(&operation->link, etna_operations);
          break;
       }
+      case PIPE_ML_OPERATION_TYPE_LOGISTIC: {
+         etna_ml_lower_logistic(subgraph, poperation, operation);
+         operation->input_tensors[0] = input_tensors[0];
+         operation->output_tensors[0] = poperation->output_tensors[0]->index;
+         list_addtail(&operation->link, etna_operations);
+         break;
+      }
       default:
          unreachable("Unsupported ML operation type");
       }
@@ -600,6 +607,7 @@ count_tensors(const struct pipe_ml_operation *poperations,
       case PIPE_ML_OPERATION_TYPE_RESHAPE:
       case PIPE_ML_OPERATION_TYPE_RELU:
       case PIPE_ML_OPERATION_TYPE_ABSOLUTE:
+      case PIPE_ML_OPERATION_TYPE_LOGISTIC:
          break;
       default:
          unreachable("Unsupported ML operation type");
@@ -687,6 +695,7 @@ etna_ml_operation_supported(struct pipe_context *pcontext,
       case PIPE_ML_OPERATION_TYPE_RESHAPE:
       case PIPE_ML_OPERATION_TYPE_RELU:
       case PIPE_ML_OPERATION_TYPE_ABSOLUTE:
+      case PIPE_ML_OPERATION_TYPE_LOGISTIC:
         supported = true;
         break;
      default:
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.h b/src/gallium/drivers/etnaviv/etnaviv_ml.h
index f9aa5a79a1d..8b3af994f56 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml.h
@@ -28,6 +28,7 @@ enum etna_ml_tp_type {
    ETNA_ML_TP_PAD,
    ETNA_ML_TP_RELU,
    ETNA_ML_TP_ABSOLUTE,
+   ETNA_ML_TP_LOGISTIC,
 };
 
 enum etna_ml_tensor_layout {
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c
index 35f956fc13f..0c7b1f0a8bd 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c
@@ -726,7 +726,7 @@ split_pwl_lut(struct etna_ml_subgraph *subgraph, const struct etna_operation *op
               unsigned tp_core, unsigned tp_cores_used, unsigned *in_dims, unsigned *out_dims)
 {
    unsigned remaining_in_size;
-   unsigned dim_to_split = 2;
+   const unsigned dim_to_split = 2;
 
    remaining_in_size = in_dims[dim_to_split];
 
@@ -854,7 +854,10 @@ create_pwl_lut_config(struct etna_ml_subgraph *subgraph, const struct etna_opera
 
    uint32_t scale;
 
-   scale = fui(operation->input_scale / operation->output_scale);
+   if (operation->tp_type == ETNA_ML_TP_LOGISTIC)
+      scale = fui(1.0 / operation->output_scale); /* the logistic LUT already applies input_scale */
+   else
+      scale = fui(operation->input_scale / operation->output_scale);
 
    /* This should compensate for some loss of precision */
    if ((scale >> 7 & 1) != 0 && (scale & 0x17f) != 0) {
@@ -1084,13 +1087,43 @@ etna_ml_lower_absolute(struct etna_ml_subgraph *subgraph,
                                       operation->output_channels;
 }
 
+void
+etna_ml_lower_logistic(struct etna_ml_subgraph *subgraph,
+                       const struct pipe_ml_operation *poperation,
+                       struct etna_operation *operation)
+{
+   operation->type = ETNA_JOB_TYPE_TP;
+   operation->tp_type = ETNA_ML_TP_LOGISTIC;
+   operation->stride = 1;
+
+   operation->input_count = 1;
+   operation->input_width = poperation->input_tensors[0]->dims[1];
+   operation->input_height = poperation->input_tensors[0]->dims[2];
+   operation->input_channels = poperation->input_tensors[0]->dims[3];
+   operation->input_tensor_sizes[0] = operation->input_width *
+                                      operation->input_height *
+                                      operation->input_channels;
+   operation->input_zero_point = etna_tensor_zero_point(poperation->input_tensors[0]);
+   operation->input_scale = poperation->input_tensors[0]->scale;
+
+   operation->output_count = 1;
+   operation->output_width = poperation->output_tensors[0]->dims[1];
+   operation->output_height = poperation->output_tensors[0]->dims[2];
+   operation->output_channels = poperation->output_tensors[0]->dims[3];
+   operation->output_zero_point = etna_tensor_zero_point(poperation->output_tensors[0]);
+   operation->output_scale = poperation->output_tensors[0]->scale;
+   operation->output_tensor_sizes[0] = operation->output_width *
+                                       operation->output_height *
+                                       operation->output_channels;
+}
+
 static struct etna_bo *
 create_relu_lut_bo(struct etna_ml_subgraph *subgraph,
                    const struct etna_operation *operation)
 {
    struct pipe_context *context = subgraph->base.context;
    struct etna_context *ctx = etna_context(context);
-   unsigned lut_length = 1024;
+   const unsigned lut_length = 1024;
    struct etna_bo *pwl_lut = etna_bo_new(ctx->screen->dev,
                                          lut_length * sizeof(uint32_t),
                                          DRM_ETNA_GEM_CACHE_WC);
@@ -1150,6 +1183,146 @@ create_abs_lut_bo(struct etna_ml_subgraph *subgraph,
    return pwl_lut;
 }
 
+/* float32 -> fp21 (1-bit sign, 5-bit exponent, 15-bit mantissa). Based on code from src/util/half_float.c in Mesa itself. */
+static uint32_t
+fp21(float val)
+{
+   const fi_type fi = {val};
+   const int flt_m = fi.i & 0x7fffff;
+   const int flt_e = (fi.i >> 23) & 0xff;
+   const int flt_s = (fi.i >> 31) & 0x1;
+   int s, e, m = 0;
+
+   /* sign bit */
+   s = flt_s;
+
+   /* handle special cases */
+   if ((flt_e == 0) && (flt_m == 0)) {
+      /* zero */
+      /* m = 0; - already set */
+      e = 0;
+   }
+   else if ((flt_e == 0) && (flt_m != 0)) {
+      /* denorm -- denorm float maps to 0 fp21 */
+      /* m = 0; - already set */
+      e = 0;
+   }
+   else if ((flt_e == 0xff) && (flt_m == 0)) {
+      /* infinity: all-ones 5-bit exponent */
+      /* m = 0; - already set */
+      e = 31;
+   }
+   else if ((flt_e == 0xff) && (flt_m != 0)) {
+      /* Retain the top bits of a NaN to make sure that the quiet/signaling
+       * status stays the same (keep the top 15 of the 23 mantissa bits).
+       */
+      m = flt_m >> 8;
+      if (!m)
+         m = 1;
+      e = 31;
+   }
+   else {
+      /* regular number */
+      const int new_exp = flt_e - 127;
+      if (new_exp < -14) {
+         /* The float32 lies in the range (0.0, min_normal21) and is rounded
+          * to a nearby fp21 value. The result will be either zero, subnormal,
+          * or normal.
+          */
+         e = 0;
+         m = _mesa_lroundevenf((1 << 29) * fabsf(fi.f)); /* fp21 subnormal unit is 2^-29 */
+      }
+      else if (new_exp > 15) {
+         /* overflow: clamp to the largest finite fp21 value */
+         e = 0x1e;
+         m = 0x7fff;
+      }
+      else {
+         /* The float32 lies in the range
+          *   [min_normal21, max_normal21 + max_step21)
+          * and is rounded to a nearby fp21 value. The result will be
+          * either normal or infinite.
+          */
+         e = new_exp + 15;
+         m = _mesa_lroundevenf(flt_m / (float) (1 << 8));
+      }
+   }
+
+   return (s << 20) | (e << 15) | m;
+}
+
+/* fp21 -> float32, for normal values only. Based on code from src/util/half_float.c in Mesa itself. */
+static float
+fp32(uint32_t val)
+{
+   union fi f32;
+
+   /* Exponent / Mantissa */
+   f32.ui = (val & 0xfffff) << 8;
+
+   /* Sign */
+   f32.ui |= (uint32_t)(val & 0x100000) << 11;
+   f32.ui += 0x38000000; /* re-bias the exponent: fp21 bias 15 -> float32 bias 127 */
+
+   return f32.f;
+}
+
+static struct etna_bo *
+create_log_lut_bo(struct etna_ml_subgraph *subgraph,
+                  const struct etna_operation *operation)
+{
+   struct pipe_context *context = subgraph->base.context;
+   struct etna_context *ctx = etna_context(context);
+   const unsigned lut_table_len = 1024;
+
+   struct etna_bo *pwl_lut = etna_bo_new(ctx->screen->dev,
+                                         lut_table_len * sizeof(uint32_t),
+                                         DRM_ETNA_GEM_CACHE_WC);
+
+   etna_bo_cpu_prep(pwl_lut, DRM_ETNA_PREP_WRITE);
+
+   uint32_t *map = etna_bo_map(pwl_lut);
+   unsigned pos = 0;
+
+   uint32_t initial_val = fp21(0.5); /* +0 and positive subnormal inputs: sigmoid(~0) = 0.5 */
+   for (int i = 0; i < 16; i++)
+      map[pos++] = initial_val;
+
+   for (int cur_val = 0x8000; cur_val < 0xf8000; cur_val += 0x800) { /* positive normal inputs */
+      float val = fp32(cur_val);
+      val *= operation->input_scale;
+      val *= -1.0;
+      val = expf(val);
+      val = 1.0f / (val + 1.0f); /* sigmoid(x) = 1 / (1 + e^-x) */
+      map[pos++] = fp21(val);
+   }
+
+   uint32_t middle_val = fp21(1.0); /* +inf/NaN bit patterns saturate to 1.0 */
+   for (int i = 0; i < 16; i++)
+      map[pos++] = middle_val;
+
+   uint32_t middle_2_val = fp21(0.5); /* -0 and tiny negative inputs: sigmoid(~0) = 0.5 */
+   for (int i = 0; i < 17; i++)
+      map[pos++] = middle_2_val;
+
+   for (int cur_val = 0x108800; cur_val < 0x1f8000; cur_val += 0x800) { /* negative normal inputs */
+      float val = fp32(cur_val);
+      val *= operation->input_scale;
+      val *= -1.0;
+      val = expf(val);
+      val = 1.0f / (val + 1.0f);
+      map[pos++] = fp21(val);
+   }
+
+   uint32_t final_val = fp21(0.0); /* -inf/NaN bit patterns saturate to 0.0 */
+   for (int i = 0; i < 16; i++)
+      map[pos++] = final_val;
+
+   etna_bo_cpu_fini(pwl_lut);
+
+   return pwl_lut;
+}
+
 void
 etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
                              const struct etna_operation *operation,
@@ -1226,6 +1399,19 @@ etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
       }
       break;
    }
+   case ETNA_ML_TP_LOGISTIC: {
+      unsigned tp_cores_used = etna_ml_get_core_info(ctx)->tp_core_count;
+
+      if (operation->input_width < 6)
+         tp_cores_used = 1;
+
+      ML_DBG("logistic: input_width %u tp_cores_used %u\n", operation->input_width, tp_cores_used);
+      instruction->pwl_lut = create_log_lut_bo(subgraph, operation);
+      for (unsigned i = 0; i < tp_cores_used; i++) {
+         instruction->configs[i] = create_pwl_lut_config(subgraph, operation, i, tp_cores_used, instruction->pwl_lut);
+      }
+      break;
+   }
    }
    instruction->type = ETNA_JOB_TYPE_TP;
    instruction->tp_type = operation->tp_type;
diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h
index 92bc868ade1..32667c2caf1 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h
+++ b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.h
@@ -36,6 +36,11 @@ etna_ml_lower_absolute(struct etna_ml_subgraph *subgraph,
                        const struct pipe_ml_operation *pad,
                        struct etna_operation *operation);
 
+void
+etna_ml_lower_logistic(struct etna_ml_subgraph *subgraph,
+                       const struct pipe_ml_operation *poperation,
+                       struct etna_operation *operation);
+
 void
 etna_ml_compile_operation_tp(struct etna_ml_subgraph *subgraph,
                              const struct etna_operation *operation,
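
A quick review aid, not part of the patch: the standalone program below walks the same five LUT segments as create_log_lut_bo() and checks that they fill exactly the 1024 table slots, evaluating the same sigmoid expression along the way. fp21_to_float() duplicates the patch's fp32() helper under the assumed 1-bit sign / 5-bit exponent / 15-bit mantissa layout, and INPUT_SCALE is a made-up stand-in for operation->input_scale.

/* lut_layout_check.c -- build with: cc lut_layout_check.c -lm */
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>

#define INPUT_SCALE 0.0078125f /* hypothetical quantization scale */

/* Same decode as fp32() in the patch: valid for normal fp21 values. */
static float fp21_to_float(uint32_t v)
{
   union { float f; uint32_t u; } x;
   x.u = (v & 0xfffff) << 8;              /* exponent + mantissa */
   x.u |= (uint32_t)(v & 0x100000) << 11; /* sign */
   x.u += 0x38000000;                     /* re-bias exponent: 15 -> 127 */
   return x.f;
}

int main(void)
{
   unsigned pos = 16; /* +0 / positive subnormal slots, filled with 0.5 */

   /* Positive normal inputs: one slot per 0x800 step of the bit pattern. */
   for (uint32_t v = 0x8000; v < 0xf8000; v += 0x800) {
      float x = fp21_to_float(v) * INPUT_SCALE;
      float s = 1.0f / (1.0f + expf(-x)); /* the value the table would store */
      assert(s >= 0.5f && s <= 1.0f);
      pos++;
   }

   pos += 16; /* +inf/NaN slots, filled with 1.0 */
   pos += 17; /* -0 / tiny negative slots, filled with 0.5 */

   /* Negative normal inputs. */
   for (uint32_t v = 0x108800; v < 0x1f8000; v += 0x800) {
      float x = fp21_to_float(v) * INPUT_SCALE;
      float s = 1.0f / (1.0f + expf(-x));
      assert(s >= 0.0f && s <= 0.5f);
      pos++;
   }

   pos += 16; /* -inf/NaN slots, filled with 0.0 */

   printf("slots filled: %u\n", pos); /* prints: slots filled: 1024 */
   assert(pos == 1024);
   return 0;
}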