From 1d4d1fc61dc3f6750b8220bd46a0fc3ab0f4bf4b Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Fri, 27 Feb 2026 11:47:55 +0100 Subject: [PATCH] gallium: replace padding_same with per-side padding Replace the boolean padding_same field in pipe_ml_operation.conv and .pooling with explicit per-side padding fields: padding_top, padding_bottom, padding_left, padding_right. Frontends always compute these from their own padding representation (e.g. TFLite same/valid, PyTorch (pad_h, pad_w)). Drivers use them directly, removing the need for drivers to derive padding. Part-of: --- src/gallium/drivers/ethosu/ethosu_lower.c | 36 ++++-------------- src/gallium/drivers/etnaviv/etnaviv_ml.c | 6 ++- src/gallium/drivers/etnaviv/etnaviv_ml_nn.c | 9 ++++- src/gallium/drivers/etnaviv/etnaviv_ml_tp.c | 5 ++- src/gallium/drivers/rocket/rkt_ml.c | 5 ++- src/gallium/drivers/rocket/rkt_ml.h | 5 ++- src/gallium/drivers/rocket/rkt_task.c | 33 ++-------------- src/gallium/frontends/teflon/tfl_device.c | 42 +++++++++++++++++++-- src/gallium/include/pipe/p_state.h | 17 +++++++-- 9 files changed, 88 insertions(+), 70 deletions(-) diff --git a/src/gallium/drivers/ethosu/ethosu_lower.c b/src/gallium/drivers/ethosu/ethosu_lower.c index 9cda7aae80d..f1f5485fd00 100644 --- a/src/gallium/drivers/ethosu/ethosu_lower.c +++ b/src/gallium/drivers/ethosu/ethosu_lower.c @@ -140,20 +140,10 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph, operation->kernel.zero_points = NULL; } - if (poperation->conv.padding_same) { - unsigned vert = needed_total_padding(input_tensor->dims[1], poperation->conv.stride_y, poperation->conv.weight_tensor->dims[1]); - unsigned horiz = needed_total_padding(input_tensor->dims[2], poperation->conv.stride_x, poperation->conv.weight_tensor->dims[2]); - - operation->pad.top = vert / 2; - operation->pad.left = horiz / 2; - operation->pad.bottom = (vert + 1) / 2; - operation->pad.right = (horiz + 1) / 2; - } else { - operation->pad.top = 0; - operation->pad.left = 0; - operation->pad.bottom = 0; - operation->pad.right = 0; - } + operation->pad.top = poperation->conv.padding_top; + operation->pad.bottom = poperation->conv.padding_bottom; + operation->pad.left = poperation->conv.padding_left; + operation->pad.right = poperation->conv.padding_right; allocate_feature_maps(subgraph, operation); @@ -194,20 +184,10 @@ ethosu_lower_pooling(struct ethosu_subgraph *subgraph, operation->kernel.dilation_y = 1; operation->kernel.dilation_x = 1; - if (poperation->pooling.padding_same) { - unsigned vert = needed_total_padding(operation->ifm.shape.height, poperation->pooling.stride_y, poperation->pooling.filter_height); - unsigned horiz = needed_total_padding(operation->ifm.shape.width, poperation->pooling.stride_x, poperation->pooling.filter_width); - - operation->pad.top = vert / 2; - operation->pad.left = horiz / 2; - operation->pad.bottom = (vert + 1) / 2; - operation->pad.right = (horiz + 1) / 2; - } else { - operation->pad.top = 0; - operation->pad.left = 0; - operation->pad.bottom = 0; - operation->pad.right = 0; - } + operation->pad.top = poperation->pooling.padding_top; + operation->pad.bottom = poperation->pooling.padding_bottom; + operation->pad.left = poperation->pooling.padding_left; + operation->pad.right = poperation->pooling.padding_right; allocate_feature_maps(subgraph, operation); ethosu_sched_operation(subgraph, operation); diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml.c b/src/gallium/drivers/etnaviv/etnaviv_ml.c index d66a8b8f321..32a63a06a50 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml.c @@ -140,6 +140,10 @@ needs_reshuffle(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operatio unsigned nn_core_version = ctx->screen->specs.nn_core_version; bool has_stride = poperation->conv.stride_x > 1 || poperation->conv.stride_y > 1; bool pointwise = poperation->conv.pointwise; + bool has_padding = poperation->conv.padding_top > 0 || + poperation->conv.padding_bottom > 0 || + poperation->conv.padding_left > 0 || + poperation->conv.padding_right > 0; unsigned input_width = poperation->input_tensors[0]->dims[1]; if (!has_stride) @@ -156,7 +160,7 @@ needs_reshuffle(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operatio if (poperation->conv.pointwise && input_width >= 3 && input_channels > 1) return false; - if (poperation->conv.pointwise && poperation->conv.padding_same) + if (poperation->conv.pointwise && has_padding) return false; return true; diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c index b97714e6c9b..eb7a83df402 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_nn.c @@ -521,7 +521,9 @@ calc_pooling_first_pixel(struct etna_ml_subgraph *subgraph, if (poperation->conv.pointwise && input_width >= 3 && input_channels > 1) return true; - if (poperation->conv.pointwise && poperation->conv.padding_same) + if (poperation->conv.pointwise && + (poperation->conv.padding_top > 0 || poperation->conv.padding_bottom > 0 || + poperation->conv.padding_left > 0 || poperation->conv.padding_right > 0)) return true; } @@ -566,7 +568,10 @@ etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph, operation->pointwise = poperation->conv.pointwise; operation->relu = poperation->conv.relu; operation->pooling_first_pixel = calc_pooling_first_pixel(subgraph, poperation); - operation->padding_same = poperation->conv.padding_same; + operation->padding_same = poperation->conv.padding_top > 0 || + poperation->conv.padding_bottom > 0 || + poperation->conv.padding_left > 0 || + poperation->conv.padding_right > 0; operation->stride = poperation->conv.stride_x; operation->input_count = 1; diff --git a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c index 0c7b1f0a8bd..c9e78e91f9c 100644 --- a/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c +++ b/src/gallium/drivers/etnaviv/etnaviv_ml_tp.c @@ -950,7 +950,10 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph, operation->type = ETNA_JOB_TYPE_TP; operation->tp_type = ETNA_ML_TP_RESHUFFLE; operation->stride = convolution->conv.stride_x; - operation->padding_same = convolution->conv.padding_same; + operation->padding_same = convolution->conv.padding_top > 0 || + convolution->conv.padding_bottom > 0 || + convolution->conv.padding_left > 0 || + convolution->conv.padding_right > 0; operation->input_count = 1; operation->input_width = convolution->input_tensors[0]->dims[1]; diff --git a/src/gallium/drivers/rocket/rkt_ml.c b/src/gallium/drivers/rocket/rkt_ml.c index 498b25661c9..dbee31e669d 100644 --- a/src/gallium/drivers/rocket/rkt_ml.c +++ b/src/gallium/drivers/rocket/rkt_ml.c @@ -171,7 +171,10 @@ lower_convolution(struct rkt_ml_subgraph *subgraph, operation->tasks = UTIL_DYNARRAY_INIT; operation->depthwise = rkt_is_depthwise(poperation); - operation->padding_same = poperation->conv.padding_same; + operation->padding_top = poperation->conv.padding_top; + operation->padding_bottom = poperation->conv.padding_bottom; + operation->padding_left = poperation->conv.padding_left; + operation->padding_right = poperation->conv.padding_right; operation->stride = poperation->conv.stride_x; operation->input_index = poperation->input_tensors[0]->index; diff --git a/src/gallium/drivers/rocket/rkt_ml.h b/src/gallium/drivers/rocket/rkt_ml.h index 04dea3d1475..6bde9b4846e 100644 --- a/src/gallium/drivers/rocket/rkt_ml.h +++ b/src/gallium/drivers/rocket/rkt_ml.h @@ -80,7 +80,10 @@ struct rkt_operation { bool depthwise; bool reuse_weights_cbuf; unsigned truncate_bits; - bool padding_same; + unsigned padding_top; + unsigned padding_bottom; + unsigned padding_left; + unsigned padding_right; unsigned stride; bool addition_input; diff --git a/src/gallium/drivers/rocket/rkt_task.c b/src/gallium/drivers/rocket/rkt_task.c index 0fa50eb7a4b..5a910a97442 100644 --- a/src/gallium/drivers/rocket/rkt_task.c +++ b/src/gallium/drivers/rocket/rkt_task.c @@ -59,33 +59,6 @@ calc_line_stride(unsigned width) return width * ATOMIC_K_SIZE * sizeof(uint8_t); } -static void -calc_explicit_padding(const struct rkt_operation *operation, - unsigned *pad_top, unsigned *pad_bottom, - unsigned *pad_left, unsigned *pad_right) -{ - if (operation->padding_same && operation->weights_width > 1) { - /* Convert from implicit to explicit padding */ - unsigned pad_along_width = - MAX2((operation->output_width - 1) * operation->stride + - operation->weights_width - operation->input_width, - 0); - unsigned pad_along_height = - MAX2((operation->output_height - 1) * operation->stride + - operation->weights_height - operation->input_height, - 0); - *pad_left = pad_along_height / 2; - *pad_right = pad_along_height - *pad_left; - *pad_top = pad_along_width / 2; - *pad_bottom = pad_along_width - *pad_top; - } else { - *pad_left = 0; - *pad_right = 0; - *pad_top = 0; - *pad_bottom = 0; - } -} - static void fill_task(struct rkt_ml_subgraph *subgraph, struct rkt_operation *operation, @@ -192,8 +165,10 @@ rkt_split_tasks(struct rkt_ml_subgraph *subgraph, unsigned pad_left; unsigned pad_right; - calc_explicit_padding(operation, &pad_top, &pad_bottom, &pad_left, - &pad_right); + pad_top = operation->padding_top; + pad_bottom = operation->padding_bottom; + pad_left = operation->padding_left; + pad_right = operation->padding_right; if (weights_banks_required + 1 < CBUF_BANKS) { /* Full weights, partial input */ diff --git a/src/gallium/frontends/teflon/tfl_device.c b/src/gallium/frontends/teflon/tfl_device.c index f64754dbd7d..c92acca0d36 100644 --- a/src/gallium/frontends/teflon/tfl_device.c +++ b/src/gallium/frontends/teflon/tfl_device.c @@ -58,6 +58,18 @@ struct teflon_subgraph { unsigned output_count; }; +static void +calc_same_padding(unsigned input_size, unsigned filter_size, unsigned stride, + unsigned dilation, unsigned *pad_before, unsigned *pad_after) +{ + unsigned effective_filter = (filter_size - 1) * dilation + 1; + unsigned output_size = (input_size + stride - 1) / stride; + unsigned total = (output_size - 1) * stride + effective_filter; + unsigned padding = total > input_size ? total - input_size : 0; + *pad_before = padding / 2; + *pad_after = padding - *pad_before; +} + static bool fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLiteNode *node, TfLiteRegistration *node_registration, struct pipe_ml_operation *operation) { @@ -94,7 +106,16 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi } operation->conv.stride_x = params->stride_width; operation->conv.stride_y = params->stride_height; - operation->conv.padding_same = params->padding == kTfLitePaddingSame; + if (params->padding == kTfLitePaddingSame) { + struct pipe_tensor *in = operation->input_tensors[0]; + struct pipe_tensor *wt = &tensors[node->inputs->data[1]]; + calc_same_padding(in->dims[1], wt->dims[1], params->stride_height, + operation->conv.dilation_height_factor, + &operation->conv.padding_top, &operation->conv.padding_bottom); + calc_same_padding(in->dims[2], wt->dims[2], params->stride_width, + operation->conv.dilation_width_factor, + &operation->conv.padding_left, &operation->conv.padding_right); + } operation->conv.depthwise = false; operation->conv.relu = params->activation == kTfLiteActRelu || params->activation == kTfLiteActRelu6; @@ -117,7 +138,16 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi } operation->conv.stride_x = params->stride_width; operation->conv.stride_y = params->stride_height; - operation->conv.padding_same = params->padding == kTfLitePaddingSame; + if (params->padding == kTfLitePaddingSame) { + struct pipe_tensor *in = operation->input_tensors[0]; + struct pipe_tensor *wt = &tensors[node->inputs->data[1]]; + calc_same_padding(in->dims[1], wt->dims[1], params->stride_height, + operation->conv.dilation_height_factor, + &operation->conv.padding_top, &operation->conv.padding_bottom); + calc_same_padding(in->dims[2], wt->dims[2], params->stride_width, + operation->conv.dilation_width_factor, + &operation->conv.padding_left, &operation->conv.padding_right); + } operation->conv.depthwise = true; operation->conv.relu = params->activation == kTfLiteActRelu || params->activation == kTfLiteActRelu6; @@ -142,7 +172,13 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi operation->pooling.filter_width = params->filter_width; operation->pooling.stride_x = params->stride_width; operation->pooling.stride_y = params->stride_height; - operation->pooling.padding_same = params->padding == kTfLitePaddingSame; + if (params->padding == kTfLitePaddingSame) { + struct pipe_tensor *in = operation->input_tensors[0]; + calc_same_padding(in->dims[1], params->filter_height, params->stride_height, + 1, &operation->pooling.padding_top, &operation->pooling.padding_bottom); + calc_same_padding(in->dims[2], params->filter_width, params->stride_width, + 1, &operation->pooling.padding_left, &operation->pooling.padding_right); + } break; } case kTfLiteBuiltinAdd: { diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index d2099d22952..8d73c1a0bc4 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -1117,9 +1117,14 @@ struct pipe_ml_operation unsigned stride_y; /** - * Whether to use padding of type same when accessing the input tensor. + * Explicit per-side padding. Frontends always compute these + * from their own padding representation (e.g. TFLite same/valid, + * PyTorch (pad_h, pad_w)). Drivers use them directly. */ - bool padding_same; + unsigned padding_top; + unsigned padding_bottom; + unsigned padding_left; + unsigned padding_right; /** * Whether this is a pointwise (1x1 kernels) convolution. @@ -1167,9 +1172,13 @@ struct pipe_ml_operation unsigned filter_height; /** - * Whether to use padding of type same when accessing the input tensor. + * Explicit per-side padding. Frontends always compute these + * from their own padding representation. */ - bool padding_same; + unsigned padding_top; + unsigned padding_bottom; + unsigned padding_left; + unsigned padding_right; } pooling; struct { /**