gallium: replace padding_same with per-side padding

Replace the boolean padding_same field in pipe_ml_operation.conv
and .pooling with explicit per-side padding fields: padding_top,
padding_bottom, padding_left, padding_right.

Frontends always compute these from their own padding representation
(e.g. TFLite same/valid, PyTorch (pad_h, pad_w)). Drivers consume
them directly, so no driver needs to derive the padding itself.
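
For reference, the per-dimension computation a frontend performs for
TFLite-style SAME padding is sketched below. This is a standalone
illustration of the calc_same_padding() helper this commit adds to the
teflon frontend; the same_padding_1d name and the main() driver are
illustrative only:

   #include <stdio.h>

   /* Split the total SAME padding of one dimension into before/after
    * halves; the odd pixel goes to the after (bottom/right) side, as in
    * the calc_same_padding() helper added to teflon below. */
   static void
   same_padding_1d(unsigned input_size, unsigned filter_size,
                   unsigned stride, unsigned dilation,
                   unsigned *pad_before, unsigned *pad_after)
   {
      unsigned effective_filter = (filter_size - 1) * dilation + 1;
      unsigned output_size = (input_size + stride - 1) / stride; /* ceil */
      unsigned total = (output_size - 1) * stride + effective_filter;
      unsigned padding = total > input_size ? total - input_size : 0;

      *pad_before = padding / 2;
      *pad_after = padding - *pad_before;
   }

   int
   main(void)
   {
      unsigned top, bottom;

      /* 112x112 input, 3x3 filter, stride 2: SAME pads 0 rows on top
       * and 1 row on the bottom. */
      same_padding_1d(112, 3, 2, 1, &top, &bottom);
      printf("top=%u bottom=%u\n", top, bottom);
      return 0;
   }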

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40167>
Tomeu Vizoso, 2026-02-27 11:47:55 +01:00 (committed by Marge Bot)
parent db866eca28
commit 1d4d1fc61d
9 changed files with 88 additions and 70 deletions


@@ -140,20 +140,10 @@ ethosu_lower_convolution(struct ethosu_subgraph *subgraph,
      operation->kernel.zero_points = NULL;
   }
-  if (poperation->conv.padding_same) {
-     unsigned vert = needed_total_padding(input_tensor->dims[1], poperation->conv.stride_y, poperation->conv.weight_tensor->dims[1]);
-     unsigned horiz = needed_total_padding(input_tensor->dims[2], poperation->conv.stride_x, poperation->conv.weight_tensor->dims[2]);
-     operation->pad.top = vert / 2;
-     operation->pad.left = horiz / 2;
-     operation->pad.bottom = (vert + 1) / 2;
-     operation->pad.right = (horiz + 1) / 2;
-  } else {
-     operation->pad.top = 0;
-     operation->pad.left = 0;
-     operation->pad.bottom = 0;
-     operation->pad.right = 0;
-  }
+  operation->pad.top = poperation->conv.padding_top;
+  operation->pad.bottom = poperation->conv.padding_bottom;
+  operation->pad.left = poperation->conv.padding_left;
+  operation->pad.right = poperation->conv.padding_right;
   allocate_feature_maps(subgraph, operation);
@@ -194,20 +184,10 @@ ethosu_lower_pooling(struct ethosu_subgraph *subgraph,
   operation->kernel.dilation_y = 1;
   operation->kernel.dilation_x = 1;
-  if (poperation->pooling.padding_same) {
-     unsigned vert = needed_total_padding(operation->ifm.shape.height, poperation->pooling.stride_y, poperation->pooling.filter_height);
-     unsigned horiz = needed_total_padding(operation->ifm.shape.width, poperation->pooling.stride_x, poperation->pooling.filter_width);
-     operation->pad.top = vert / 2;
-     operation->pad.left = horiz / 2;
-     operation->pad.bottom = (vert + 1) / 2;
-     operation->pad.right = (horiz + 1) / 2;
-  } else {
-     operation->pad.top = 0;
-     operation->pad.left = 0;
-     operation->pad.bottom = 0;
-     operation->pad.right = 0;
-  }
+  operation->pad.top = poperation->pooling.padding_top;
+  operation->pad.bottom = poperation->pooling.padding_bottom;
+  operation->pad.left = poperation->pooling.padding_left;
+  operation->pad.right = poperation->pooling.padding_right;
   allocate_feature_maps(subgraph, operation);
   ethosu_sched_operation(subgraph, operation);


@@ -140,6 +140,10 @@ needs_reshuffle(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operatio
   unsigned nn_core_version = ctx->screen->specs.nn_core_version;
   bool has_stride = poperation->conv.stride_x > 1 || poperation->conv.stride_y > 1;
   bool pointwise = poperation->conv.pointwise;
+  bool has_padding = poperation->conv.padding_top > 0 ||
+                     poperation->conv.padding_bottom > 0 ||
+                     poperation->conv.padding_left > 0 ||
+                     poperation->conv.padding_right > 0;
   unsigned input_width = poperation->input_tensors[0]->dims[1];
   if (!has_stride)
@@ -156,7 +160,7 @@ needs_reshuffle(struct etna_ml_subgraph *subgraph, const struct pipe_ml_operatio
   if (poperation->conv.pointwise && input_width >= 3 && input_channels > 1)
      return false;
-  if (poperation->conv.pointwise && poperation->conv.padding_same)
+  if (poperation->conv.pointwise && has_padding)
      return false;
   return true;


@@ -521,7 +521,9 @@ calc_pooling_first_pixel(struct etna_ml_subgraph *subgraph,
   if (poperation->conv.pointwise && input_width >= 3 && input_channels > 1)
      return true;
-  if (poperation->conv.pointwise && poperation->conv.padding_same)
+  if (poperation->conv.pointwise &&
+      (poperation->conv.padding_top > 0 || poperation->conv.padding_bottom > 0 ||
+       poperation->conv.padding_left > 0 || poperation->conv.padding_right > 0))
      return true;
}
@@ -566,7 +568,10 @@ etna_ml_lower_convolution(struct etna_ml_subgraph *subgraph,
   operation->pointwise = poperation->conv.pointwise;
   operation->relu = poperation->conv.relu;
   operation->pooling_first_pixel = calc_pooling_first_pixel(subgraph, poperation);
-  operation->padding_same = poperation->conv.padding_same;
+  operation->padding_same = poperation->conv.padding_top > 0 ||
+                            poperation->conv.padding_bottom > 0 ||
+                            poperation->conv.padding_left > 0 ||
+                            poperation->conv.padding_right > 0;
   operation->stride = poperation->conv.stride_x;
   operation->input_count = 1;


@@ -950,7 +950,10 @@ etna_ml_lower_reshuffle(struct etna_ml_subgraph *subgraph,
   operation->type = ETNA_JOB_TYPE_TP;
   operation->tp_type = ETNA_ML_TP_RESHUFFLE;
   operation->stride = convolution->conv.stride_x;
-  operation->padding_same = convolution->conv.padding_same;
+  operation->padding_same = convolution->conv.padding_top > 0 ||
+                            convolution->conv.padding_bottom > 0 ||
+                            convolution->conv.padding_left > 0 ||
+                            convolution->conv.padding_right > 0;
   operation->input_count = 1;
   operation->input_width = convolution->input_tensors[0]->dims[1];


@@ -171,7 +171,10 @@ lower_convolution(struct rkt_ml_subgraph *subgraph,
   operation->tasks = UTIL_DYNARRAY_INIT;
   operation->depthwise = rkt_is_depthwise(poperation);
-  operation->padding_same = poperation->conv.padding_same;
+  operation->padding_top = poperation->conv.padding_top;
+  operation->padding_bottom = poperation->conv.padding_bottom;
+  operation->padding_left = poperation->conv.padding_left;
+  operation->padding_right = poperation->conv.padding_right;
   operation->stride = poperation->conv.stride_x;
   operation->input_index = poperation->input_tensors[0]->index;


@@ -80,7 +80,10 @@ struct rkt_operation {
   bool depthwise;
   bool reuse_weights_cbuf;
   unsigned truncate_bits;
-  bool padding_same;
+  unsigned padding_top;
+  unsigned padding_bottom;
+  unsigned padding_left;
+  unsigned padding_right;
   unsigned stride;
   bool addition_input;


@@ -59,33 +59,6 @@ calc_line_stride(unsigned width)
   return width * ATOMIC_K_SIZE * sizeof(uint8_t);
}
-static void
-calc_explicit_padding(const struct rkt_operation *operation,
-                      unsigned *pad_top, unsigned *pad_bottom,
-                      unsigned *pad_left, unsigned *pad_right)
-{
-   if (operation->padding_same && operation->weights_width > 1) {
-      /* Convert from implicit to explicit padding */
-      unsigned pad_along_width =
-         MAX2((operation->output_width - 1) * operation->stride +
-                 operation->weights_width - operation->input_width,
-              0);
-      unsigned pad_along_height =
-         MAX2((operation->output_height - 1) * operation->stride +
-                 operation->weights_height - operation->input_height,
-              0);
-      *pad_left = pad_along_height / 2;
-      *pad_right = pad_along_height - *pad_left;
-      *pad_top = pad_along_width / 2;
-      *pad_bottom = pad_along_width - *pad_top;
-   } else {
-      *pad_left = 0;
-      *pad_right = 0;
-      *pad_top = 0;
-      *pad_bottom = 0;
-   }
-}
static void
fill_task(struct rkt_ml_subgraph *subgraph,
          struct rkt_operation *operation,
@@ -192,8 +165,10 @@ rkt_split_tasks(struct rkt_ml_subgraph *subgraph,
   unsigned pad_left;
   unsigned pad_right;
-  calc_explicit_padding(operation, &pad_top, &pad_bottom, &pad_left,
-                        &pad_right);
+  pad_top = operation->padding_top;
+  pad_bottom = operation->padding_bottom;
+  pad_left = operation->padding_left;
+  pad_right = operation->padding_right;
   if (weights_banks_required + 1 < CBUF_BANKS) {
      /* Full weights, partial input */


@@ -58,6 +58,18 @@ struct teflon_subgraph {
   unsigned output_count;
};
+static void
+calc_same_padding(unsigned input_size, unsigned filter_size, unsigned stride,
+                  unsigned dilation, unsigned *pad_before, unsigned *pad_after)
+{
+   unsigned effective_filter = (filter_size - 1) * dilation + 1;
+   unsigned output_size = (input_size + stride - 1) / stride;
+   unsigned total = (output_size - 1) * stride + effective_filter;
+   unsigned padding = total > input_size ? total - input_size : 0;
+
+   *pad_before = padding / 2;
+   *pad_after = padding - *pad_before;
+}
static bool
fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLiteNode *node, TfLiteRegistration *node_registration, struct pipe_ml_operation *operation)
{
@@ -94,7 +106,16 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi
   }
   operation->conv.stride_x = params->stride_width;
   operation->conv.stride_y = params->stride_height;
-  operation->conv.padding_same = params->padding == kTfLitePaddingSame;
+  if (params->padding == kTfLitePaddingSame) {
+     struct pipe_tensor *in = operation->input_tensors[0];
+     struct pipe_tensor *wt = &tensors[node->inputs->data[1]];
+     calc_same_padding(in->dims[1], wt->dims[1], params->stride_height,
+                       operation->conv.dilation_height_factor,
+                       &operation->conv.padding_top, &operation->conv.padding_bottom);
+     calc_same_padding(in->dims[2], wt->dims[2], params->stride_width,
+                       operation->conv.dilation_width_factor,
+                       &operation->conv.padding_left, &operation->conv.padding_right);
+  }
   operation->conv.depthwise = false;
   operation->conv.relu = params->activation == kTfLiteActRelu ||
                          params->activation == kTfLiteActRelu6;
@@ -117,7 +138,16 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi
   }
   operation->conv.stride_x = params->stride_width;
   operation->conv.stride_y = params->stride_height;
-  operation->conv.padding_same = params->padding == kTfLitePaddingSame;
+  if (params->padding == kTfLitePaddingSame) {
+     struct pipe_tensor *in = operation->input_tensors[0];
+     struct pipe_tensor *wt = &tensors[node->inputs->data[1]];
+     calc_same_padding(in->dims[1], wt->dims[1], params->stride_height,
+                       operation->conv.dilation_height_factor,
+                       &operation->conv.padding_top, &operation->conv.padding_bottom);
+     calc_same_padding(in->dims[2], wt->dims[2], params->stride_width,
+                       operation->conv.dilation_width_factor,
+                       &operation->conv.padding_left, &operation->conv.padding_right);
+  }
   operation->conv.depthwise = true;
   operation->conv.relu = params->activation == kTfLiteActRelu ||
                          params->activation == kTfLiteActRelu6;
@@ -142,7 +172,13 @@ fill_operation(struct teflon_delegate *delegate, TfLiteContext *tf_context, TfLi
   operation->pooling.filter_width = params->filter_width;
   operation->pooling.stride_x = params->stride_width;
   operation->pooling.stride_y = params->stride_height;
-  operation->pooling.padding_same = params->padding == kTfLitePaddingSame;
+  if (params->padding == kTfLitePaddingSame) {
+     struct pipe_tensor *in = operation->input_tensors[0];
+     calc_same_padding(in->dims[1], params->filter_height, params->stride_height,
+                       1, &operation->pooling.padding_top, &operation->pooling.padding_bottom);
+     calc_same_padding(in->dims[2], params->filter_width, params->stride_width,
+                       1, &operation->pooling.padding_left, &operation->pooling.padding_right);
+  }
   break;
}
case kTfLiteBuiltinAdd: {


@@ -1117,9 +1117,14 @@ struct pipe_ml_operation
         unsigned stride_y;
         /**
-         * Whether to use padding of type same when accessing the input tensor.
+         * Explicit per-side padding. Frontends always compute these
+         * from their own padding representation (e.g. TFLite same/valid,
+         * PyTorch (pad_h, pad_w)). Drivers use them directly.
          */
-        bool padding_same;
+        unsigned padding_top;
+        unsigned padding_bottom;
+        unsigned padding_left;
+        unsigned padding_right;
         /**
          * Whether this is a pointwise (1x1 kernels) convolution.
@@ -1167,9 +1172,13 @@ struct pipe_ml_operation
         unsigned filter_height;
         /**
-         * Whether to use padding of type same when accessing the input tensor.
+         * Explicit per-side padding. Frontends always compute these
+         * from their own padding representation.
          */
-        bool padding_same;
+        unsigned padding_top;
+        unsigned padding_bottom;
+        unsigned padding_left;
+        unsigned padding_right;
      } pooling;
      struct {
         /**