diff --git a/shared/nm-glib-aux/nm-shared-utils.c b/shared/nm-glib-aux/nm-shared-utils.c index fffd761b7a..807786c530 100644 --- a/shared/nm-glib-aux/nm-shared-utils.c +++ b/shared/nm-glib-aux/nm-shared-utils.c @@ -1528,6 +1528,19 @@ _char_lookup_has (const guint8 lookup[static 256], return lookup[(guint8) ch] != 0; } +static gboolean +_char_lookup_has_all (const guint8 lookup[static 256], + const char *candidates) +{ /* TRUE iff every character of @candidates is marked in @lookup; NULL or "" yields TRUE */ + if (candidates) { + while (candidates[0] != '\0') { + if (!_char_lookup_has (lookup, (candidates++)[0])) + return FALSE; + } + } + return TRUE; +} + /** * nm_utils_strsplit_set_full: * @str: the string to split. @@ -1755,65 +1768,131 @@ done2: /*****************************************************************************/ const char * -nm_utils_escaped_tokens_escape (const char *str, - const char *delimiters, - char **out_to_free) +nm_utils_escaped_tokens_escape_full (const char *str, + const char *delimiters, + const char *delimiters_as_needed, + NMUtilsEscapedTokensEscapeFlags flags, + char **out_to_free) { guint8 ch_lookup[256]; + guint8 ch_lookup_as_needed[256]; /* characters before which a backslash must itself be escaped (AS_NEEDED mode only) */ + gboolean has_ch_lookup_as_needed = FALSE; /* ch_lookup_as_needed is only initialized once this is TRUE */ char *ret; gsize str_len; gsize alloc_len; gsize n_escapes; gsize i, j; + gboolean escape_leading_space; gboolean escape_trailing_space; + gboolean escape_backslash_as_needed; - if (!delimiters) { - nm_assert (delimiters); - delimiters = NM_ASCII_SPACES; - } + nm_assert ( !delimiters_as_needed + || ( delimiters_as_needed[0] + && NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_AS_NEEDED))); if (!str || str[0] == '\0') { *out_to_free = NULL; return str; } - _char_lookup_table_init (ch_lookup, delimiters); + str_len = strlen (str); - /* also mark '\\' as requiring escaping. 
*/ - _char_lookup_table_set_one (ch_lookup, '\\'); + _char_lookup_table_init (ch_lookup, delimiters); + if ( !delimiters + || NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_SPACES)) { + flags &= ~( NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE + | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE); + _char_lookup_table_set_all (ch_lookup, NM_ASCII_SPACES); /* every space is escaped unconditionally, so the leading/trailing special cases are dropped above */ + } + + if (NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_ALWAYS)) { + _char_lookup_table_set_one (ch_lookup, '\\'); + escape_backslash_as_needed = FALSE; + } else if (_char_lookup_has (ch_lookup, '\\')) + escape_backslash_as_needed = FALSE; + else { + escape_backslash_as_needed = NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_AS_NEEDED); + if (escape_backslash_as_needed) { + if ( NM_FLAGS_ANY (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE + | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE) + && !_char_lookup_has_all (ch_lookup, NM_ASCII_SPACES)) { + /* ESCAPE_LEADING_SPACE and ESCAPE_TRAILING_SPACE imply that we escape a backslash + * before any whitespace. 
*/ + if (!has_ch_lookup_as_needed) { + has_ch_lookup_as_needed = TRUE; + _char_lookup_table_init (ch_lookup_as_needed, NULL); + } + _char_lookup_table_set_all (ch_lookup_as_needed, NM_ASCII_SPACES); + } + if ( delimiters_as_needed + && !_char_lookup_has_all (ch_lookup, delimiters_as_needed)) { + if (!has_ch_lookup_as_needed) { + has_ch_lookup_as_needed = TRUE; + _char_lookup_table_init (ch_lookup_as_needed, NULL); + } + _char_lookup_table_set_all (ch_lookup_as_needed, delimiters_as_needed); + } + } + } + + escape_leading_space = NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE) + && g_ascii_isspace (str[0]) + && !_char_lookup_has (ch_lookup, str[0]); + if (str_len == 1 && escape_leading_space) /* the lone character is already escaped as leading space; otherwise a single-space string must still get its trailing-space escape */ + escape_trailing_space = FALSE; + else { + escape_trailing_space = NM_FLAGS_HAS (flags, NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE) + && g_ascii_isspace (str[str_len - 1]) + && !_char_lookup_has (ch_lookup, str[str_len - 1]); + } n_escapes = 0; for (i = 0; str[i] != '\0'; i++) { if (_char_lookup_has (ch_lookup, str[i])) n_escapes++; + else if ( str[i] == '\\' + && escape_backslash_as_needed + && ( _char_lookup_has (ch_lookup, str[i + 1]) + || NM_IN_SET (str[i + 1], '\0', '\\') + || ( has_ch_lookup_as_needed + && _char_lookup_has (ch_lookup_as_needed, str[i + 1])))) + n_escapes++; } + if (escape_leading_space) + n_escapes++; + if (escape_trailing_space) + n_escapes++; - str_len = i; - nm_assert (str_len > 0 && strlen (str) == str_len); - - escape_trailing_space = !_char_lookup_has (ch_lookup, str[str_len - 1]) - && g_ascii_isspace (str[str_len - 1]); - - if ( n_escapes == 0 - && !escape_trailing_space) { + if (n_escapes == 0u) { *out_to_free = NULL; return str; } - alloc_len = str_len + n_escapes + ((gsize) escape_trailing_space) + 1; + alloc_len = str_len + n_escapes + 1u; ret = g_new (char, alloc_len); j = 0; - for (i = 0; str[i] != '\0'; i++) { - if (_char_lookup_has (ch_lookup, str[i])) { - nm_assert (j < alloc_len); + i = 0; + + if 
(escape_leading_space) { + ret[j++] = '\\'; + ret[j++] = str[i++]; + } + for (; str[i] != '\0'; i++) { + if (_char_lookup_has (ch_lookup, str[i])) + ret[j++] = '\\'; + else if ( str[i] == '\\' + && escape_backslash_as_needed + && ( _char_lookup_has (ch_lookup, str[i + 1]) + || NM_IN_SET (str[i + 1], '\0', '\\') + || ( has_ch_lookup_as_needed + && _char_lookup_has (ch_lookup_as_needed, str[i + 1])))) ret[j++] = '\\'; - } - nm_assert (j < alloc_len); ret[j++] = str[i]; } if (escape_trailing_space) { - nm_assert (!_char_lookup_has (ch_lookup, ret[j - 1]) && g_ascii_isspace (ret[j - 1])); + nm_assert ( !_char_lookup_has (ch_lookup, ret[j - 1]) + && g_ascii_isspace (ret[j - 1])); ret[j] = ret[j - 1]; ret[j - 1] = '\\'; j++; @@ -1821,6 +1900,7 @@ nm_utils_escaped_tokens_escape (const char *str, nm_assert (j == alloc_len - 1); ret[j] = '\0'; + nm_assert (strlen (ret) == j); *out_to_free = ret; return ret; diff --git a/shared/nm-glib-aux/nm-shared-utils.h b/shared/nm-glib-aux/nm-shared-utils.h index 79f6eb4abb..57a6fb8fcb 100644 --- a/shared/nm-glib-aux/nm-shared-utils.h +++ b/shared/nm-glib-aux/nm-shared-utils.h @@ -542,9 +542,65 @@ nm_utils_escaped_tokens_split (const char *str, | NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP); } -const char *nm_utils_escaped_tokens_escape (const char *str, - const char *delimiters, - char **out_to_free); +typedef enum { + NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_NONE = 0, + NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_SPACES = (1ull << 0), + NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE = (1ull << 1), + NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE = (1ull << 2), + + /* Backslash characters will be escaped as "\\\\" if they precede another + * character that makes it necessary. This is the case: + * + * 1) before another '\\' backslash. + * 2) before any delimiter in @delimiters. + * 3) before any delimiter in @delimiters_as_needed. + * 4) before a white space, if ESCAPE_LEADING_SPACE or ESCAPE_TRAILING_SPACE is set. 
+ * 5) before the end of the word. + * + * Rule 4) is an extension. It's not immediately clear why with ESCAPE_LEADING_SPACE + * and ESCAPE_TRAILING_SPACE we want *all* backslashes before a white space escaped. + * The reason is that we want to use ESCAPE_LEADING_SPACE and ESCAPE_TRAILING_SPACE + * in cases where we later parse the backslash-escaped strings back while allowing + * unescaped white space to be stripped. That means we want " a " to be escaped as "\\ a\\ ". + * On the other hand, we also want " a\\ b " to be escaped as "\\ a\\\\ b\\ ", + * and not "\\ a\\ b\\ ". Otherwise, the parser would need to treat "\\ " + * differently depending on whether the sequence is at the beginning, end or middle + * of the word. + * + * Rule 5) is also not immediately obvious. When used with ESCAPE_TRAILING_SPACE, + * we clearly want to allow an escaped word to be followed by arbitrary + * whitespace suffixes. That's why this mode exists. So we must escape "a\\" as + * "a\\\\", so that appending " " does not change the meaning. + * Also without ESCAPE_TRAILING_SPACE, we generally want to be able to concatenate + * two escaped words without changing their meaning. If the words were "a\\" + * and "," (with ',' being a delimiter), then the result must be "a\\\\" and "\\," + * so that the concatenated word ("a\\\\\\,") is still the same. If we instead escaped + * them as "a\\" + "\\,", then the concatenated word would be "a\\\\," and thus + * different. 
+ * */ + NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_AS_NEEDED = (1ull << 3), + + NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_ALWAYS = (1ull << 4), /* escape every backslash, regardless of what follows it */ +} NMUtilsEscapedTokensEscapeFlags; + +const char *nm_utils_escaped_tokens_escape_full (const char *str, + const char *delimiters, + const char *delimiters_as_needed, + NMUtilsEscapedTokensEscapeFlags flags, + char **out_to_free); + +static inline const char * +nm_utils_escaped_tokens_escape (const char *str, + const char *delimiters, + char **out_to_free) +{ /* convenience wrapper: always escape backslashes, plus a trailing space; no as-needed delimiters */ + return nm_utils_escaped_tokens_escape_full (str, + delimiters, + NULL, + NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_ALWAYS + | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE, + out_to_free); +} static inline GString * nm_utils_escaped_tokens_escape_gstr_assert (const char *str,