From 720bc30bd9d9cf5aaf0995b55413a8ac6515340f Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 12 Apr 2019 13:21:47 +0200 Subject: [PATCH 1/9] cli: avoid duplicate delimiters when printing objlist property Usually, obj_to_str_fcn() should not fail and always add something. If not, remove the delimiter again. --- clients/common/nm-meta-setting-desc.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/clients/common/nm-meta-setting-desc.c b/clients/common/nm-meta-setting-desc.c index 1de51dc191..83959ec653 100644 --- a/clients/common/nm-meta-setting-desc.c +++ b/clients/common/nm-meta-setting-desc.c @@ -3053,9 +3053,7 @@ _get_fcn_objlist (ARGS_GET_FCN) num = property_info->property_typ_data->subtype.objlist.get_num_fcn (setting); for (idx = 0; idx < num; idx++) { -#if NM_MORE_ASSERTS gsize start_offset; -#endif if (!str) str = g_string_new (NULL); @@ -3067,15 +3065,21 @@ _get_fcn_objlist (ARGS_GET_FCN) g_string_append (str, ", "); } -#if NM_MORE_ASSERTS start_offset = str->len; -#endif property_info->property_typ_data->subtype.objlist.obj_to_str_fcn (get_type, setting, idx, str); + if (start_offset == str->len) { + /* nothing was appended. Remove the delimiter again. */ + nm_assert_not_reached (); + if (str->len > 0) + g_string_truncate (str, str->len - 2); + continue; + } + #if NM_MORE_ASSERTS nm_assert (start_offset < str->len); if ( property_info->property_typ_data->subtype.objlist.strsplit_with_escape From 5b2b0dcadfd40883865f5ce4e3f8632008b2d973 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 11 Apr 2019 13:32:43 +0200 Subject: [PATCH 2/9] shared: add NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP to nm_utils_strsplit_set_full() This will essentially call g_strstrip() on each token. There are some specialties: - if the resulting word is empty after stripping, then according to %NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, the empty token will be removed. If that results in an empty string array, %NULL will be returned. - if %NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING is set, then whitespace that is backslash escaped is not removed. Since this is a post-operation that happens after tokeninzing, it could be done as a separate function. And we already have this function: _nm_utils_unescape_plain() and _nm_utils_unescape_spaces(). However, that is ugly for several reasons: - the stripping should be part of the tokenizing, you shouldn't need several steps. - nm_utils_strsplit_set_full() returns a "const char **" which indicates the strings must not be freed. However, it is perfectly valid to modify the string inplace. Hence, the post-op function would need to cast the strings to "char *", which seems ugly (although we do that on many places, and it's guaranteed to work). - _nm_utils_unescape_plain()/_nm_utils_unescape_spaces() is indeed already used together with nm_utils_strsplit_set_full(). However, it requires to initialize the cb_lookup buffer twice. I would expect that initializing the cb_lookup buffer is a large portion of what the function does already (for short strings). This issue will be solved in the next commit by adding yet another flag which allows to unescape. --- shared/nm-utils/nm-shared-utils.c | 72 +++++++++++++++++++++++-------- shared/nm-utils/nm-shared-utils.h | 11 +++++ 2 files changed, 66 insertions(+), 17 deletions(-) diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c index 879d1e7b9e..c21f0f66ec 100644 --- a/shared/nm-utils/nm-shared-utils.c +++ b/shared/nm-utils/nm-shared-utils.c @@ -1028,7 +1028,8 @@ nm_utils_strsplit_set_full (const char *str, char *s; guint8 ch_lookup[256]; const gboolean f_allow_escaping = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING); - const gboolean f_preseve_empty = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY); + const gboolean f_preserve_empty = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY); + const gboolean f_strstrip = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP); if (!str) return NULL; @@ -1042,7 +1043,7 @@ nm_utils_strsplit_set_full (const char *str, nm_assert ( !f_allow_escaping || !_char_lookup_has (ch_lookup, '\\')); - if (!f_preseve_empty) { + if (!f_preserve_empty) { while (_char_lookup_has (ch_lookup, str[0])) str++; } @@ -1055,6 +1056,17 @@ nm_utils_strsplit_set_full (const char *str, return NULL; } +#define _char_is_escaped(str_start, str_cur) \ + ({ \ + const char *const _str_start = (str_start); \ + const char *const _str_cur = (str_cur); \ + const char *_str_i = (_str_cur); \ + \ + while ( _str_i > _str_start \ + && _str_i[-1] == '\\') \ + _str_i--; \ + (((_str_cur - _str_i) % 2) != 0); \ + }) num_tokens = 1; c_str = str; @@ -1069,23 +1081,17 @@ nm_utils_strsplit_set_full (const char *str, /* we assume escapings are not frequent. After we found * this delimiter, check whether it was escaped by counting * the backslashed before. */ - if (f_allow_escaping) { - const char *c2 = c_str; - - while ( c2 > str - && c2[-1] == '\\') - c2--; - if (((c_str - c2) % 2) != 0) { - /* the delimiter is escaped. This was not an accepted delimiter. */ - c_str++; - continue; - } + if ( f_allow_escaping + && _char_is_escaped (str, c_str)) { + /* the delimiter is escaped. This was not an accepted delimiter. */ + c_str++; + continue; } c_str++; /* if we drop empty tokens, then we now skip over all consecutive delimiters. */ - if (!f_preseve_empty) { + if (!f_preserve_empty) { while (_char_lookup_has (ch_lookup, c_str[0])) c_str++; if (c_str[0] == '\0') @@ -1115,10 +1121,10 @@ done1: ptr[i_token++] = s; if (s[0] == '\0') { - nm_assert (f_preseve_empty); + nm_assert (f_preserve_empty); goto done2; } - nm_assert ( f_preseve_empty + nm_assert ( f_preserve_empty || !_char_lookup_has (ch_lookup, s[0])); while (!_char_lookup_has (ch_lookup, s[0])) { @@ -1138,7 +1144,7 @@ done1: s[0] = '\0'; s++; - if (!f_preseve_empty) { + if (!f_preserve_empty) { while (_char_lookup_has (ch_lookup, s[0])) s++; if (s[0] == '\0') @@ -1150,6 +1156,38 @@ done2: nm_assert (i_token == num_tokens); ptr[i_token] = NULL; + if (f_strstrip) { + gsize i; + + i_token = 0; + for (i = 0; ptr[i]; i++) { + + s = (char *) nm_str_skip_leading_spaces (ptr[i]); + if (s[0] != '\0') { + char *s_last; + + s_last = &s[strlen (s) - 1]; + while ( s_last > s + && g_ascii_isspace (s_last[0]) + && ( ! f_allow_escaping + || !_char_is_escaped (s, s_last))) + (s_last--)[0] = '\0'; + } + + if ( !f_preserve_empty + && s[0] == '\0') + continue; + + ptr[i_token++] = s; + } + + if (i_token == 0) { + g_free (ptr); + return NULL; + } + ptr[i_token] = NULL; + } + return ptr; } diff --git a/shared/nm-utils/nm-shared-utils.h b/shared/nm-utils/nm-shared-utils.h index 8ec6fa2f5a..0beb75ff10 100644 --- a/shared/nm-utils/nm-shared-utils.h +++ b/shared/nm-utils/nm-shared-utils.h @@ -336,6 +336,17 @@ typedef enum { NM_UTILS_STRSPLIT_SET_FLAGS_NONE = 0, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY = (1u << 0), NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING = (1u << 1), + + /* If flag is set, does the same as g_strstrip() on the returned tokens. + * This will remove leading and trailing ascii whitespaces (g_ascii_isspace() + * and NM_ASCII_SPACES). + * + * - when combined with !%NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, + * empty tokens will be removed (and %NULL will be returned if that + * results in an empty string array). + * - when combined with %NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING, + * trailing whitespace escaped by backslash are not stripped. */ + NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP = (1u << 2), } NMUtilsStrsplitSetFlags; const char **nm_utils_strsplit_set_full (const char *str, From 9522aaf22678e57787e3def3baf956bb5457fe20 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 11 Apr 2019 19:24:15 +0200 Subject: [PATCH 3/9] shared: add NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED to nm_utils_strsplit_set_full() Add a new flag that will remove escape characters after splitting the string. This implements a special kind of backslash escaping. It's not C escape sequences (like '\n' or '\020'), but simply to take the special character following the backslash verbatim. Note that the backslash is only considered special, if it's followed by a delimiter, another backslash, or a whitespace (in combination with %NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP). The main purpose of this form of escaping is nmcli's list options, like $ nmcli connection modify "$PROFILE" +ipv4.routing-rules 'priority 5 from 192.168.7.5/32 table 5, priority 6 iif a\, from 192.168.7.5/32 table 6' It's a contrieved example, but the list options are a list of IP addresses, rules, etc. They implement their own syntax for one element, and are concatenated by ','. To support that one element may have arbitrary characters (including the delimiter and whitespaces), nmcli employs a tokenization with this special kind of escaping. --- shared/nm-utils/nm-shared-utils.c | 27 ++++++++++++++++++++++++++- shared/nm-utils/nm-shared-utils.h | 27 +++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c index c21f0f66ec..1a86778e81 100644 --- a/shared/nm-utils/nm-shared-utils.c +++ b/shared/nm-utils/nm-shared-utils.c @@ -1027,7 +1027,8 @@ nm_utils_strsplit_set_full (const char *str, const char *c_str; char *s; guint8 ch_lookup[256]; - const gboolean f_allow_escaping = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING); + const gboolean f_escaped = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED); + const gboolean f_allow_escaping = f_escaped || NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING); const gboolean f_preserve_empty = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY); const gboolean f_strstrip = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP); @@ -1188,6 +1189,30 @@ done2: ptr[i_token] = NULL; } + if (f_escaped) { + gsize i, j; + + /* We no longer need ch_lookup for its original purpose. Modify it, so it + * can detect the delimiters, '\\', and (optionally) whitespaces. */ + ch_lookup[((guint8) '\\')] = 1; + if (f_strstrip) { + for (i = 0; NM_ASCII_SPACES[i]; i++) + ch_lookup[((guint8) (NM_ASCII_SPACES[i]))] = 1; + } + + for (i_token = 0; ptr[i_token]; i_token++) { + s = (char *) ptr[i_token]; + j = 0; + for (i = 0; s[i] != '\0'; ) { + if ( s[i] == '\\' + && _char_lookup_has (ch_lookup, s[i + 1])) + i++; + s[j++] = s[i++]; + } + s[j] = '\0'; + } + } + return ptr; } diff --git a/shared/nm-utils/nm-shared-utils.h b/shared/nm-utils/nm-shared-utils.h index 0beb75ff10..25e77b9b8b 100644 --- a/shared/nm-utils/nm-shared-utils.h +++ b/shared/nm-utils/nm-shared-utils.h @@ -347,6 +347,33 @@ typedef enum { * - when combined with %NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING, * trailing whitespace escaped by backslash are not stripped. */ NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP = (1u << 2), + + /* This implies %NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING. + * + * This will do a final run over all tokens and remove all backslash + * escape characters that + * - precede a delimiter. + * - precede a backslash. + * - preceed a whitespace (with %NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP). + * + * Note that with %NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP, it is only + * necessary to escape the very last whitespace (if the delimiters + * are not whitespace themself). So, technically, it would be sufficient + * to only unescape a backslash before the last whitespace and the user + * still could express everything. However, such a rule would be complicated + * to understand, so when using backslash escaping with nm_utils_strsplit_set_full(), + * then all characters (including backslash) are treated verbatim, except: + * + * - "\\$DELIMITER" (escaped delimiter) + * - "\\\\" (escaped backslash) + * - "\\$SPACE" (escaped space) (with %NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP). + * + * Note that all other escapes like "\\n" or "\\001" are left alone. + * That makes the escaping/unescaping rules simple. Also, for the most part + * a text is just taken as-is, with little additional rules. Only backslashes + * need extra care, and then only if they proceed one of the relevant characters. + */ + NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED = (1u << 3), } NMUtilsStrsplitSetFlags; const char **nm_utils_strsplit_set_full (const char *str, From e206e3d4d8ff1ce914b53eea650b9cceef4f3754 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 11 Apr 2019 21:20:11 +0200 Subject: [PATCH 4/9] shared: add nm_utils_escaped_tokens_escape() This escapes strings so that they can be concatenated with a delimiter and without loss tokenized with nm_utils_escaped_tokens_split(). Note that this is similar to _nm_utils_escape_plain() and _nm_utils_escape_spaces(). The difference is that nm_utils_escaped_tokens_escape() also escapes the last trailing whitespace. That means, if delimiters contains all NM_ASCII_SPACES, then it is identical to _nm_utils_escape_spaces(). Otherwise, the trailing space is treated specially. That is, because nm_utils_escaped_tokens_split() uses NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP, to strip leading and trailing whitespace. To still express a trailing whitespace, the last whitespace must be escaped. Note that NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP also honors escaping any whitespace (not only at the last position), but when escaping we don't need to escape them, because unescaped (non-trailing) whitespace are taken just fine. The pair nm_utils_escaped_tokens_split() and nm_utils_escaped_tokens_escape() are proposed as default way of tokenizing a list of items. For example, with $ nmcli connection modify "$PROFILE" +ipv4.routing-rules 'priority 5 from 192.168.7.5/32 table 5, priority 6 iif a\, from 192.168.7.5/32 table 6' Here we implement a to/from string function to handle one item (nm_ip_routing_rule_{from,to}_string()). When such elements are combined with ',', then we need to support an additional layer of escaping on top of that. The advantage is that the indvidual to/from string functions are agnostic to this second layer of escaping/tokenizing that nmcli employs to handle a list of these items. The disadvantage is that we possibly get multiple layers of backslash escapings. That is only mitigated by the fact that nm_utils_escaped_tokens_*() supports a syntax for which *most* characters don't need any special escaping. Only delimiters, backslash, and the trailing space needs escaping, and these are cases are expected to be few. --- shared/nm-utils/nm-shared-utils.c | 76 +++++++++++++++++++++++++++++++ shared/nm-utils/nm-shared-utils.h | 31 +++++++++++++ 2 files changed, 107 insertions(+) diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c index 1a86778e81..51d33b4911 100644 --- a/shared/nm-utils/nm-shared-utils.c +++ b/shared/nm-utils/nm-shared-utils.c @@ -1216,6 +1216,82 @@ done2: return ptr; } +/*****************************************************************************/ + +const char * +nm_utils_escaped_tokens_escape (const char *str, + const char *delimiters, + char **out_to_free) +{ + guint8 ch_lookup[256]; + char *ret; + gsize str_len; + gsize alloc_len; + gsize n_escapes; + gsize i, j; + gboolean escape_trailing_space; + + if (!delimiters) { + nm_assert (delimiters); + delimiters = NM_ASCII_SPACES; + } + + if (!str || str[0] == '\0') { + *out_to_free = NULL; + return str; + } + + _char_lookup_table_init (ch_lookup, delimiters); + + /* also mark '\\' as requiring escaping. */ + ch_lookup[((guint8) '\\')] = 1; + + n_escapes = 0; + for (i = 0; str[i] != '\0'; i++) { + if (_char_lookup_has (ch_lookup, str[i])) + n_escapes++; + } + + str_len = i; + nm_assert (str_len > 0 && strlen (str) == str_len); + + escape_trailing_space = !_char_lookup_has (ch_lookup, str[str_len - 1]) + && g_ascii_isspace (str[str_len - 1]); + + if ( n_escapes == 0 + && !escape_trailing_space) { + *out_to_free = NULL; + return str; + } + + alloc_len = str_len + n_escapes + ((gsize) escape_trailing_space) + 1; + ret = g_new (char, alloc_len); + + j = 0; + for (i = 0; str[i] != '\0'; i++) { + if (_char_lookup_has (ch_lookup, str[i])) { + nm_assert (j < alloc_len); + ret[j++] = '\\'; + } + nm_assert (j < alloc_len); + ret[j++] = str[i]; + } + if (escape_trailing_space) { + nm_assert (!_char_lookup_has (ch_lookup, ret[j - 1]) && g_ascii_isspace (ret[j - 1])); + ret[j] = ret[j - 1]; + ret[j - 1] = '\\'; + j++; + } + + nm_assert (j == alloc_len - 1); + ret[j] = '\0'; + + *out_to_free = ret; + return ret; +} + +/*****************************************************************************/ + /** * nm_utils_strv_find_first: * @list: the strv list to search diff --git a/shared/nm-utils/nm-shared-utils.h b/shared/nm-utils/nm-shared-utils.h index 25e77b9b8b..aeb1e2a1c6 100644 --- a/shared/nm-utils/nm-shared-utils.h +++ b/shared/nm-utils/nm-shared-utils.h @@ -409,6 +409,37 @@ char **_nm_utils_strv_cleanup (char **strv, /*****************************************************************************/ +static inline const char ** +nm_utils_escaped_tokens_split (const char *str, + const char *delimiters) +{ + return nm_utils_strsplit_set_full (str, + delimiters, + NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED + | NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP); +} + +const char *nm_utils_escaped_tokens_escape (const char *str, + const char *delimiters, + char **out_to_free); + +static inline GString * +nm_utils_escaped_tokens_escape_gstr (const char *str, + const char *delimiters, + GString *gstring) +{ + gs_free char *str_to_free = NULL; + + nm_assert (str); + nm_assert (gstring); + + g_string_append (gstring, + nm_utils_escaped_tokens_escape (str, delimiters, &str_to_free)); + return gstring; +} + +/*****************************************************************************/ + #define NM_UTILS_CHECKSUM_LENGTH_MD5 16 #define NM_UTILS_CHECKSUM_LENGTH_SHA1 20 #define NM_UTILS_CHECKSUM_LENGTH_SHA256 32 From ba956bd499c1798e57fdfafa9d87710cf3704e37 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 12 Apr 2019 12:00:48 +0200 Subject: [PATCH 5/9] cli: add new style for tokenizing/concatenating list options in nmcli nmcli supports list options (optlist and multilist properties). These commonly are individual items, concatenated by a delimiter. It should be generally possibly to express every value. That means, we need some for of escaping mechanism for delimiters. Currently this is all inconsistent or no escaping is supported. I intend to fix that (which will be a change in behavior). For now, just add yet another style of tokenzing/concatenating list items in nmcli. This is the style to replace all other styles. --- clients/common/nm-meta-setting-desc.c | 28 ++++++++++++++++++++------- clients/common/nm-meta-setting-desc.h | 2 ++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/clients/common/nm-meta-setting-desc.c b/clients/common/nm-meta-setting-desc.c index 83959ec653..aa2bcd4f6f 100644 --- a/clients/common/nm-meta-setting-desc.c +++ b/clients/common/nm-meta-setting-desc.c @@ -167,12 +167,16 @@ _value_str_as_index_list (const char *value, gsize *out_len) #define MULTILIST_WITH_ESCAPE_CHARS NM_ASCII_SPACES"," +#define ESCAPED_TOKENS_DELIMTER ',' +#define ESCAPED_TOKENS_DELIMTERS "," + typedef enum { VALUE_STRSPLIT_MODE_STRIPPED, VALUE_STRSPLIT_MODE_OBJLIST, VALUE_STRSPLIT_MODE_OBJLIST_WITH_ESCAPE, VALUE_STRSPLIT_MODE_MULTILIST, VALUE_STRSPLIT_MODE_MULTILIST_WITH_ESCAPE, + VALUE_STRSPLIT_MODE_ESCAPED_TOKENS, } ValueStrsplitMode; static const char * @@ -211,6 +215,10 @@ _value_strsplit (const char *value, case VALUE_STRSPLIT_MODE_MULTILIST_WITH_ESCAPE: strv = nm_utils_strsplit_set_full (value, MULTILIST_WITH_ESCAPE_CHARS, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING); break; + case VALUE_STRSPLIT_MODE_ESCAPED_TOKENS: + strv = nm_utils_escaped_tokens_split (value, ESCAPED_TOKENS_DELIMTERS); + NM_SET_OUT (out_len, NM_PTRARRAY_LEN (strv)); + return g_steal_pointer (&strv); default: nm_assert_not_reached (); break; @@ -1882,9 +1890,11 @@ _set_fcn_multilist (ARGS_SET_FCN) } strv = _value_strsplit (value, - property_info->property_typ_data->subtype.multilist.strsplit_with_escape - ? VALUE_STRSPLIT_MODE_MULTILIST_WITH_ESCAPE - : VALUE_STRSPLIT_MODE_MULTILIST, + property_info->property_typ_data->subtype.multilist.strsplit_escaped_tokens + ? VALUE_STRSPLIT_MODE_ESCAPED_TOKENS + : ( property_info->property_typ_data->subtype.multilist.strsplit_with_escape + ? VALUE_STRSPLIT_MODE_OBJLIST_WITH_ESCAPE + : VALUE_STRSPLIT_MODE_OBJLIST), &nstrv); j = 0; @@ -3061,8 +3071,10 @@ _get_fcn_objlist (ARGS_GET_FCN) if ( get_type == NM_META_ACCESSOR_GET_TYPE_PRETTY && property_info->property_typ_data->subtype.objlist.delimit_pretty_with_semicolon) g_string_append (str, "; "); - else + else { + G_STATIC_ASSERT_EXPR (ESCAPED_TOKENS_DELIMTER == ','); g_string_append (str, ", "); + } } start_offset = str->len; @@ -3271,9 +3283,11 @@ _set_fcn_objlist (ARGS_SET_FCN) } strv = _value_strsplit (value, - property_info->property_typ_data->subtype.objlist.strsplit_with_escape - ? VALUE_STRSPLIT_MODE_OBJLIST_WITH_ESCAPE - : VALUE_STRSPLIT_MODE_OBJLIST, + property_info->property_typ_data->subtype.objlist.strsplit_escaped_tokens + ? VALUE_STRSPLIT_MODE_ESCAPED_TOKENS + : ( property_info->property_typ_data->subtype.objlist.strsplit_with_escape + ? VALUE_STRSPLIT_MODE_OBJLIST_WITH_ESCAPE + : VALUE_STRSPLIT_MODE_OBJLIST), &nstrv); if (_SET_FCN_DO_SET_ALL (modifier, value)) { diff --git a/clients/common/nm-meta-setting-desc.h b/clients/common/nm-meta-setting-desc.h index 15c0e81eb0..44a6827b7e 100644 --- a/clients/common/nm-meta-setting-desc.h +++ b/clients/common/nm-meta-setting-desc.h @@ -281,6 +281,7 @@ struct _NMMetaPropertyTypData { void (*remove_by_idx_fcn_s) (NMSetting *setting, int idx); gboolean (*remove_by_value_fcn) (NMSetting *setting, const char *item); bool strsplit_with_escape:1; + bool strsplit_escaped_tokens:1; } multilist; struct { guint (*get_num_fcn) (NMSetting *setting); @@ -297,6 +298,7 @@ struct _NMMetaPropertyTypData { void (*remove_by_idx_fcn_s) (NMSetting *setting, int idx); bool delimit_pretty_with_semicolon:1; bool strsplit_with_escape:1; + bool strsplit_escaped_tokens:1; } objlist; struct { gboolean (*set_fcn) (NMSetting *setting, From b6d0be2d3b4caa30685abf46ae6a355cada8bf4e Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 12 Apr 2019 12:38:11 +0200 Subject: [PATCH 6/9] libnm: use nm_utils_escaped_tokens_*() for parsing NMIPRoutingRule Replace nm_utils_str_simpletokens_extract_next() by nm_utils_escaped_tokens_split(). nm_utils_escaped_tokens_split() should become our first choice for parsing and tokenizing. Note that both nm_utils_str_simpletokens_extract_next() and nm_utils_escaped_tokens_split() need to strdup the string once, and tokenizing takes O(n). So, they are roughtly the same performance wise. The only difference is, that as we iterate through the tokens, we might abort early on error with nm_utils_str_simpletokens_extract_next() and not parse the entire string. But that is a small benefit, since we anyway always strdup() the string (being O(n) already). Note that to-string will no longer escape ',' and ';'. This is a change in behavior, of unreleased API. Also note, that escaping these is no longer necessary, because nmcli soon will also use nm_utils_escaped_tokens_*(). Another change in behavior is that nm_utils_str_simpletokens_extract_next() treated invalid escape sequences (backslashes followed by an arbitrary character), buy stripping the backslash. nm_utils_escaped_tokens_*() leaves such backslashes as is, and only honors them if they are followed by a whitespace (the delimiter) or another backslash. The disadvantage of the new approach is that backslashes are treated differently depending on the following character. The benefit is, that most backslashes can now be written verbatim, not requiring them to escape them with a double-backslash. Yes, there is a problem with these nested escape schemes: - the caller may already need to escape backslash in shell. - then nmcli will use backslash escaping to split the rules at ','. - then nm_ip_routing_rule_from_string() will honor backslash escaping for spaces. - then iifname and oifname use backslash escaping for nm_utils_buf_utf8safe_escape() to express non-UTF-8 characters (because interface names are not necessarily UTF-8). This is only redeamed because escaping is really only necessary for very unusual cases, if you want to embed a backslash, a space, a comma, or a non-UTF-8 character. But if you have to, now you will be able to express that. The other upside of these layers of escaping is that they become all indendent from each other: - shell can accept quoted/escaped arguments and will unescape them. - nmcli can do the tokenizing for ',' (and escape the content unconditionally when converting to string). - nm_ip_routing_rule_from_string() can do its tokenizing without special consideration of utf8safe escaping. - NMIPRoutingRule takes iifname/oifname as-is and is not concerned about nm_utils_buf_utf8safe_escape(). However, before configuring the rule in kernel, this utf8safe escape will be unescaped to get the interface name (which is non-UTF8 binary). --- libnm-core/nm-setting-ip-config.c | 38 +++++++++---------------------- libnm-core/tests/test-setting.c | 6 ++--- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/libnm-core/nm-setting-ip-config.c b/libnm-core/nm-setting-ip-config.c index 48815c3484..d4a38161dd 100644 --- a/libnm-core/nm-setting-ip-config.c +++ b/libnm-core/nm-setting-ip-config.c @@ -2954,9 +2954,8 @@ nm_ip_routing_rule_from_string (const char *str, GError **error) { nm_auto_unref_ip_routing_rule NMIPRoutingRule *self = NULL; - gs_free char *str_clone = NULL; - char *str_remainder; - char *str_word; + gs_free const char **tokens = NULL; + gsize i_token; gboolean any_words = FALSE; char *word0 = NULL; char *word1 = NULL; @@ -3022,10 +3021,9 @@ nm_ip_routing_rule_from_string (const char *str, addr_family = _rr_string_addr_family_from_flags (to_string_flags); - str_clone = g_strdup (str); - str_remainder = str_clone; - - while ((str_word = nm_utils_str_simpletokens_extract_next (&str_remainder))) { + tokens = nm_utils_escaped_tokens_split (str, NM_ASCII_SPACES); + for (i_token = 0; tokens && tokens[i_token]; i_token++) { + char *str_word = (char *) tokens[i_token]; any_words = TRUE; if (!word0) @@ -3325,24 +3323,6 @@ _rr_string_append_inet_addr (GString *str, } } -static void -_rr_string_append_escaped (GString *str, - const char *s) -{ - for (; s[0]; s++) { - /* We need to escape spaces and '\\', because that - * is what nm_utils_str_simpletokens_extract_next() uses to split - * words. - * We also escape ',' because nmcli uses that to concatenate values. - * We also escape ';', in case somebody wants to use ';' instead of ','. - */ - if ( NM_IN_SET (s[0], '\\', ',', ';') - || g_ascii_isspace (s[0])) - g_string_append_c (str, '\\'); - g_string_append_c (str, s[0]); - } -} - /** * nm_ip_routing_rule_to_string: * @self: the #NMIPRoutingRule instance to convert to string. @@ -3486,13 +3466,17 @@ nm_ip_routing_rule_to_string (const NMIPRoutingRule *self, if (self->iifname) { g_string_append (nm_gstring_add_space_delimiter (str), "iif "); - _rr_string_append_escaped (str, self->iifname); + nm_utils_escaped_tokens_escape_gstr (self->iifname, + NM_ASCII_SPACES, + str); } if (self->oifname) { g_string_append (nm_gstring_add_space_delimiter (str), "oif "); - _rr_string_append_escaped (str, self->oifname); + nm_utils_escaped_tokens_escape_gstr (self->oifname, + NM_ASCII_SPACES, + str); } if (self->table != 0) { diff --git a/libnm-core/tests/test-setting.c b/libnm-core/tests/test-setting.c index 70c0d6a4c6..57af014021 100644 --- a/libnm-core/tests/test-setting.c +++ b/libnm-core/tests/test-setting.c @@ -2886,7 +2886,7 @@ test_routing_rule (gconstpointer test_data) char ifname_buf[16]; _rr_from_str ("priority 5 from 0.0.0.0 table 1", - " from 0.0.0\\.0 \\priority 5 lookup 1 "); + " from 0.0.0.0 priority 5 lookup 1 "); _rr_from_str ("priority 5 from 0.0.0.0/0 table 4"); _rr_from_str ("priority 5 to 0.0.0.0 table 6"); _rr_from_str ("priority 5 to 0.0.0.0 table 254", @@ -2905,7 +2905,7 @@ test_routing_rule (gconstpointer test_data) "priority 5 from :: to ::/0 table 0x19 fwmark 0x00/4294967295"); _rr_from_str ("priority 5 from :: iif aab table 25"); _rr_from_str ("priority 5 from :: iif aab oif er table 25", - "priority 5 from :: table 0x19 dev \\a\\a\\b oif er"); + "priority 5 from :: table 0x19 dev aab oif er"); _rr_from_str ("priority 5 from :: iif a\\\\303b table 25"); _rr_from_str ("priority 5 to 0.0.0.0 sport 10 table 6", "priority 5 to 0.0.0.0 sport 10-10 table 6"); @@ -2952,7 +2952,7 @@ test_routing_rule (gconstpointer test_data) g_assert_cmpint (0x10, ==, nm_ip_routing_rule_get_tos (rr1)); nm_clear_pointer (&rr1, nm_ip_routing_rule_unref); - rr1 = _rr_from_str_get ("priority 5 from :: iif a\\\\303\\\\261\\,x\\;b table 254", + rr1 = _rr_from_str_get ("priority 5 from :: iif a\\\\303\\\\261,x;b table 254", "priority 5 from :: iif a\\\\303\\\\261,x;b table 254"); g_assert_cmpstr (nm_ip_routing_rule_get_iifname (rr1), ==, "a\\303\\261,x;b"); success = nm_ip_routing_rule_get_xifname_bin (rr1, FALSE, ifname_buf); From d59f046bb62dc9b655a1c3f585ea8b06128fe07a Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 12 Apr 2019 12:10:33 +0200 Subject: [PATCH 7/9] cli: use nm_utils_escaped_tokens_*() for handling policy routes Optimally, all list types properties in nmcli support proper escaping. For that, we should use nm_utils_escaped_tokens_*() API. For now, just change that for policy routes. They were just added recently, so no change in behavior of released API. --- clients/common/nm-meta-setting-desc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clients/common/nm-meta-setting-desc.c b/clients/common/nm-meta-setting-desc.c index aa2bcd4f6f..2a4e5b8269 100644 --- a/clients/common/nm-meta-setting-desc.c +++ b/clients/common/nm-meta-setting-desc.c @@ -3400,7 +3400,7 @@ _objlist_obj_to_str_fcn_ip_config_routing_rules (NMMetaAccessorGetType get_type, NULL, NULL); if (s) - g_string_append (str, s); + nm_utils_escaped_tokens_escape_gstr (s, ESCAPED_TOKENS_DELIMTERS, str); } static gboolean @@ -5670,7 +5670,7 @@ static const NMMetaPropertyInfo *const property_infos_IP4_CONFIG[] = { .obj_to_str_fcn = _objlist_obj_to_str_fcn_ip_config_routing_rules, .set_fcn = _objlist_set_fcn_ip_config_routing_rules, .remove_by_idx_fcn_u = OBJLIST_REMOVE_BY_IDX_FCN_U (NMSettingIPConfig, nm_setting_ip_config_remove_routing_rule), - .strsplit_with_escape = TRUE, + .strsplit_escaped_tokens = TRUE, ), ), ), @@ -5878,7 +5878,7 @@ static const NMMetaPropertyInfo *const property_infos_IP6_CONFIG[] = { .obj_to_str_fcn = _objlist_obj_to_str_fcn_ip_config_routing_rules, .set_fcn = _objlist_set_fcn_ip_config_routing_rules, .remove_by_idx_fcn_u = OBJLIST_REMOVE_BY_IDX_FCN_U (NMSettingIPConfig, nm_setting_ip_config_remove_routing_rule), - .strsplit_with_escape = TRUE, + .strsplit_escaped_tokens = TRUE, ), ), ), From ced7dbe8bfb19fbccd2bbad8ea8bf44ef66d442e Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 12 Apr 2019 12:51:17 +0200 Subject: [PATCH 8/9] shared: remove unused nm_utils_str_simpletokens_extract_next() This can be replaced by nm_utils_escaped_tokens_split(). Note that nm_utils_escaped_tokens_split() does not behave exactly the same. For example, nm_utils_str_simpletokens_extract_next() would remove all backslashes and leave only the following character. nm_utils_escaped_tokens_split() instead only strips backslashes that preceed a delimiter, whitespace or another backslash. But we should have one preferred way of tokenizing, and I find this preferable, because it allows for most backslashes to appear verbatim. --- shared/nm-utils/nm-shared-utils.c | 53 ------------------------------- shared/nm-utils/nm-shared-utils.h | 2 -- 2 files changed, 55 deletions(-) diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c index 51d33b4911..06cd481ebb 100644 --- a/shared/nm-utils/nm-shared-utils.c +++ b/shared/nm-utils/nm-shared-utils.c @@ -2691,59 +2691,6 @@ _nm_utils_unescape_plain (char *str, const char *candidates, gboolean do_strip) return str; } -char * -nm_utils_str_simpletokens_extract_next (char **p_line_start) -{ - char *s_next; - char *s_start; - gsize j; - - s_start = *p_line_start; - if (!s_start) - return NULL; - - s_start = nm_str_skip_leading_spaces (s_start); - - if (s_start[0] == '\0') { - *p_line_start = s_start; - return NULL; - } - - s_next = s_start; - j = 0; - while (TRUE) { - if (s_next[0] == '\0') { - s_start[j] = '\0'; - *p_line_start = s_next; - return s_start; - } - if (s_next[0] == '\\') { - s_next++; - if (s_next[0] == '\0') { - /* trailing backslash at end of word. That's an error, - * but we silently drop the backslash and signal success. */ - *p_line_start = s_next; - if (j == 0) - return NULL; - s_start[j] = '\0'; - return s_start; - } - - s_start[j++] = (s_next++)[0]; - continue; - } - if (!g_ascii_isspace (s_next[0])) { - s_start[j++] = (s_next++)[0]; - continue; - } - - nm_assert (j > 0); - s_start[j] = '\0'; - *p_line_start = nm_str_skip_leading_spaces (&s_next[1]); - return s_start; - } -} - /*****************************************************************************/ typedef struct { diff --git a/shared/nm-utils/nm-shared-utils.h b/shared/nm-utils/nm-shared-utils.h index aeb1e2a1c6..4591cc8e9a 100644 --- a/shared/nm-utils/nm-shared-utils.h +++ b/shared/nm-utils/nm-shared-utils.h @@ -398,8 +398,6 @@ nm_utils_strsplit_set (const char *str, return nm_utils_strsplit_set_full (str, delimiters, NM_UTILS_STRSPLIT_SET_FLAGS_NONE); } -char *nm_utils_str_simpletokens_extract_next (char **p_line_start); - gssize nm_utils_strv_find_first (char **list, gssize len, const char *needle); char **_nm_utils_strv_cleanup (char **strv, From a7d1e14e6deb33dc51902bf76c51755e9915bb1a Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Tue, 16 Apr 2019 12:34:11 +0200 Subject: [PATCH 9/9] shared/tests: add unit tests for new flags of nm_utils_strsplit_set_full() --- libnm-core/tests/test-general.c | 64 ++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/libnm-core/tests/test-general.c b/libnm-core/tests/test-general.c index f6bbd626c3..dfaba1cac5 100644 --- a/libnm-core/tests/test-general.c +++ b/libnm-core/tests/test-general.c @@ -264,8 +264,8 @@ _do_test_nm_utils_strsplit_set_f_one (NMUtilsStrsplitSetFlags flags, gsize words_len, const char *const*exp_words) { - const char *DELIMITERS = " \t\n"; -#define DELIMITERS_C ' ', '\t', '\n' +#define DELIMITERS " \n" +#define DELIMITERS_C ' ', '\n' gs_free const char **words = NULL; gsize i, j, k; @@ -509,6 +509,47 @@ _do_test_nm_utils_strsplit_set_f (NMUtilsStrsplitSetFlags flags, str, \ ##__VA_ARGS__) +static void +_do_test_nm_utils_strsplit_set_simple (NMUtilsStrsplitSetFlags flags, + const char *str, + gsize words_len, + const char *const*exp_words) +{ + gs_free const char **tokens = NULL; + gsize n_tokens; + + tokens = nm_utils_strsplit_set_full (str, DELIMITERS, flags); + + if (!tokens) { + g_assert_cmpint (words_len, ==, 0); + return; + } + + g_assert (str && str[0]); + g_assert_cmpint (words_len, >, 0); + n_tokens = NM_PTRARRAY_LEN (tokens); + + if (_nm_utils_strv_cmp_n (exp_words, words_len, tokens, -1) != 0) { + gsize i; + + g_print (">>> split \"%s\" (flags %x) got %zu tokens (%zu expected)\n", str, (guint) flags, n_tokens, words_len); + for (i = 0; i < NM_MAX (n_tokens, words_len); i++) { + const char *s1 = i < n_tokens ? tokens[i] : NULL; + const char *s2 = i < words_len ? exp_words[i] : NULL; + + g_print (">>> [%zu]: %s - %s%s%s vs. %s%s%s\n", + i, + nm_streq0 (s1, s2) ? "same" : "diff", + NM_PRINT_FMT_QUOTE_STRING (s1), + NM_PRINT_FMT_QUOTE_STRING (s2)); + } + g_assert_not_reached (); + } + g_assert_cmpint (words_len, ==, NM_PTRARRAY_LEN (tokens)); +} +#define do_test_nm_utils_strsplit_set_simple(flags, str, ...) \ + _do_test_nm_utils_strsplit_set_simple ((flags), (str), NM_NARG (__VA_ARGS__), NM_MAKE_STRV (__VA_ARGS__)) + static void test_nm_utils_strsplit_set (void) { @@ -534,8 +575,8 @@ test_nm_utils_strsplit_set (void) do_test_nm_utils_strsplit_set (FALSE, NULL); do_test_nm_utils_strsplit_set (FALSE, ""); - do_test_nm_utils_strsplit_set (FALSE, "\t"); - do_test_nm_utils_strsplit_set (FALSE, " \t\n"); + do_test_nm_utils_strsplit_set (FALSE, "\n"); + do_test_nm_utils_strsplit_set (TRUE, " \t\n", "\t"); do_test_nm_utils_strsplit_set (FALSE, "a", "a"); do_test_nm_utils_strsplit_set (FALSE, "a b", "a", "b"); do_test_nm_utils_strsplit_set (FALSE, "a\rb", "a\rb"); @@ -610,6 +651,21 @@ test_nm_utils_strsplit_set (void) words_len, (const char *const*) words_exp->pdata); } + + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED, "\t", "\t"); + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED | NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP, "\t"); + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED | NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP | NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, + "\t", ""); + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED | NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP | NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, + "\t\\\t\t\t\\\t", "\t\t\t\t"); + + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED, "\ta", "\ta"); + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED | NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP, "\ta", "a"); + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED, "\ta\\ b\t\\ ", "\ta b\t "); + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED | NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP, "\ta\\ b\t\\ \t", "a b\t "); + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED, "a\\ b", "a ", "b"); + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED, "\ta\\ b", "\ta ", "b"); + do_test_nm_utils_strsplit_set_simple (NM_UTILS_STRSPLIT_SET_FLAGS_ESCAPED | NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP, "\ta\\ b", "a ", "b"); } /*****************************************************************************/