From c75a1d7e161190349f096a182beba4f0594fcbe6 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Thu, 11 Apr 2019 13:32:43 +0200 Subject: [PATCH] shared: add NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP to nm_utils_strsplit_set_full() This will essentially call g_strstrip() on each token. There are some specialties: - if the resulting word is empty after stripping, then, unless %NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY is set, the empty token will be removed. If that results in an empty string array, %NULL will be returned. - if %NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING is set, then whitespace that is backslash escaped is not removed. Since this is a post-operation that happens after tokenizing, it could be done as a separate function. And we already have such functions: _nm_utils_unescape_plain() and _nm_utils_unescape_spaces(). However, that is ugly for several reasons: - the stripping should be part of the tokenizing, you shouldn't need several steps. - nm_utils_strsplit_set_full() returns a "const char **" which indicates the strings must not be freed. However, it is perfectly valid to modify the string in place. Hence, the post-op function would need to cast the strings to "char *", which seems ugly (although we do that in many places, and it's guaranteed to work). - _nm_utils_unescape_plain()/_nm_utils_unescape_spaces() is indeed already used together with nm_utils_strsplit_set_full(). However, it requires initializing the ch_lookup buffer twice. I would expect that initializing the ch_lookup buffer is a large portion of what the function does already (for short strings). This issue will be solved in the next commit by adding yet another flag which allows unescaping. 
(cherry picked from commit 5b2b0dcadfd40883865f5ce4e3f8632008b2d973) --- shared/nm-utils/nm-shared-utils.c | 72 +++++++++++++++++++++++-------- shared/nm-utils/nm-shared-utils.h | 11 +++++ 2 files changed, 66 insertions(+), 17 deletions(-) diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c index 879d1e7b9e..c21f0f66ec 100644 --- a/shared/nm-utils/nm-shared-utils.c +++ b/shared/nm-utils/nm-shared-utils.c @@ -1028,7 +1028,8 @@ nm_utils_strsplit_set_full (const char *str, char *s; guint8 ch_lookup[256]; const gboolean f_allow_escaping = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING); - const gboolean f_preseve_empty = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY); + const gboolean f_preserve_empty = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY); + const gboolean f_strstrip = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP); if (!str) return NULL; @@ -1042,7 +1043,7 @@ nm_utils_strsplit_set_full (const char *str, nm_assert ( !f_allow_escaping || !_char_lookup_has (ch_lookup, '\\')); - if (!f_preseve_empty) { + if (!f_preserve_empty) { while (_char_lookup_has (ch_lookup, str[0])) str++; } @@ -1055,6 +1056,17 @@ nm_utils_strsplit_set_full (const char *str, return NULL; } +#define _char_is_escaped(str_start, str_cur) \ + ({ \ + const char *const _str_start = (str_start); \ + const char *const _str_cur = (str_cur); \ + const char *_str_i = (_str_cur); \ + \ + while ( _str_i > _str_start \ + && _str_i[-1] == '\\') \ + _str_i--; \ + (((_str_cur - _str_i) % 2) != 0); \ + }) num_tokens = 1; c_str = str; @@ -1069,23 +1081,17 @@ nm_utils_strsplit_set_full (const char *str, /* we assume escapings are not frequent. After we found * this delimiter, check whether it was escaped by counting * the backslashed before. */ - if (f_allow_escaping) { - const char *c2 = c_str; - - while ( c2 > str - && c2[-1] == '\\') - c2--; - if (((c_str - c2) % 2) != 0) { - /* the delimiter is escaped. 
This was not an accepted delimiter. */ - c_str++; - continue; - } + if ( f_allow_escaping + && _char_is_escaped (str, c_str)) { + /* the delimiter is escaped. This was not an accepted delimiter. */ + c_str++; + continue; } c_str++; /* if we drop empty tokens, then we now skip over all consecutive delimiters. */ - if (!f_preseve_empty) { + if (!f_preserve_empty) { while (_char_lookup_has (ch_lookup, c_str[0])) c_str++; if (c_str[0] == '\0') @@ -1115,10 +1121,10 @@ done1: ptr[i_token++] = s; if (s[0] == '\0') { - nm_assert (f_preseve_empty); + nm_assert (f_preserve_empty); goto done2; } - nm_assert ( f_preseve_empty + nm_assert ( f_preserve_empty || !_char_lookup_has (ch_lookup, s[0])); while (!_char_lookup_has (ch_lookup, s[0])) { @@ -1138,7 +1144,7 @@ done1: s[0] = '\0'; s++; - if (!f_preseve_empty) { + if (!f_preserve_empty) { while (_char_lookup_has (ch_lookup, s[0])) s++; if (s[0] == '\0') @@ -1150,6 +1156,38 @@ done2: nm_assert (i_token == num_tokens); ptr[i_token] = NULL; + if (f_strstrip) { + gsize i; + + i_token = 0; + for (i = 0; ptr[i]; i++) { + + s = (char *) nm_str_skip_leading_spaces (ptr[i]); + if (s[0] != '\0') { + char *s_last; + + s_last = &s[strlen (s) - 1]; + while ( s_last > s + && g_ascii_isspace (s_last[0]) + && ( ! f_allow_escaping + || !_char_is_escaped (s, s_last))) + (s_last--)[0] = '\0'; + } + + if ( !f_preserve_empty + && s[0] == '\0') + continue; + + ptr[i_token++] = s; + } + + if (i_token == 0) { + g_free (ptr); + return NULL; + } + ptr[i_token] = NULL; + } + return ptr; } diff --git a/shared/nm-utils/nm-shared-utils.h b/shared/nm-utils/nm-shared-utils.h index 8ec6fa2f5a..0beb75ff10 100644 --- a/shared/nm-utils/nm-shared-utils.h +++ b/shared/nm-utils/nm-shared-utils.h @@ -336,6 +336,17 @@ typedef enum { NM_UTILS_STRSPLIT_SET_FLAGS_NONE = 0, NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY = (1u << 0), NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING = (1u << 1), + + /* If flag is set, does the same as g_strstrip() on the returned tokens. 
+ * This will remove leading and trailing ASCII whitespace (g_ascii_isspace() + * and NM_ASCII_SPACES). + * + * - when combined with !%NM_UTILS_STRSPLIT_SET_FLAGS_PRESERVE_EMPTY, + * empty tokens will be removed (and %NULL will be returned if that + * results in an empty string array). + * - when combined with %NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING, + * trailing whitespace escaped by backslash is not stripped. */ + NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP = (1u << 2), } NMUtilsStrsplitSetFlags; const char **nm_utils_strsplit_set_full (const char *str,