From 5d2b609e7e7e18789c75645f49c4524bc95c1be5 Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Mon, 22 Jun 2020 19:43:35 +0200 Subject: [PATCH] shared: add nm_utils_strsplit_quoted() We want to parse "/proc/cmdline". That is space separated with support for quoting and escaping. Our implementation becomes part of stable behavior, and we should interpret the kernel command line the same way as the system does. That means, our implementation should match systemd's. (cherry picked from commit 10779d545a6fe0af8f29e065d251882ff25411fc) --- libnm-core/tests/test-general.c | 229 +++++++++++++++++++++++++++ shared/nm-glib-aux/nm-shared-utils.c | 94 +++++++++++ shared/nm-glib-aux/nm-shared-utils.h | 4 + 3 files changed, 327 insertions(+) diff --git a/libnm-core/tests/test-general.c b/libnm-core/tests/test-general.c index a6fb700715..c2e19a24fa 100644 --- a/libnm-core/tests/test-general.c +++ b/libnm-core/tests/test-general.c @@ -13,6 +13,8 @@ #include "nm-std-aux/c-list-util.h" #include "nm-glib-aux/nm-enum-utils.h" +#include "nm-glib-aux/nm-str-buf.h" +#include "systemd/nm-sd-utils-shared.h" #include "nm-utils.h" #include "nm-setting-private.h" @@ -8864,7 +8866,232 @@ test_connection_ovs_ifname (gconstpointer test_data) } } +/*****************************************************************************/ +static gboolean +_strsplit_quoted_char_needs_escaping (char ch) +{ + return NM_IN_SET (ch, '\'', '\"', '\\') + || strchr (NM_ASCII_WHITESPACES, ch); +} + +static char * +_strsplit_quoted_create_str_rand (gssize len) +{ + NMStrBuf strbuf = NM_STR_BUF_INIT (nmtst_get_rand_uint32 () % 200, nmtst_get_rand_bool ()); + + g_assert (len >= -1); + + if (len == -1) + len = nmtst_get_rand_word_length (NULL); + + while (len-- > 0) { + char ch; + + ch = nmtst_rand_select ('a', ' ', '\\', '"', '\'', nmtst_get_rand_uint32 () % 255 + 1); + g_assert (ch); + nm_str_buf_append_c (&strbuf, ch); + } + + if (!strbuf.allocated) + nm_str_buf_maybe_expand (&strbuf, 1, nmtst_get_rand_bool ()); + return nm_str_buf_finalize (&strbuf, NULL); +} + +static char ** +_strsplit_quoted_create_strv_rand (void) +{ + guint len = nmtst_get_rand_word_length (NULL); + char **ptr; + guint i; + + ptr = g_new (char *, len + 1); + for (i = 0; i < len; i++) + ptr[i] = _strsplit_quoted_create_str_rand (-1); + ptr[i] = NULL; + return ptr; +} + +static char * +_strsplit_quoted_join_strv_rand (const char *const*strv) +{ + NMStrBuf strbuf = NM_STR_BUF_INIT (nmtst_get_rand_uint32 () % 200, nmtst_get_rand_bool ()); + char *result; + gsize l; + gsize l2; + gsize *p_l2 = nmtst_get_rand_bool () ? &l2 : NULL; + gsize i; + + g_assert (strv); + + nm_str_buf_append_c_repeated (&strbuf, ' ', nmtst_get_rand_word_length (NULL) / 4); + for (i = 0; strv[i]; i++) { + const char *s = strv[i]; + gsize j; + char quote; + + nm_str_buf_append_c_repeated (&strbuf, ' ', 1 + nmtst_get_rand_word_length (NULL) / 4); + + j = 0; + quote = '\0'; + while (TRUE) { + char ch = s[j++]; + + /* extract_first_word*/ + if (quote != '\0') { + if (ch == '\0') { + nm_str_buf_append_c (&strbuf, quote); + break; + } + if ( ch == quote + || ch == '\\' + || nmtst_get_rand_uint32 () % 5 == 0) + nm_str_buf_append_c (&strbuf, '\\'); + nm_str_buf_append_c (&strbuf, ch); + if (nmtst_get_rand_uint32 () % 3 == 0) { + nm_str_buf_append_c (&strbuf, quote); + quote = '\0'; + goto next_maybe_quote; + } + continue; + } + + if (ch == '\0') { + if (s == strv[i]) { + quote = nmtst_rand_select ('\'', '"'); + nm_str_buf_append_c_repeated (&strbuf, quote, 2); + } + break; + } + + if ( _strsplit_quoted_char_needs_escaping (ch) + || nmtst_get_rand_uint32 () % 5 == 0) + nm_str_buf_append_c (&strbuf, '\\'); + + nm_str_buf_append_c (&strbuf, ch); + +next_maybe_quote: + if (nmtst_get_rand_uint32 () % 5 == 0) { + quote = nmtst_rand_select ('\'', '\"'); + nm_str_buf_append_c (&strbuf, quote); + if (nmtst_get_rand_uint32 () % 5 == 0) { + nm_str_buf_append_c (&strbuf, quote); + quote = '\0'; + } + } + } + } + nm_str_buf_append_c_repeated (&strbuf, ' ', nmtst_get_rand_word_length (NULL) / 4); + + nm_str_buf_maybe_expand (&strbuf, 1, nmtst_get_rand_bool ()); + + l = strbuf.len; + result = nm_str_buf_finalize (&strbuf, p_l2); + g_assert (!p_l2 || l == *p_l2); + g_assert (strlen (result) == l); + return result; +} + +static void +_strsplit_quoted_assert_strv (const char *topic, + const char *str, + const char *const*strv1, + const char *const*strv2) +{ + nm_auto_str_buf NMStrBuf s1 = { }; + nm_auto_str_buf NMStrBuf s2 = { }; + gs_free char *str_escaped = NULL; + int i; + + g_assert (str); + g_assert (strv1); + g_assert (strv2); + + if (_nm_utils_strv_equal ((char **) strv1, (char **) strv2)) + return; + + for (i = 0; strv1[i]; i++) { + gs_free char *s = g_strescape (strv1[i], NULL); + + g_print (">>> [%s] strv1[%d] = \"%s\"\n", topic, i, s); + if (i > 0) + nm_str_buf_append_c (&s1, ' '); + nm_str_buf_append_printf (&s1, "\"%s\"", s); + } + + for (i = 0; strv2[i]; i++) { + gs_free char *s = g_strescape (strv2[i], NULL); + + g_print (">>> [%s] strv2[%d] = \"%s\"\n", topic, i, s); + if (i > 0) + nm_str_buf_append_c (&s2, ' '); + nm_str_buf_append_printf (&s2, "\"%s\"", s); + } + + nm_str_buf_maybe_expand (&s1, 1, FALSE); + nm_str_buf_maybe_expand (&s2, 1, FALSE); + + str_escaped = g_strescape (str, NULL); + g_error ("compared words differs: [%s] str=\"%s\"; strv1=%s; strv2=%s", topic, str_escaped, nm_str_buf_get_str (&s1), nm_str_buf_get_str (&s2)); +} + +static void +_strsplit_quoted_test (const char *str, + const char *const*strv_expected) +{ + gs_strfreev char **strv_systemd = NULL; + gs_strfreev char **strv_nm = NULL; + int r; + + g_assert (str); + + r = nmtst_systemd_extract_first_word_all (str, &strv_systemd); + g_assert_cmpint (r, ==, 1); + g_assert (strv_systemd); + + if (!strv_expected) + strv_expected = (const char *const*) strv_systemd; + + _strsplit_quoted_assert_strv ("systemd", str, strv_expected, (const char *const*) strv_systemd); + + strv_nm = nm_utils_strsplit_quoted (str); + g_assert (strv_nm); + _strsplit_quoted_assert_strv ("nm", str, strv_expected, (const char *const*) strv_nm); +} + +static void +test_strsplit_quoted (void) +{ + int i_run; + + _strsplit_quoted_test ("", NM_MAKE_STRV ()); + _strsplit_quoted_test (" ", NM_MAKE_STRV ()); + _strsplit_quoted_test (" ", NM_MAKE_STRV ()); + _strsplit_quoted_test (" \t", NM_MAKE_STRV ()); + _strsplit_quoted_test ("a b", NM_MAKE_STRV ("a", "b")); + _strsplit_quoted_test ("a\\ b", NM_MAKE_STRV ("a b")); + _strsplit_quoted_test (" a\\ \"b\"", NM_MAKE_STRV ("a b")); + _strsplit_quoted_test (" a\\ \"b\" c \n", NM_MAKE_STRV ("a b", "c")); + + for (i_run = 0; i_run < 1000; i_run++) { + gs_strfreev char **strv = NULL; + gs_free char *str = NULL; + + /* create random strv array and join them carefully so that splitting + * them will yield the original value. */ + strv = _strsplit_quoted_create_strv_rand (); + str = _strsplit_quoted_join_strv_rand ((const char *const*) strv); + _strsplit_quoted_test (str, (const char *const*) strv); + } + + /* Create random words and assert that systemd and our implementation can + * both split them (and in the exact same way). */ + for (i_run = 0; i_run < 1000; i_run++) { + gs_free char *s = _strsplit_quoted_create_str_rand (nmtst_get_rand_uint32 () % 150); + + _strsplit_quoted_test (s, NULL); + } +} /*****************************************************************************/ @@ -9042,5 +9269,7 @@ int main (int argc, char **argv) g_test_add_func ("/core/general/test_nm_ip_addr_zero", test_nm_ip_addr_zero); + g_test_add_func ("/core/general/test_strsplit_quoted", test_strsplit_quoted); + return g_test_run (); } diff --git a/shared/nm-glib-aux/nm-shared-utils.c b/shared/nm-glib-aux/nm-shared-utils.c index 47fa7d2ff8..e7b9b043b4 100644 --- a/shared/nm-glib-aux/nm-shared-utils.c +++ b/shared/nm-glib-aux/nm-shared-utils.c @@ -2020,6 +2020,100 @@ nm_utils_escaped_tokens_options_split (char *str, /*****************************************************************************/ +/** + * nm_utils_strsplit_quoted: + * @str: the string to split (e.g. from /proc/cmdline). + * + * This basically does that systemd's extract_first_word() does + * with the flags "EXTRACT_UNQUOTE | EXTRACT_RELAX". This is what + * systemd uses to parse /proc/cmdline, and we do too. + * + * Splits the string. We have nm_utils_strsplit_set() which + * supports a variety of flags. However, extending that already + * complex code to also support quotation and escaping is hard. + * Instead, add a naive implementation. + * + * Returns: (transfer full): the split string. + */ +char ** +nm_utils_strsplit_quoted (const char *str) +{ + gs_unref_ptrarray GPtrArray *arr = NULL; + gs_free char *str_out = NULL; + guint8 ch_lookup[256]; + + nm_assert (str); + + _char_lookup_table_init (ch_lookup, NM_ASCII_WHITESPACES); + + for (;;) { + char quote; + gsize j; + + while (_char_lookup_has (ch_lookup, str[0])) + str++; + + if (str[0] == '\0') + break; + + if (!str_out) + str_out = g_new (char, strlen (str) + 1); + + quote = '\0'; + j = 0; + for (;;) { + if (str[0] == '\\') { + str++; + if (str[0] == '\0') + break; + str_out[j++] = str[0]; + str++; + continue; + } + if (quote) { + if (str[0] == '\0') + break; + if (str[0] == quote) { + quote = '\0'; + str++; + continue; + } + str_out[j++] = str[0]; + str++; + continue; + } + if (str[0] == '\0') + break; + if (NM_IN_SET (str[0], '\'', '"')) { + quote = str[0]; + str++; + continue; + } + if (_char_lookup_has (ch_lookup, str[0])) { + str++; + break; + } + str_out[j++] = str[0]; + str++; + } + + if (!arr) + arr = g_ptr_array_new (); + g_ptr_array_add (arr, g_strndup (str_out, j)); + } + + if (!arr) + return g_new0 (char *, 1); + + g_ptr_array_add (arr, NULL); + + /* We want to return an optimally sized strv array, with no excess + * memory allocated. Hence, clone once more. */ + return nm_memdup (arr->pdata, sizeof (char *) * arr->len); +} + +/*****************************************************************************/ + /** * nm_utils_strv_find_first: * @list: the strv list to search diff --git a/shared/nm-glib-aux/nm-shared-utils.h b/shared/nm-glib-aux/nm-shared-utils.h index 16fbb04dcb..6fb26192d3 100644 --- a/shared/nm-glib-aux/nm-shared-utils.h +++ b/shared/nm-glib-aux/nm-shared-utils.h @@ -680,6 +680,10 @@ nm_utils_escaped_tokens_escape_gstr (const char *str, /*****************************************************************************/ +char **nm_utils_strsplit_quoted (const char *str); + +/*****************************************************************************/ + static inline const char ** nm_utils_escaped_tokens_options_split_list (const char *str) {