diff --git a/libnm-core/tests/test-general.c b/libnm-core/tests/test-general.c
index ad40ee947f..31dd704189 100644
--- a/libnm-core/tests/test-general.c
+++ b/libnm-core/tests/test-general.c
@@ -6559,74 +6559,167 @@ test_nm_utils_enum (void)
 /*****************************************************************************/
 
 static void
-do_test_utils_str_utf8safe (const char *str, const char *expected, NMUtilsStrUtf8SafeFlags flags)
+_do_test_utils_str_utf8safe_unescape (const char *str, const char *expected, gsize expected_len)
 {
-	const char *str_safe, *s;
-	gs_free char *str2 = NULL;
-	gs_free char *str3 = NULL;
+	/* Unescape @str with nm_utils_buf_utf8safe_unescape() and check that the
+	 * result equals the first @expected_len bytes of @expected (which may
+	 * contain embedded NUL bytes). */
+	gsize l;
+	const char *s;
+	gs_free gpointer buf_free_1 = NULL;
+	gs_free char *str_free_1 = NULL;
 
-	str_safe = nm_utils_str_utf8safe_escape (str, flags, &str2);
+	s = nm_utils_buf_utf8safe_unescape (str, &l, &buf_free_1);
+	g_assert_cmpint (expected_len, ==, l);
+	g_assert_cmpstr (s, ==, expected);
 
-	str3 = nm_utils_str_utf8safe_escape_cp (str, flags);
-	g_assert_cmpstr (str3, ==, str_safe);
-	g_assert ((!str && !str3) || (str != str3));
-	g_clear_pointer (&str3, g_free);
+	if (str == NULL) {
+		g_assert (!s);
+		g_assert (!buf_free_1);
+		g_assert_cmpint (l, ==, 0);
+	} else {
+		g_assert (s);
+		if (!strchr (str, '\\')) {
+			g_assert (!buf_free_1);
+			g_assert (s == str);
+			g_assert_cmpint (l, ==, strlen (str));
+		} else {
+			g_assert (buf_free_1);
+			g_assert (s == buf_free_1);
+			g_assert (memcmp (s, expected, expected_len) == 0);
+		}
+	}
+
+	if (   expected
+	    && l == strlen (expected)) {
+		/* there are no embedded NULs. Check that nm_utils_str_utf8safe_unescape() yields the same result. */
+		s = nm_utils_str_utf8safe_unescape (str, &str_free_1);
+		g_assert_cmpstr (s, ==, expected);
+		if (strchr (str, '\\')) {
+			g_assert (str_free_1 != str);
+			g_assert (s == str_free_1);
+		} else
+			g_assert (s == str);
+	}
+}
+
+#define do_test_utils_str_utf8safe_unescape(str, expected) \
+	_do_test_utils_str_utf8safe_unescape (""str"", expected, NM_STRLEN (expected))
+
+/* Escape the first @str_len bytes of @str with both the buffer and the string
+ * API, compare the result with @expected (NULL means that no escaping is
+ * expected) and check that unescaping yields the original input again. */
+static void
+_do_test_utils_str_utf8safe (const char *str, gsize str_len, const char *expected, NMUtilsStrUtf8SafeFlags flags)
+{
+	const char *str_safe;
+	const char *buf_safe;
+	const char *s;
+	gs_free gpointer buf_free_1 = NULL;
+	gs_free char *str_free_1 = NULL;
+	gs_free char *str_free_2 = NULL;
+	gs_free char *str_free_3 = NULL;
+	gs_free char *str_free_4 = NULL;
+	gs_free char *str_free_5 = NULL;
+	gs_free char *str_free_6 = NULL;
+	gs_free char *str_free_7 = NULL;
+	gs_free char *str_free_8 = NULL;
+	gboolean str_has_nul = FALSE;
+
+	buf_safe = nm_utils_buf_utf8safe_escape (str, str_len, flags, &str_free_1);
+
+	str_safe = nm_utils_str_utf8safe_escape (str, flags, &str_free_2);
+
+	if (str_len == 0) {
+		g_assert (buf_safe == NULL);
+		g_assert (str_free_1 == NULL);
+		g_assert (str_safe == str);
+		g_assert (str == NULL || str[0] == '\0');
+		g_assert (str_free_2 == NULL);
+	} else if (str_len == strlen (str)) {
+		g_assert (buf_safe);
+		g_assert_cmpstr (buf_safe, ==, str_safe);
+
+		/* nm_utils_buf_utf8safe_escape() can only return a pointer equal to the input string,
+		 * if and only if str_len is negative. Otherwise, the input str won't be NUL terminated
+		 * and cannot be returned. */
+		g_assert (buf_safe != str);
+		g_assert (buf_safe == str_free_1);
+	} else
+		str_has_nul = TRUE;
+
+	str_free_3 = nm_utils_str_utf8safe_escape_cp (str, flags);
+	g_assert_cmpstr (str_free_3, ==, str_safe);
+	g_assert ((!str && !str_free_3) || (str != str_free_3));
+
+	if (str_len > 0)
+		_do_test_utils_str_utf8safe_unescape (buf_safe, str, str_len);
 
 	if (expected == NULL) {
+		g_assert (!str_has_nul);
+
 		g_assert (str_safe == str);
-		g_assert (!str2);
+		g_assert (!str_free_2);
 		if (str) {
 			g_assert (!strchr (str, '\\'));
 			g_assert (g_utf8_validate (str, -1, NULL));
 		}
 
-		g_assert (str == nm_utils_str_utf8safe_unescape (str_safe, &str3));
-		g_assert (!str3);
+		g_assert (str == nm_utils_str_utf8safe_unescape (str_safe, &str_free_4));
+		g_assert (!str_free_4);
 
-		str3 = nm_utils_str_utf8safe_unescape_cp (str_safe);
+		str_free_5 = nm_utils_str_utf8safe_unescape_cp (str_safe);
 		if (str) {
-			g_assert (str3 != str);
-			g_assert_cmpstr (str3, ==, str);
+			g_assert (str_free_5 != str);
+			g_assert_cmpstr (str_free_5, ==, str);
 		} else
-			g_assert (!str3);
-		g_clear_pointer (&str3, g_free);
+			g_assert (!str_free_5);
 
 		return;
 	}
 
-	g_assert (str);
-	g_assert (str_safe != str);
-	g_assert (str_safe == str2);
-	g_assert (   strchr (str, '\\')
-	          || !g_utf8_validate (str, -1, NULL)
-	          || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII)
-	              && NM_STRCHAR_ANY (str, ch, (guchar) ch >= 127))
-	          || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL)
-	              && NM_STRCHAR_ANY (str, ch, (guchar) ch < ' ')));
-	g_assert (g_utf8_validate (str_safe, -1, NULL));
+	if (!str_has_nul) {
+		g_assert (str);
+		g_assert (str_safe != str);
+		g_assert (str_safe == str_free_2);
+		g_assert (   strchr (str, '\\')
+		          || !g_utf8_validate (str, -1, NULL)
+		          || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII)
+		              && NM_STRCHAR_ANY (str, ch, (guchar) ch >= 127))
+		          || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL)
+		              && NM_STRCHAR_ANY (str, ch, (guchar) ch < ' ')));
+		g_assert (g_utf8_validate (str_safe, -1, NULL));
 
-	str3 = g_strcompress (str_safe);
-	g_assert_cmpstr (str, ==, str3);
-	g_clear_pointer (&str3, g_free);
+		str_free_6 = g_strcompress (str_safe);
+		g_assert_cmpstr (str, ==, str_free_6);
 
-	str3 = nm_utils_str_utf8safe_unescape_cp (str_safe);
-	g_assert (str3 != str);
-	g_assert_cmpstr (str3, ==, str);
-	g_clear_pointer (&str3, g_free);
+		str_free_7 = nm_utils_str_utf8safe_unescape_cp (str_safe);
+		g_assert (str_free_7 != str);
+		g_assert_cmpstr (str_free_7, ==, str);
 
-	s = nm_utils_str_utf8safe_unescape (str_safe, &str3);
-	g_assert (str3 != str);
-	g_assert (s == str3);
-	g_assert_cmpstr (str3, ==, str);
-	g_clear_pointer (&str3, g_free);
+		s = nm_utils_str_utf8safe_unescape (str_safe, &str_free_8);
+		g_assert (str_free_8 != str);
+		g_assert (s == str_free_8);
+		g_assert_cmpstr (str_free_8, ==, str);
+
+		g_assert_cmpstr (str_safe, ==, expected);
+
+		return;
+	}
+
+	g_assert_cmpstr (buf_safe, ==, expected);
 
-	g_assert_cmpstr (str_safe, ==, expected);
 }
+#define do_test_utils_str_utf8safe(str, expected, flags) \
+	_do_test_utils_str_utf8safe (""str"", NM_STRLEN (str), expected, flags)
 
 static void
 test_utils_str_utf8safe (void)
 {
-	do_test_utils_str_utf8safe (NULL, NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	_do_test_utils_str_utf8safe (NULL, 0, NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\\", "\\\\", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\\a", "\\\\a", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
 	do_test_utils_str_utf8safe ("\314", "\\314", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
 	do_test_utils_str_utf8safe ("\314\315x\315\315x", "\\314\\315x\\315\\315x", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
 	do_test_utils_str_utf8safe ("\314\315xx", "\\314\\315xx", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
@@ -6648,6 +6741,18 @@ test_utils_str_utf8safe (void)
 	do_test_utils_str_utf8safe ("㈞abä㈞b", NULL, NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
 	do_test_utils_str_utf8safe ("abäb", "ab\\303\\244b", NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII);
 	do_test_utils_str_utf8safe ("ab\ab", "ab\\007b", NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL);
+
+	do_test_utils_str_utf8safe ("\0", "\\000", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\0a\0", "\\000a\\000", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\\\0", "\\\\\\000", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\n\0", "\n\\000", NM_UTILS_STR_UTF8_SAFE_FLAG_NONE);
+	do_test_utils_str_utf8safe ("\n\0", "\\012\\000", NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL);
+
+	do_test_utils_str_utf8safe_unescape ("\n\\0", "\n\0");
+	do_test_utils_str_utf8safe_unescape ("\n\\01", "\n\01");
+	do_test_utils_str_utf8safe_unescape ("\n\\012", "\n\012");
+	do_test_utils_str_utf8safe_unescape ("\n\\.", "\n.");
+	do_test_utils_str_utf8safe_unescape ("\\n\\.3\\r", "\n.3\r");
 }
 
 /*****************************************************************************/
diff --git a/shared/nm-utils/nm-shared-utils.c b/shared/nm-utils/nm-shared-utils.c
index 43981e3177..25661eaa7c 100644
--- a/shared/nm-utils/nm-shared-utils.c
+++ b/shared/nm-utils/nm-shared-utils.c
@@ -1077,6 +1077,258 @@ _str_append_escape (GString *s, char ch)
 	g_string_append_c (s, '0' + ( ((guchar) ch)       & 07));
 }
 
+/**
+ * nm_utils_buf_utf8safe_unescape:
+ * @str: (allow-none): the NUL terminated, backslash escaped string to
+ *   unescape, or %NULL.
+ * @out_len: (out): the length of the returned buffer in bytes. The result
+ *   may contain NUL bytes, so strlen() cannot be used to determine it.
+ * @to_free: (out): set to the allocated buffer if unescaping required a
+ *   copy, otherwise set to %NULL.
+ *
+ * Reverts the escaping of nm_utils_buf_utf8safe_escape(). Octal sequences
+ * of one to three digits and the escapes \b, \f, \n, \r, \t and \v are
+ * decoded. For any other escaped character the backslash is dropped and
+ * the character is taken literally. A single trailing backslash is
+ * dropped.
+ *
+ * Returns: %NULL if @str is %NULL, @str itself if it contains no backslash,
+ *   or otherwise the allocated buffer that is also returned in @to_free
+ *   and that the caller must release with g_free().
+ **/
+gconstpointer
+nm_utils_buf_utf8safe_unescape (const char *str, gsize *out_len, gpointer *to_free)
+{
+	GString *gstr;
+	gsize len;
+	const char *s;
+
+	g_return_val_if_fail (to_free, NULL);
+	g_return_val_if_fail (out_len, NULL);
+
+	if (!str) {
+		*out_len = 0;
+		*to_free = NULL;
+		return NULL;
+	}
+
+	len = strlen (str);
+
+	s = memchr (str, '\\', len);
+	if (!s) {
+		*out_len = len;
+		*to_free = NULL;
+		return str;
+	}
+
+	gstr = g_string_new_len (NULL, len);
+
+	g_string_append_len (gstr, str, s - str);
+	str = s;
+
+	for (;;) {
+		char ch;
+		guint v;
+
+		nm_assert (str[0] == '\\');
+
+		ch = (++str)[0];
+
+		if (ch == '\0') {
+			/* error. Trailing '\\': drop it. */
+			break;
+		}
+
+		/* Only '0' to '7' start an octal sequence. Like with g_strcompress(),
+		 * "\8" and "\9" are taken literally by the default case below. */
+		if (ch >= '0' && ch <= '7') {
+			v = ch - '0';
+			ch = (++str)[0];
+			if (ch >= '0' && ch <= '7') {
+				v = v * 8 + (ch - '0');
+				ch = (++str)[0];
+				if (ch >= '0' && ch <= '7') {
+					v = v * 8 + (ch - '0');
+					ch = (++str)[0];
+				}
+			}
+			ch = v;
+		} else {
+			switch (ch) {
+			case 'b': ch = '\b'; break;
+			case 'f': ch = '\f'; break;
+			case 'n': ch = '\n'; break;
+			case 'r': ch = '\r'; break;
+			case 't': ch = '\t'; break;
+			case 'v': ch = '\v'; break;
+			default:
+				/* Here we handle "\\\\", but all other unexpected escape sequences are really a bug.
+				 * Take them literally, after removing the escape character */
+				break;
+			}
+			str++;
+		}
+
+		g_string_append_c (gstr, ch);
+
+		s = strchr (str, '\\');
+		if (!s) {
+			g_string_append (gstr, str);
+			break;
+		}
+
+		g_string_append_len (gstr, str, s - str);
+		str = s;
+	}
+
+	*out_len = gstr->len;
+	*to_free = gstr->str;
+	return g_string_free (gstr, FALSE);
+}
+
+/**
+ * nm_utils_buf_utf8safe_escape:
+ * @buf: byte array, possibly in utf-8 encoding, may have NUL characters.
+ * @buflen: the length of @buf in bytes, or -1 if @buf is a NUL terminated
+ *   string.
+ * @flags: #NMUtilsStrUtf8SafeFlags flags
+ * @to_free: (out): return the pointer location of the string
+ *   if a copying was necessary.
+ *
+ * Based on the assumption, that @buf contains UTF-8 encoded bytes,
+ * this will return valid UTF-8 sequence, and invalid sequences
+ * will be escaped with backslash (C escaping, like g_strescape()).
+ * This sanitizes non UTF-8 characters. The result is valid
+ * UTF-8.
+ *
+ * The operation can be reverted with nm_utils_buf_utf8safe_unescape().
+ * Note that if, and only if @buf contains no NUL bytes, the operation
+ * can also be reverted with g_strcompress().
+ *
+ * Depending on @flags, valid UTF-8 characters are not escaped at all
+ * (except the escape character '\\'). This is the difference to g_strescape(),
+ * which escapes all non-ASCII characters. This allows to pass on
+ * valid UTF-8 characters as-is and can be directly shown to the user
+ * as UTF-8 -- with exception of the backslash escape character,
+ * invalid UTF-8 sequences, and other (depending on @flags).
+ *
+ * Returns: the escaped input buffer, as valid UTF-8. %NULL if @buflen
+ *   is zero or if @buf is %NULL with a negative @buflen. If @buflen is
+ *   negative and no escaping is necessary, it returns the input @buf.
+ *   Otherwise, an allocated string @to_free is returned which must be
+ *   freed by the caller with g_free(). The escaping can be reverted by
+ *   nm_utils_buf_utf8safe_unescape().
+ **/
+const char *
+nm_utils_buf_utf8safe_escape (gconstpointer buf, gssize buflen, NMUtilsStrUtf8SafeFlags flags, char **to_free)
+{
+	const char *const str = buf;
+	const char *p = NULL;
+	const char *s;
+	gboolean nul_terminated = FALSE;
+	GString *gstr;
+
+	g_return_val_if_fail (to_free, NULL);
+
+	*to_free = NULL;
+
+	if (buflen == 0)
+		return NULL;
+
+	if (buflen < 0) {
+		if (!str)
+			return NULL;
+		buflen = strlen (str);
+		if (buflen == 0)
+			return str;
+		nul_terminated = TRUE;
+	}
+
+	if (   g_utf8_validate (str, buflen, &p)
+	    && nul_terminated) {
+		/* note that g_utf8_validate() does not allow NUL character inside @str. Good.
+		 * We can treat @str like a NUL terminated string. */
+		if (!NM_STRCHAR_ANY (str, ch,
+		                     (   ch == '\\' \
+		                      || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) \
+		                          && ((guchar) ch) < ' ') \
+		                      || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) \
+		                          && ((guchar) ch) >= 127))))
+			return str;
+	}
+
+	gstr = g_string_sized_new (buflen + 5);
+
+	s = str;
+	do {
+		buflen -= p - s;
+		nm_assert (buflen >= 0);
+
+		for (; s < p; s++) {
+			char ch = s[0];
+
+			/* Compare as guchar: with a signed char, the bytes of valid
+			 * multi-byte UTF-8 sequences would wrongly be less than ' '. */
+			if (ch == '\\')
+				g_string_append (gstr, "\\\\");
+			else if (   (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) \
+			             && ((guchar) ch) < ' ') \
+			         || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) \
+			             && ((guchar) ch) >= 127))
+				_str_append_escape (gstr, ch);
+			else
+				g_string_append_c (gstr, ch);
+		}
+
+		if (buflen <= 0)
+			break;
+
+		_str_append_escape (gstr, p[0]);
+
+		buflen--;
+		if (buflen == 0)
+			break;
+
+		s = &p[1];
+		g_utf8_validate (s, buflen, &p);
+	} while (TRUE);
+
+	*to_free = g_string_free (gstr, FALSE);
+	return *to_free;
+}
+
+/* Like nm_utils_buf_utf8safe_escape(), but takes the data from @bytes.
+ * A %NULL @bytes is treated like an empty buffer. */
+const char *
+nm_utils_buf_utf8safe_escape_bytes (GBytes *bytes, NMUtilsStrUtf8SafeFlags flags, char **to_free)
+{
+	gconstpointer p;
+	gsize l;
+
+	if (bytes)
+		p = g_bytes_get_data (bytes, &l);
+	else {
+		p = NULL;
+		l = 0;
+	}
+
+	return nm_utils_buf_utf8safe_escape (p, l, flags, to_free);
+}
+
+/*****************************************************************************/
+
+const char *
+nm_utils_str_utf8safe_unescape (const char *str, char **to_free)
+{
+	g_return_val_if_fail (to_free, NULL);
+
+	if (!str || !strchr (str, '\\')) {
+		*to_free = NULL;
+		return str;
+	}
+	return (*to_free = g_strcompress (str));
+}
+
 /**
  * nm_utils_str_utf8safe_escape:
  * @str: NUL terminated input string, possibly in utf-8 encoding
@@ -1107,63 +1359,7 @@ _str_append_escape (GString *s, char ch)
 const char *
 nm_utils_str_utf8safe_escape (const char *str, NMUtilsStrUtf8SafeFlags flags, char **to_free)
 {
-	const char *p = NULL;
-	GString *s;
-
-	g_return_val_if_fail (to_free, NULL);
-
-	*to_free = NULL;
-	if (!str || !str[0])
-		return str;
-
-	if (   g_utf8_validate (str, -1, &p)
-	    && !NM_STRCHAR_ANY (str, ch,
-	                        (   ch == '\\' \
-	                         || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) \
-	                             && ch < ' ') \
-	                         || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) \
-	                             && ((guchar) ch) >= 127))))
-		return str;
-
-	s = g_string_sized_new ((p - str) + strlen (p) + 5);
-
-	do {
-		for (; str < p; str++) {
-			char ch = str[0];
-
-			if (ch == '\\')
-				g_string_append (s, "\\\\");
-			else if (   (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_CTRL) \
-			             && ch < ' ') \
-			         || (   NM_FLAGS_HAS (flags, NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII) \
-			             && ((guchar) ch) >= 127))
-				_str_append_escape (s, ch);
-			else
-				g_string_append_c (s, ch);
-		}
-
-		if (p[0] == '\0')
-			break;
-		_str_append_escape (s, p[0]);
-
-		str = &p[1];
-		g_utf8_validate (str, -1, &p);
-	} while (TRUE);
-
-	*to_free = g_string_free (s, FALSE);
-	return *to_free;
-}
-
-const char *
-nm_utils_str_utf8safe_unescape (const char *str, char **to_free)
-{
-	g_return_val_if_fail (to_free, NULL);
-
-	if (!str || !strchr (str, '\\')) {
-		*to_free = NULL;
-		return str;
-	}
-	return (*to_free = g_strcompress (str));
+	return nm_utils_buf_utf8safe_escape (str, -1, flags, to_free);
 }
 
 /**
diff --git a/shared/nm-utils/nm-shared-utils.h b/shared/nm-utils/nm-shared-utils.h
index 0670c6c2f6..a85497e854 100644
--- a/shared/nm-utils/nm-shared-utils.h
+++ b/shared/nm-utils/nm-shared-utils.h
@@ -472,6 +472,10 @@ typedef enum {
 	NM_UTILS_STR_UTF8_SAFE_FLAG_ESCAPE_NON_ASCII = 0x0002,
 } NMUtilsStrUtf8SafeFlags;
 
+const char *nm_utils_buf_utf8safe_escape (gconstpointer buf, gssize buflen, NMUtilsStrUtf8SafeFlags flags, char **to_free);
+const char *nm_utils_buf_utf8safe_escape_bytes (GBytes *bytes, NMUtilsStrUtf8SafeFlags flags, char **to_free);
+gconstpointer nm_utils_buf_utf8safe_unescape (const char *str, gsize *out_len, gpointer *to_free);
+
 const char *nm_utils_str_utf8safe_escape (const char *str, NMUtilsStrUtf8SafeFlags flags, char **to_free);
 const char *nm_utils_str_utf8safe_unescape (const char *str, char **to_free);
 