shared: add nm_utils_escaped_tokens_options_*() API

This will be used for splitting and escaping option parameters in
nmcli (vpn.data).
This commit is contained in:
Thomas Haller 2020-03-27 09:29:43 +01:00
parent d1a9c2bd42
commit 5cc7abd7a4
3 changed files with 454 additions and 0 deletions

View file

@ -659,6 +659,333 @@ test_nm_utils_strsplit_set (void)
/*****************************************************************************/
/* Build a random string by concatenating @len randomly chosen entries of
 * @tokens (an array with @n_tokens elements). An empty-string entry acts as
 * a wildcard: when it is picked, a single random non-NUL byte is appended
 * instead.
 *
 * Returns a newly allocated string that the caller must free. */
static char *
_escaped_tokens_create_random_word_full (const char *const*tokens,
                                         gsize n_tokens,
                                         gsize len)
{
    GString *word = g_string_new (NULL);
    char single_char[2] = { '\0', '\0' };
    gsize remaining;

    for (remaining = len; remaining > 0; remaining--) {
        const char *piece = tokens[nmtst_get_rand_uint32 () % n_tokens];

        if (piece[0] == '\0') {
            /* Wildcard token: keep drawing until we get a byte that does
             * not terminate the string. */
            for (;;) {
                single_char[0] = nmtst_get_rand_uint32 ();
                if (single_char[0] != '\0')
                    break;
            }
            piece = single_char;
        }

        g_string_append (word, piece);
    }

    /* Reallocate the string, so that we don't carry any excess memory from
     * the GString buffer. That way valgrind has a better chance to detect
     * an out of range access. */
    return nm_str_realloc (g_string_free (word, FALSE));
}
/* Set to 1 to exclude characters that are annoying to see in the debugger
 * and printf() output. With the default of 0, the random token tables
 * guarded by this macro additionally contain entries such as "\n" and "\f". */
#define ESCAPED_TOKENS_ONLY_NICE_CHARS 0
static char *
_escaped_tokens_create_random_whitespace (void)
{
static const char *tokens[] = {
" ",
#if !ESCAPED_TOKENS_ONLY_NICE_CHARS
"\n",
"\t",
"\r",
"\f",
#endif
};
return _escaped_tokens_create_random_word_full (tokens, G_N_ELEMENTS (tokens), nmtst_get_rand_word_length (NULL) / 4u);
}
static char *
_escaped_tokens_create_random_word (void)
{
static const char *tokens[] = {
"a",
"b",
"c",
" ",
",",
"=",
"\\",
#if !ESCAPED_TOKENS_ONLY_NICE_CHARS
"\n",
"\f",
":",
"",
#endif
};
return _escaped_tokens_create_random_word_full (tokens, G_N_ELEMENTS (tokens), nmtst_get_rand_word_length (NULL));
}
/* Append a list delimiter to @str.
 *
 * In @strict mode, this appends exactly one ',' if @needs_delimiter is set
 * and nothing otherwise. In non-strict mode, random whitespace is added
 * around the delimiter and a random number of additional
 * "whitespace , whitespace" rounds may follow. Those result in empty list
 * entries that the parser is expected to drop. */
static void
_escaped_tokens_str_append_delimiter (GString *str,
                                      gboolean strict,
                                      gboolean needs_delimiter)
{
    /* Number of extra delimiter rounds (only relevant if non-strict). */
    guint extra_rounds = nmtst_get_rand_word_length (NULL) / 10u;

    for (;;) {
        if (!strict) {
            gs_free char *ws_before = _escaped_tokens_create_random_whitespace ();

            g_string_append (str, ws_before);
        }

        if (needs_delimiter)
            g_string_append_c (str, ',');

        if (strict)
            break;

        {
            gs_free char *ws_after = _escaped_tokens_create_random_whitespace ();

            g_string_append (str, ws_after);
        }

        if (extra_rounds == 0)
            break;
        extra_rounds--;

        /* every additional round emits a real ',' */
        needs_delimiter = TRUE;
    }
}
/* Test wrapper around nm_utils_escaped_tokens_options_split() that also
 * verifies the invariants of the result.
 *
 * @str: the string to split. It is modified in-place.
 * @out_key: (allow-none): receives the parsed key, which always equals @str.
 * @out_val: (allow-none): receives the parsed value (pointing into @str), or
 *   NULL if no value was found. */
static void
_escaped_tokens_split (char *str, const char **out_key, const char **out_val)
{
    const char *key;
    const char *val;
    gsize len;

    /* Validate the argument before dereferencing it via strlen(). */
    g_assert (str);

    /* Remember the original length before the string gets modified
     * in-place, to check that the value stays within the buffer. */
    len = strlen (str);

    nm_utils_escaped_tokens_options_split (str, &key, &val);

    g_assert (key);
    g_assert (key == str);
    if (val) {
        g_assert (val > str);
        g_assert (val > key);
        g_assert (val <= &str[len]);
    }

    NM_SET_OUT (out_key, key);
    NM_SET_OUT (out_val, val);
}
/* Append one escaped "key[=value]" option to @combined.
 *
 * @combined: the string to append to.
 * @key: the unescaped key.
 * @val: the unescaped value, or NULL to append only the key without '='.
 * @strict: if TRUE, emit the canonical form. Otherwise, sprinkle random
 *   whitespace around '=' and the delimiters.
 * @allow_append_delimiter_before: whether to first append optional
 *   (non-mandatory) delimiter noise before the key.
 * @needs_delimiter_after: whether a ',' must follow the option. */
static void
_escaped_tokens_combine (GString *combined,
                         const char *key,
                         const char *val,
                         gboolean strict,
                         gboolean allow_append_delimiter_before,
                         gboolean needs_delimiter_after)
{
    gs_free char *key_to_free = NULL;
    gs_free char *val_to_free = NULL;
    const gboolean add_noise = !strict;

    if (allow_append_delimiter_before)
        _escaped_tokens_str_append_delimiter (combined, strict, FALSE);

    g_string_append (combined,
                     nm_utils_escaped_tokens_options_escape_key (key, &key_to_free));

    if (val != NULL) {
        if (add_noise) {
            gs_free char *ws_before = _escaped_tokens_create_random_whitespace ();

            g_string_append (combined, ws_before);
        }

        g_string_append_c (combined, '=');

        if (add_noise) {
            gs_free char *ws_after = _escaped_tokens_create_random_whitespace ();

            g_string_append (combined, ws_after);
        }

        g_string_append (combined,
                         nm_utils_escaped_tokens_options_escape_val (val, &val_to_free));
    }

    _escaped_tokens_str_append_delimiter (combined, strict, needs_delimiter_after);
}
/* Check one hand-written key/value example.
 *
 * First verify that strictly combining @expected_key and @expected_val gives
 * exactly @expected_combination. Then parse several inputs and verify that
 * each of them yields the expected key and value again:
 *   - @expected_combination itself,
 *   - a non-strict (randomly whitespace padded) combination,
 *   - each of the @n_other alternative spellings in @other.
 *
 * An input that splits to an empty list is only accepted for the empty
 * key without value. */
static void
_escaped_tokens_check_one_impl (const char *expected_key,
                                const char *expected_val,
                                const char *expected_combination,
                                const char *const*other,
                                gsize n_other)
{
    nm_auto_free_gstring GString *strict_combined = g_string_new (NULL);
    const gsize n_inputs = n_other + 2u;
    gsize idx;

    g_assert (expected_key);
    g_assert (expected_combination);
    g_assert (other);

    _escaped_tokens_combine (strict_combined,
                             expected_key,
                             expected_val,
                             TRUE,
                             TRUE,
                             FALSE);

    g_assert_cmpstr (strict_combined->str, ==, expected_combination);

    for (idx = 0; idx < n_inputs; idx++) {
        nm_auto_free_gstring GString *relaxed_combined = NULL;
        gs_free const char **tokens = NULL;
        gs_free char *first_token = NULL;
        const char *input;
        const char *key;
        const char *val;

        switch (idx) {
        case 0:
            input = expected_combination;
            break;
        case 1:
            _escaped_tokens_combine (nm_gstring_prepare (&relaxed_combined),
                                     expected_key,
                                     expected_val,
                                     FALSE,
                                     TRUE,
                                     FALSE);
            input = relaxed_combined->str;
            break;
        default:
            input = other[idx - 2];
            break;
        }

        tokens = nm_utils_escaped_tokens_options_split_list (input);
        if (tokens == NULL) {
            /* Nothing was parsed: only valid for the empty option. */
            g_assert_cmpstr (expected_key, ==, "");
            g_assert_cmpstr (expected_val, ==, NULL);
            continue;
        }

        g_assert (expected_val || expected_key[0]);

        g_assert_cmpuint (NM_PTRARRAY_LEN (tokens), ==, 1u);

        /* split() modifies the string in-place, hence work on a copy. */
        first_token = g_strdup (tokens[0]);

        _escaped_tokens_split (first_token, &key, &val);
        g_assert_cmpstr (key, ==, expected_key);
        g_assert_cmpstr (val, ==, expected_val);
    }
}
/* Convenience wrapper around _escaped_tokens_check_one_impl(): the variadic
 * arguments are alternative input strings, which are passed on as a string
 * array (NM_MAKE_STRV) together with their count (NM_NARG). */
#define _escaped_tokens_check_one(expected_key, expected_val, expected_combination, ...) \
_escaped_tokens_check_one_impl (expected_key, expected_val, expected_combination, NM_MAKE_STRV (__VA_ARGS__), NM_NARG (__VA_ARGS__))
/* Test the nm_utils_escaped_tokens_options_*() API.
 *
 * The first part is a randomized round-trip test: random key/value pairs are
 * escaped and combined into one comma separated list, which is then split
 * again and compared with the original pairs. The second part checks a few
 * hand-written examples. */
static void
test_nm_utils_escaped_tokens (void)
{
    int i_run;

    for (i_run = 0; i_run < 1000; i_run++) {
        const guint num_options = nmtst_get_rand_word_length (NULL);
        gs_unref_ptrarray GPtrArray *options = g_ptr_array_new_with_free_func (g_free);
        nm_auto_free_gstring GString *combined = g_string_new (NULL);
        gs_free const char **strv_split = NULL;
        guint i_option;
        guint i;

        /* Generate a list of random words for option key-value pairs. Even
         * indexes hold the keys, odd indexes the values (possibly NULL). */
        for (i_option = 0; i_option < 2u * num_options; i_option++) {
            char *word = NULL;

            if (   i_option % 2u == 1
                && nmtst_get_rand_uint32 () % 5 == 0
                && strlen (options->pdata[options->len - 1]) > 0u) {
                /* For some options, leave the value unset and only generate a key.
                 *
                 * If key is "", then we cannot do that, because the test below would try
                 * to append "" to the combined list, which the parser then would drop.
                 * Only test omitting the value, if strlen() of the key is positive. */
            } else
                word = _escaped_tokens_create_random_word ();
            g_ptr_array_add (options, word);
        }

        /* Combine the options in one comma separated list, with proper escaping. */
        for (i_option = 0; i_option < num_options; i_option++) {
            _escaped_tokens_combine (combined,
                                     options->pdata[2u*i_option + 0u],
                                     options->pdata[2u*i_option + 1u],
                                     FALSE,
                                     i_option == 0,
                                     i_option != num_options - 1);
        }

        /* Ensure that we can split and parse the options without difference. */
        strv_split = nm_utils_escaped_tokens_options_split_list (combined->str);
        for (i_option = 0; i_option < num_options; i_option++) {
            const char *expected_key = options->pdata[2u*i_option + 0u];
            const char *expected_val = options->pdata[2u*i_option + 1u];
            gs_free char *s_split = i_option < NM_PTRARRAY_LEN (strv_split) ? g_strdup (strv_split[i_option]) : NULL;
            const char *key = NULL;
            const char *val = NULL;

            if (s_split)
                _escaped_tokens_split (s_split, &key, &val);

            if (   !nm_streq0 (key, expected_key)
                || !nm_streq0 (val, expected_val)) {
                /* Print diagnostics before the assertions below fail. Note that
                 * key is NULL if the parser returned too few tokens, so it must
                 * be compared and printed in a NULL-safe way. */
                g_print (">>> ASSERTION IS ABOUT TO FAIL for item %5d of %5d\n", i_option, num_options);
                g_print (">>> combined = \"%s\"\n", combined->str);
                g_print (">>> %c parsed[%5d].key = %s%s%s\n", nm_streq0 (key, expected_key) ? ' ' : 'X', i_option, NM_PRINT_FMT_QUOTE_STRING (key));
                g_print (">>> %c parsed[%5d].val = %s%s%s\n", nm_streq0 (val, expected_val) ? ' ' : 'X', i_option, NM_PRINT_FMT_QUOTE_STRING (val));
                for (i = 0; i < num_options; i++) {
                    g_print (">>> %c original[%5d].key = \"%s\"\n", i == i_option ? '*' : ' ', i, (char *) options->pdata[2u*i + 0u]);
                    g_print (">>> %c original[%5d].val = %s%s%s\n", i == i_option ? '*' : ' ', i, NM_PRINT_FMT_QUOTE_STRING ((char *) options->pdata[2u*i + 1u]));
                }
                for (i = 0; i < NM_PTRARRAY_LEN (strv_split); i++)
                    g_print (">>> split[%5d] = \"%s\"\n", i, strv_split[i]);
            }

            g_assert_cmpstr (key, ==, expected_key);
            g_assert_cmpstr (val, ==, expected_val);
        }
        g_assert_cmpint (NM_PTRARRAY_LEN (strv_split), ==, num_options);

        /* Above we show a full round-trip of random option key-value pairs: they can
         * be escaped, concatenated, split into a list and split into key/value without
         * loss. This proves that every option key-value pair can be represented as a
         * combined string and parsed back.
         *
         * Now, just check that we can also parse arbitrary random words in nm_utils_escaped_tokens_options_split().
         * split() is a non-injective surjective function. As we check the round-trip above for random words, where
         * options-split() is the last step, we show that every random word can be the output of the function
         * (which shows the surjective part).
         *
         * But multiple random input arguments may map to the same output argument (non-injective).
         * Just test whether we can handle random input words without crashing. For that, just use the
         * above generated list of random words.
         */
        for (i = 0; i < 1u + options->len; i++) {
            gs_free char *str = NULL;
            const char *cstr;

            /* Index 0 is the combined string, the others are the random words. */
            if (i == 0)
                cstr = combined->str;
            else
                cstr = options->pdata[i - 1u];
            if (!cstr)
                continue;

            str = g_strdup (cstr);
            _escaped_tokens_split (str, NULL, NULL);
        }
    }

    _escaped_tokens_check_one ("", NULL, "");
    _escaped_tokens_check_one ("", "", "=", " =");
    _escaped_tokens_check_one ("a", "b", "a=b", "a = b");
    _escaped_tokens_check_one ("a\\=", "b\\=", "a\\\\\\==b\\\\=", "a\\\\\\==b\\\\\\=");
    _escaped_tokens_check_one ("\\=", "\\=", "\\\\\\==\\\\=", "\\\\\\==\\\\\\=");
    _escaped_tokens_check_one (" ", "bb=", "\\ =bb=", "\\ =bb\\=");
    _escaped_tokens_check_one (" ", "bb\\=", "\\ =bb\\\\=", "\\ =bb\\\\\\=");
    _escaped_tokens_check_one ("a b", "a b", "a b=a b");
    _escaped_tokens_check_one ("a b", "a b", "a b=a b");
    _escaped_tokens_check_one ("a = b", "a = b", "a \\= b=a = b", "a \\= b=a \\= b");
}
/*****************************************************************************/
typedef struct {
int val;
CList lst;
@ -8553,6 +8880,7 @@ int main (int argc, char **argv)
g_test_add_func ("/core/general/test_dedup_multi", test_dedup_multi);
g_test_add_func ("/core/general/test_utils_str_utf8safe", test_utils_str_utf8safe);
g_test_add_func ("/core/general/test_nm_utils_strsplit_set", test_nm_utils_strsplit_set);
g_test_add_func ("/core/general/test_nm_utils_escaped_tokens", test_nm_utils_escaped_tokens);
g_test_add_func ("/core/general/test_nm_in_set", test_nm_in_set);
g_test_add_func ("/core/general/test_nm_in_strset", test_nm_in_strset);
g_test_add_func ("/core/general/test_setting_vpn_items", test_setting_vpn_items);

View file

@ -1906,6 +1906,91 @@ nm_utils_escaped_tokens_escape_full (const char *str,
return ret;
}
/**
 * nm_utils_escaped_tokens_options_split:
 * @str: the src string. This string will be modified in-place.
 *   The output values will point into @str.
 * @out_key: (allow-none): the returned output key. This will always be set to @str
 *   itself. @str will be modified to contain only the unescaped, truncated
 *   key name.
 * @out_val: (allow-none): returns the parsed (and unescaped) value or %NULL, if @str
 *   contains no '=' delimiter.
 *
 * Honors backslash escaping to parse @str as "key=value" pairs. Optionally, if no '='
 * is present, @out_val will be returned as %NULL. Backslash can be used to escape
 * '=', ',', '\\', and ascii whitespace. Other backslash sequences are taken verbatim.
 *
 * For keys, '=' obviously must be escaped. For values, that is optional because an
 * unescaped '=' is just taken verbatim. For example, in a key, the sequence "\\="
 * must be escaped as "\\\\\\=". For the value, that works too, but "\\\\=" is also
 * accepted.
 *
 * Unescaped whitespace around the key and value is removed. Whitespace in general
 * need not be escaped, unless it is at the beginning or the end of key/value.
 */
void
nm_utils_escaped_tokens_options_split (char *str,
                                       const char **out_key,
                                       const char **out_val)
{
    const char *val = NULL;
    gsize i;
    gsize j;
    gsize last_space_idx;
    gboolean last_space_has;

    nm_assert (str);

    /* @i is the read position and @j the write position (j <= i), the
     * string is unescaped in-place. Start by skipping leading whitespace. */
    i = 0;
    while (g_ascii_isspace (str[i]))
        i++;

    j = 0;
    /* While @last_space_has is set, @last_space_idx is the write position
     * where the current run of unescaped (potentially trailing) whitespace
     * started. */
    last_space_idx = 0;
    last_space_has = FALSE;
    while (str[i] != '\0') {
        if (g_ascii_isspace (str[i])) {
            if (!last_space_has) {
                last_space_has = TRUE;
                last_space_idx = j;
            }
        } else {
            if (str[i] == '\\') {
                /* Skip the backslash of a recognized escape sequence. The
                 * escaped character is copied verbatim below. */
                if (   NM_IN_SET (str[i + 1u], '\\', ',', '=')
                    || g_ascii_isspace (str[i + 1u]))
                    i++;
            } else if (str[i] == '=') {
                /* Encounter an unescaped '=' character. When we still parse the key, this
                 * is the separator we were waiting for. If we are parsing the value,
                 * we take the character verbatim. */
                if (!val) {
                    if (last_space_has) {
                        /* Truncate the trailing whitespace of the key. */
                        str[last_space_idx] = '\0';
                        j = last_space_idx + 1;
                        last_space_has = FALSE;
                    } else
                        str[j++] = '\0';
                    val = &str[j];
                    i++;
                    /* Skip the leading whitespace of the value. */
                    while (g_ascii_isspace (str[i]))
                        i++;
                    continue;
                }
            }
            last_space_has = FALSE;
        }
        str[j++] = str[i++];
    }

    /* Terminate the string, thereby dropping trailing unescaped whitespace. */
    if (last_space_has)
        str[last_space_idx] = '\0';
    else
        str[j] = '\0';

    /* Both output arguments are optional. */
    if (out_key)
        *out_key = str;
    if (out_val)
        *out_val = val;
}
/*****************************************************************************/
/**

View file

@ -652,6 +652,47 @@ nm_utils_escaped_tokens_escape_gstr (const char *str,
/*****************************************************************************/
/* Split @str at ',' into a list of option tokens, with the STRSTRIP and
 * ALLOW_ESCAPING flags of nm_utils_strsplit_set_full() enabled.
 *
 * NOTE(review): the result is presumably a single allocation that the caller
 * releases with g_free() and NULL if there are no tokens (that is how the
 * unit test treats it) -- confirm against nm_utils_strsplit_set_full(). */
static inline const char **
nm_utils_escaped_tokens_options_split_list (const char *str)
{
    const char **tokens;

    tokens = nm_utils_strsplit_set_full (str,
                                         ",",
                                           NM_UTILS_STRSPLIT_SET_FLAGS_STRSTRIP
                                         | NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING);
    return tokens;
}
/* Splits one option token "key[=value]" in-place into its unescaped key and
 * value. @str is modified, the key is always @str itself and the value is
 * NULL if @str contains no unescaped '='. */
void nm_utils_escaped_tokens_options_split (char *str,
const char **out_key,
const char **out_val);
/* Escape @key for use as the key of a "key=value" option inside a comma
 * separated option list. Both ',' and '=' are passed as characters to escape,
 * together with the flags for backslash (as needed) and leading/trailing
 * space.
 *
 * NOTE(review): presumably the result is either @key itself or a newly
 * allocated string that is also returned via @out_to_free for the caller to
 * free -- confirm against nm_utils_escaped_tokens_escape_full(). */
static inline const char *
nm_utils_escaped_tokens_options_escape_key (const char *key,
                                            char **out_to_free)
{
    const char *escaped;

    escaped = nm_utils_escaped_tokens_escape_full (key,
                                                   ",=",
                                                   NULL,
                                                     NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_AS_NEEDED
                                                   | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE
                                                   | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE,
                                                   out_to_free);
    return escaped;
}
/* Escape @val for use as the value of a "key=value" option inside a comma
 * separated option list. In contrast to the key variant, only ',' is passed
 * in the first character set, while '=' is passed in the second one.
 *
 * NOTE(review): the second set presumably lists characters that are only
 * escaped "as needed", and the result is presumably either @val itself or a
 * newly allocated string that is also returned via @out_to_free -- confirm
 * against nm_utils_escaped_tokens_escape_full(). */
static inline const char *
nm_utils_escaped_tokens_options_escape_val (const char *val,
                                            char **out_to_free)
{
    const char *escaped;

    escaped = nm_utils_escaped_tokens_escape_full (val,
                                                   ",",
                                                   "=",
                                                     NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_BACKSLASH_AS_NEEDED
                                                   | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_LEADING_SPACE
                                                   | NM_UTILS_ESCAPED_TOKENS_ESCAPE_FLAGS_ESCAPE_TRAILING_SPACE,
                                                   out_to_free);
    return escaped;
}
/*****************************************************************************/
#define NM_UTILS_CHECKSUM_LENGTH_MD5 16
#define NM_UTILS_CHECKSUM_LENGTH_SHA1 20
#define NM_UTILS_CHECKSUM_LENGTH_SHA256 32