shared: add nm_utils_strsplit_set() helper

A replacement for g_strsplit_set(). While g_strsplit_set()
does (n+1) mallocs and n slice allocations, this needs
roughly O(log(n)) mallocs.

Another difference from g_strsplit_set() is that this function
treats multiple delimiters as one (and thus never returns empty
words). While I can see that sometimes you may want to keep empty
words (like parsing a CSV file and preserve empty cells), we usually
use this function for splitting user input. In such cases, we want
to treat multiple delimiters as one.
This commit is contained in:
Thomas Haller 2017-09-14 19:14:01 +02:00
parent f6a727685e
commit daa4604c12
3 changed files with 176 additions and 0 deletions

View file

@ -98,6 +98,67 @@ test_nm_g_slice_free_fcn (void)
/*****************************************************************************/
static void
_do_test_nm_utils_strsplit_set (const char *str, ...)
{
gs_unref_ptrarray GPtrArray *args_array = g_ptr_array_new ();
const char *const*args;
gs_free const char **words = NULL;
const char *arg;
gsize i;
va_list ap;
va_start (ap, str);
while ((arg = va_arg (ap, const char *)))
g_ptr_array_add (args_array, (gpointer) arg);
va_end (ap);
g_ptr_array_add (args_array, NULL);
args = (const char *const*) args_array->pdata;
words = nm_utils_strsplit_set (str, " \t\n");
if (!args[0]) {
g_assert (!words);
g_assert ( !str
|| NM_STRCHAR_ALL (str, ch, NM_IN_SET (ch, ' ', '\t', '\n')));
return;
}
g_assert (words);
for (i = 0; args[i] || words[i]; i++) {
g_assert (args[i]);
g_assert (words[i]);
g_assert (args[i][0]);
g_assert (NM_STRCHAR_ALL (args[i], ch, !NM_IN_SET (ch, ' ', '\t', '\n')));
g_assert_cmpstr (args[i], ==, words[i]);
}
}
/* Convenience wrapper around _do_test_nm_utils_strsplit_set() that appends
 * the terminating NULL sentinel to the list of expected words. The GNU
 * `##__VA_ARGS__` form drops the preceding comma when no expected words
 * are given. */
#define do_test_nm_utils_strsplit_set(str, ...) \
_do_test_nm_utils_strsplit_set (str, ##__VA_ARGS__, NULL)
/* Exercises nm_utils_strsplit_set() with the delimiter set " \t\n". Each
 * call names the input string followed by the words the split must yield. */
static void
test_nm_utils_strsplit_set (void)
{
/* inputs without any word: NULL, empty, and delimiter-only strings. */
do_test_nm_utils_strsplit_set (NULL);
do_test_nm_utils_strsplit_set ("");
do_test_nm_utils_strsplit_set ("\t");
do_test_nm_utils_strsplit_set (" \t\n");
/* basic splitting. */
do_test_nm_utils_strsplit_set ("a", "a");
do_test_nm_utils_strsplit_set ("a b", "a", "b");
/* '\r' is not in the delimiter set, so it stays part of the word. */
do_test_nm_utils_strsplit_set ("a\rb", "a\rb");
/* leading and trailing delimiters are dropped. */
do_test_nm_utils_strsplit_set (" a\rb ", "a\rb");
do_test_nm_utils_strsplit_set (" a bbbd afds ere", "a", "bbbd", "afds", "ere");
/* 30 words: more than a small initial pointer array can hold, so the
 * result array has to grow while splitting. */
do_test_nm_utils_strsplit_set ("1 2 3 4 5 6 7 8 9 0 "
"1 2 3 4 5 6 7 8 9 0 "
"1 2 3 4 5 6 7 8 9 0",
"1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
"1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
"1", "2", "3", "4", "5", "6", "7", "8", "9", "0");
}
/*****************************************************************************/
typedef struct {
int val;
int idx;
@ -6201,6 +6262,7 @@ int main (int argc, char **argv)
g_test_add_func ("/core/general/test_c_list_sort", test_c_list_sort);
g_test_add_func ("/core/general/test_dedup_multi", test_dedup_multi);
g_test_add_func ("/core/general/test_utils_str_utf8safe", test_utils_str_utf8safe);
g_test_add_func ("/core/general/test_nm_utils_strsplit_set", test_nm_utils_strsplit_set);
g_test_add_func ("/core/general/test_nm_in_set", test_nm_in_set);
g_test_add_func ("/core/general/test_nm_in_strset", test_nm_in_strset);
g_test_add_func ("/core/general/test_setting_vpn_items", test_setting_vpn_items);

View file

@ -324,6 +324,118 @@ _nm_utils_ascii_str_to_int64 (const char *str, guint base, gint64 min, gint64 ma
/*****************************************************************************/
/**
 * nm_utils_strsplit_set:
 * @str: the string to split.
 * @delimiters: the set of delimiters. If %NULL, defaults to " \t\n",
 *   like bash's $IFS.
 *
 * This is a replacement for g_strsplit_set() which avoids allocating
 * every word separately. Instead, @str is copied once into the same
 * allocation that holds the strv array, and all words point into that
 * copy.
 *
 * Another difference from g_strsplit_set() is that this never returns
 * empty words. Multiple delimiters are combined and treated as one.
 *
 * The pointer array starts with room for 8 words and doubles in size
 * whenever it is full, so splitting n words takes O(log(n)) allocations.
 *
 * Returns: %NULL if @str is %NULL or contains only delimiters.
 *   Otherwise, a %NULL terminated strv array containing non-empty
 *   words, split at the delimiter characters (delimiter characters
 *   are removed).
 *   The strings to which the result strv array points to are allocated
 *   after the returned result itself. Don't free the strings themself,
 *   but free everything with g_free().
 */
const char **
nm_utils_strsplit_set (const char *str, const char *delimiters)
{
	const char **ptr, **ptr0;
	gsize alloc_size, plen, i;
	gsize str_len;
	char *s0;
	char *s;
	guint8 delimiters_table[256];

	if (!str)
		return NULL;

	/* initialize lookup table for delimiter */
	if (!delimiters)
		delimiters = " \t\n";
	memset (delimiters_table, 0, sizeof (delimiters_table));
	for (i = 0; delimiters[i]; i++)
		delimiters_table[(guint8) delimiters[i]] = 1;

#define _is_delimiter(ch, delimiters_table) \
	((delimiters_table)[(guint8) (ch)] != 0)

	/* skip initial delimiters, and return if the remaining string is
	 * empty. */
	while (_is_delimiter (str[0], delimiters_table))
		str++;
	if (!str[0])
		return NULL;

	/* length including the terminating NUL. */
	str_len = strlen (str) + 1;
	alloc_size = 8;

	/* we allocate the buffer larger, so to copy @str at the
	 * end of it as @s0. The layout is: (alloc_size + 1) pointers
	 * (the +1 is for the terminating NULL), followed by the string. */
	ptr0 = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len);
	s0 = (char *) &ptr0[alloc_size + 1];
	memcpy (s0, str, str_len);

	plen = 0;
	s = s0;
	ptr = ptr0;

	while (TRUE) {
		if (plen >= alloc_size) {
			const char **ptr_old = ptr;

			/* reallocate the buffer. Note that for now the string
			 * continues to be in ptr0/s0. We fix that at the end.
			 *
			 * Grow geometrically: doubling keeps the number of
			 * allocations logarithmic in the number of words and the
			 * total amount of pointer copying linear. */
			alloc_size *= 2;
			ptr = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len);
			memcpy (ptr, ptr_old, sizeof (const char *) * plen);
			/* ptr0 still holds the string the words point into; it is
			 * only released once the string was moved over (below). */
			if (ptr_old != ptr0)
				g_free (ptr_old);
		}

		ptr[plen++] = s;

		nm_assert (s[0] && !_is_delimiter (s[0], delimiters_table));

		/* find the end of the current word. */
		while (TRUE) {
			s++;
			if (_is_delimiter (s[0], delimiters_table))
				break;
			if (s[0] == '\0')
				goto done;
		}

		/* terminate the word in place and skip the delimiters that
		 * follow it. */
		s[0] = '\0';
		s++;
		while (_is_delimiter (s[0], delimiters_table))
			s++;
		if (s[0] == '\0')
			break;
	}
done:
	ptr[plen] = NULL;

	if (ptr != ptr0) {
		/* we reallocated the buffer. We must copy over the
		 * string @s0 and adjust the pointers. */
		s = (char *) &ptr[alloc_size + 1];
		memcpy (s, s0, str_len);
		for (i = 0; i < plen; i++)
			ptr[i] = &s[ptr[i] - s0];
		g_free (ptr0);
	}

	return ptr;
}
/**
* nm_utils_strv_find_first:
* @list: the strv list to search

View file

@ -153,6 +153,8 @@ void nm_utils_strbuf_append_str (char **buf, gsize *len, const char *str);
/*****************************************************************************/
/* Splits @str at any character of @delimiters (" \t\n" if NULL). The returned
 * NULL-terminated array and the words it points to live in one allocation:
 * release it with a single g_free(), and do not free the individual words. */
const char **nm_utils_strsplit_set (const char *str, const char *delimiters);
gssize nm_utils_strv_find_first (char **list, gssize len, const char *needle);
char **_nm_utils_strv_cleanup (char **strv,