shared: pre-calculate number of tokens in nm_utils_strsplit_set_full()

Instead of growing the buffer for the tokens (and reallocating),
do one pre-run over the string and count the delimiters. This
way we know how much space we need and we don't need to
reallocate.

Interestingly, this is notably slower than the previous implementation,
because previously it would not bother determining the right number of
tokens but just over-allocate with a reasonable guess of 8 and grow the
buffer exponentially. Still, I like this better because, while it may
be slower in common scenarios, it allocates the exact amount of buffer
space.
This commit is contained in:
Thomas Haller 2019-04-05 22:44:49 +02:00
parent c1f340401f
commit a1425a4c91

View file

@ -1020,12 +1020,11 @@ nm_utils_strsplit_set_full (const char *str,
const char *delimiters, const char *delimiters,
NMUtilsStrsplitSetFlags flags) NMUtilsStrsplitSetFlags flags)
{ {
const char **ptr, **ptr0; const char **ptr;
gsize alloc_size; gsize num_tokens;
gsize plen; gsize i_token;
gsize i; gsize str_len_p1;
gsize str_len; const char *c_str;
char *s0;
char *s; char *s;
guint8 ch_lookup[256]; guint8 ch_lookup[256];
const gboolean f_allow_escaping = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING); const gboolean f_allow_escaping = NM_FLAGS_HAS (flags, NM_UTILS_STRSPLIT_SET_FLAGS_ALLOW_ESCAPING);
@ -1056,37 +1055,68 @@ nm_utils_strsplit_set_full (const char *str,
return NULL; return NULL;
} }
str_len = strlen (str) + 1;
alloc_size = 8;
/* we allocate the buffer larger, so to copy @str at the
* end of it as @s0. */
ptr0 = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len);
s0 = (char *) &ptr0[alloc_size + 1];
memcpy (s0, str, str_len);
plen = 0;
s = s0;
ptr = ptr0;
num_tokens = 1;
c_str = str;
while (TRUE) { while (TRUE) {
if (plen >= alloc_size) {
const char **ptr_old = ptr;
/* reallocate the buffer. Note that for now the string while (G_LIKELY (!_char_lookup_has (ch_lookup, c_str[0]))) {
* continues to be in ptr0/s0. We fix that at the end. */ if (c_str[0] == '\0')
alloc_size *= 2; goto done1;
ptr = g_malloc ((sizeof (const char *) * (alloc_size + 1)) + str_len); c_str++;
memcpy (ptr, ptr_old, sizeof (const char *) * plen);
if (ptr_old != ptr0)
g_free (ptr_old);
} }
ptr[plen++] = s; /* we assume escapings are not frequent. After we found
* this delimiter, check whether it was escaped by counting
* the backslashed before. */
if (f_allow_escaping) {
const char *c2 = c_str;
while ( c2 > str
&& c2[-1] == '\\')
c2--;
if (((c_str - c2) % 2) != 0) {
/* the delimiter is escaped. This was not an accepted delimiter. */
c_str++;
continue;
}
}
c_str++;
/* if we drop empty tokens, then we now skip over all consecutive delimiters. */
if (!f_preseve_empty) {
while (_char_lookup_has (ch_lookup, c_str[0]))
c_str++;
if (c_str[0] == '\0')
break;
}
num_tokens++;
}
done1:
nm_assert (c_str[0] == '\0');
str_len_p1 = (c_str - str) + 1;
nm_assert (str[str_len_p1 - 1] == '\0');
ptr = g_malloc ((sizeof (const char *) * (num_tokens + 1)) + str_len_p1);
s = (char *) &ptr[num_tokens + 1];
memcpy (s, str, str_len_p1);
i_token = 0;
while (TRUE) {
nm_assert (i_token < num_tokens);
ptr[i_token++] = s;
if (s[0] == '\0') { if (s[0] == '\0') {
nm_assert (f_preseve_empty); nm_assert (f_preseve_empty);
goto done; goto done2;
} }
nm_assert ( f_preseve_empty nm_assert ( f_preseve_empty
|| !_char_lookup_has (ch_lookup, s[0])); || !_char_lookup_has (ch_lookup, s[0]));
@ -1096,10 +1126,10 @@ nm_utils_strsplit_set_full (const char *str,
&& f_allow_escaping)) { && f_allow_escaping)) {
s++; s++;
if (s[0] == '\0') if (s[0] == '\0')
goto done; goto done2;
s++; s++;
} else if (s[0] == '\0') } else if (s[0] == '\0')
goto done; goto done2;
else else
s++; s++;
} }
@ -1107,26 +1137,18 @@ nm_utils_strsplit_set_full (const char *str,
nm_assert (_char_lookup_has (ch_lookup, s[0])); nm_assert (_char_lookup_has (ch_lookup, s[0]));
s[0] = '\0'; s[0] = '\0';
s++; s++;
if (!f_preseve_empty) { if (!f_preseve_empty) {
while (_char_lookup_has (ch_lookup, s[0])) while (_char_lookup_has (ch_lookup, s[0]))
s++; s++;
if (s[0] == '\0') if (s[0] == '\0')
goto done; goto done2;
} }
} }
done: done2:
ptr[plen] = NULL; nm_assert (i_token == num_tokens);
ptr[i_token] = NULL;
if (ptr != ptr0) {
/* we reallocated the buffer. We must copy over the
* string @s0 and adjust the pointers. */
s = (char *) &ptr[alloc_size + 1];
memcpy (s, s0, str_len);
for (i = 0; i < plen; i++)
ptr[i] = &s[ptr[i] - s0];
g_free (ptr0);
}
return ptr; return ptr;
} }