glib-aux: add nm_ascii_is_ctrl() helper (and similar)

These functions have overlap with g_ascii_is*() functions.

However g_ascii_is*() (and the is* functions from <ctype.h>) are
always confusing to me, in the sense that it's not clearly stated
which characters qualify for a certain category. And review is not
easy either, because they are implemented via a table lookup.

E.g. were you aware that 127 is considered g_ascii_iscntrl()? Probably
you were, but that is not easy to see anywhere.

The main point of our own functions is to make it easier to see how
characters get categorized, by using comparisons instead of a table lookup.

Also, several places in the existing code did in fact not use the
g_ascii_is*() macros, possibly because of the (perceived) difficulty of
understanding their exact meaning. As a consequence, several checks were wrong.

For example, (ch < ' ') is not a valid check for testing whether
the character is an ASCII control character, for two reasons:

 - if char is a signed type (as it likely is), then this also evaluates
   to TRUE for every byte of a non-ASCII UTF-8 character, that is, for
   all byte values greater than 127.

 - it does not consider DEL character (127) a control character.
This commit is contained in:
Thomas Haller 2021-07-16 07:46:23 +02:00
parent bdfaa4520e
commit 5b6005d06e
No known key found for this signature in database
GPG key ID: 29C2366E4DFC5728
2 changed files with 78 additions and 0 deletions

View file

@ -2481,6 +2481,40 @@ nm_hexchar(int x, gboolean upper_case)
return upper_case ? _nm_hexchar_table_upper[x & 15] : _nm_hexchar_table_lower[x & 15];
}
static inline gboolean
nm_ascii_is_ctrl(char ch)
{
/* 0 to ' '-1 is the C0 range.
*
* Other ranges may also be considered control characters, but NOT
* CONSIDERED by this function. For example:
* - DEL (127) is also a control character.
* - SP (' ', 0x20) is also considered a control character.
* - DEL+1 (0x80) to 0x9F is C1 range.
* - NBSP (0xA0) and SHY (0xAD) are ISO 8859 special characters
*/
return ((guchar) ch) < ' ';
}
static inline gboolean
nm_ascii_is_ctrl_or_del(char ch)
{
return ((guchar) ch) < ' ' || ch == 127;
}
/**
 * nm_ascii_is_non_ascii:
 * @ch: the character to classify
 *
 * Checks whether @ch is outside the 7-bit ASCII range, i.e. whether its
 * unsigned value is greater than 127.
 *
 * Returns: non-zero if the unsigned value of @ch is above 127, zero
 *   otherwise.
 */
static inline gboolean
nm_ascii_is_non_ascii(char ch)
{
    /* An unsigned byte is greater than 127 exactly when its highest
     * bit is set. */
    return (((guchar) ch) & 0x80) != 0;
}
/**
 * nm_ascii_is_regular:
 * @ch: the character to classify
 *
 * Checks whether @ch is in the range from ' ' (0x20) to '~' (126), both
 * inclusive. That is the same as
 * (!nm_ascii_is_ctrl_or_del(ch) && !nm_ascii_is_non_ascii(ch)).
 *
 * Returns: non-zero if @ch is at least ' ' and below 127, zero otherwise.
 */
static inline gboolean
nm_ascii_is_regular(char ch)
{
    /* Reject everything below SP and everything from DEL (127) upwards.
     * If char is signed, bytes above 127 are negative and already fail the
     * first comparison; if it is unsigned, they fail the second. */
    return !(ch < ' ' || ch > '~');
}
char *nm_utils_bin2hexstr_full(gconstpointer addr,
gsize length,
char delimiter,

View file

@ -1370,6 +1370,49 @@ test_nm_g_source_sentinel(void)
/*****************************************************************************/
/* Exhaustively checks the nm_ascii_is_*() helpers for all 256 byte values:
 * they must be consistent with each other and agree with the corresponding
 * g_ascii_is*() classification macros. */
static void
test_nm_ascii(void)
{
    int byte;

    for (byte = 0; byte < 256; byte++) {
        const char     ch          = byte;
        const gboolean ctrl_or_del = nm_ascii_is_ctrl_or_del(ch);
        const gboolean non_ascii   = nm_ascii_is_non_ascii(ch);
        const gboolean regular     = nm_ascii_is_regular(ch);
        gboolean       space;
        gboolean       in_glib_class;

        /* DEL is the only character on which the two "ctrl" helpers differ. */
        if (ch != 127) {
            g_assert(ctrl_or_del == nm_ascii_is_ctrl(ch));
        } else {
            g_assert(ctrl_or_del);
            g_assert(!nm_ascii_is_ctrl(ch));
        }

        g_assert(ctrl_or_del == g_ascii_iscntrl(ch));

        g_assert(non_ascii == (byte >= 128));

        /* ctrl-or-del, regular and non-ASCII are mutually exclusive and
         * together cover every byte value. */
        g_assert(!ctrl_or_del || !non_ascii);
        g_assert((ctrl_or_del || regular) != non_ascii);
        g_assert(regular == (!ctrl_or_del && !non_ascii));

        if (NM_IN_SET(ch, '\t', '\n', '\f', '\r')) {
            /* These count as space for GLib but are not regular characters.
             * Ignore their is-space flag, so that the check below works to
             * check for regular ASCII characters. */
            g_assert(!regular);
            g_assert(g_ascii_isspace(ch));
            space = FALSE;
        } else {
            space = g_ascii_isspace(ch);
        }

        /* A regular character is one that belongs to at least one of GLib's
         * non-control character classes. */
        in_glib_class = g_ascii_isalnum(ch) || g_ascii_isalpha(ch) || g_ascii_isdigit(ch)
                        || g_ascii_isgraph(ch) || g_ascii_islower(ch) || g_ascii_isprint(ch)
                        || g_ascii_ispunct(ch) || space || g_ascii_isupper(ch)
                        || g_ascii_isxdigit(ch);
        g_assert(regular == in_glib_class);
    }
}
/*****************************************************************************/
NMTST_DEFINE();
int
@ -1402,6 +1445,7 @@ main(int argc, char **argv)
g_test_add_func("/general/test_strv_dup_packed", test_strv_dup_packed);
g_test_add_func("/general/test_utils_hashtable_cmp", test_utils_hashtable_cmp);
g_test_add_func("/general/test_nm_g_source_sentinel", test_nm_g_source_sentinel);
g_test_add_func("/general/test_nm_ascii", test_nm_ascii);
return g_test_run();
}