From 5877928b467451ffd55318edc3c149d4b175ee6d Mon Sep 17 00:00:00 2001 From: Thomas Haller Date: Fri, 9 Jul 2021 09:11:40 +0200 Subject: [PATCH] ifcfg: ANSIC escape non-UTF-8 "strings" and preserve valid unicode Note that previously the check if (s[slen] < ' ') { ... return (*to_free = _escape_ansic(s)); } would be TRUE for every byte of a non-ASCII (multi-byte) UTF-8 character if `char` is signed. That means, depending on the compiler, we would always ANSIC escape all non-ASCII UTF-8 characters. With this patch, we no longer do that! Instead, valid Unicode is now preserved (albeit quoted). On the other hand, invalid UTF-8 is now always ANSIC escaped (regardless of the compiler). ifcfg-rh is really a text-based format. If a caller wants to store binary data, they need to escape it first, for example with their own escaping scheme, base64 or bin2hexstr. A caller passing non-text to svEscape() is likely a bug already, and they should not have done that. Still, let svEscape() handle that by using ANSIC escaping. That works as far as escaping is concerned, but it will likely be a problem later during unescaping, when the reader expects a valid UTF-8 string. svEscape() is in no place to signal a sensible error, so it proceeds as best it can, by escaping. 
--- src/core/settings/plugins/ifcfg-rh/shvar.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/core/settings/plugins/ifcfg-rh/shvar.c b/src/core/settings/plugins/ifcfg-rh/shvar.c index 80644b64fd..548a5b43e3 100644 --- a/src/core/settings/plugins/ifcfg-rh/shvar.c +++ b/src/core/settings/plugins/ifcfg-rh/shvar.c @@ -255,20 +255,36 @@ svEscape(const char *s, char **to_free) gsize slen; gsize i; gsize j; + gboolean all_ascii = TRUE; for (slen = 0; s[slen]; slen++) { if (_char_req_escape(s[slen])) mangle++; else if (_char_req_quotes(s[slen])) requires_quotes = TRUE; - else if (s[slen] < ' ') { + else if (((guchar) s[slen]) < ' ') { /* if the string contains newline we can only express it using ANSI C quotation * (as we don't support line continuation). * Additionally, ANSI control characters look odd with regular quotation, so handle * them too. */ return (*to_free = _escape_ansic(s)); + } else if (((guchar) s[slen]) >= 0177) { + all_ascii = FALSE; + requires_quotes = TRUE; } } + + if (!all_ascii && !g_utf8_validate(s, -1, NULL)) { + /* The string is not valid ASCII/UTF-8. We can escape that via + * _escape_ansic(), however the reader might have a problem to + * do something sensible with the blob later. + * + * This is really a bug of the caller, which should not present us with + * non-text in the first place. But at this place, we cannot handle the + * error better, so just escape it. */ + return (*to_free = _escape_ansic(s)); + } + if (!mangle && !requires_quotes) { *to_free = NULL; return s;