ifcfg-rh: rework parsing of shell (svUnescape())

Better support parsing of shell. Now we support:

- combining values, like
    FOO=a"b"
    FOO=$'\n'b

- bash style ANSI C quotation ($''). This will allow us to properly
  handle newlines in string values.

- comments at the end of a line (after whitespace)
    FOO=val #comment
  Note that this is different from a # without space
    FOO=val#with#hashes

- trailing spaces are ignored like

    FOO=a[space]
    FOO=[space]

- history expansion via ! is not done (this is not new).

We don't support:

- line continuation like

    FOO='
    '
    FOO=a\
    b

- any form of shell expansion via $, ``.

    FOO="$a"

  Such values are recognized to name a variable FOO, but with an
  empty value, like

    FOO=%{nil}

  which is not the same as a valid empty value

    FOO=

- any other form of (unquoted) shell meta characters, like ; < > ( ).
  In particular, this means that command invocations are invalid, like

    ls -1
    LANG=C ls -1
    FOO1=a; FOO2=b

  This also means that spaces immediately after the assignment are invalid:

    FOO= val

Also, svUnescape() can now return %NULL to signal an invalid line like

    FOO='
This commit is contained in:
Thomas Haller 2016-10-31 17:48:15 +01:00
parent f3c7d009b5
commit 7b548fb9a8
2 changed files with 550 additions and 105 deletions

View file

@ -148,73 +148,313 @@ svEscape (const char *s, char **to_free)
return new;
}
/* remove escaped characters in place */
/* Tell whether @ch is an ASCII octal digit, i.e. one of '0'..'7'. */
static gboolean
_ch_octal_is (char ch)
{
	return '0' <= ch && ch <= '7';
}
/* Numeric value (0..7) of the octal digit @ch.
 * The caller must pass a valid octal digit; this is only asserted. */
static guint8
_ch_octal_get (char ch)
{
	guint8 digit;

	nm_assert (_ch_octal_is (ch));

	digit = ch - '0';
	return digit;
}
/* Tell whether @ch is an ASCII hexadecimal digit, as decided by
 * g_ascii_isxdigit(). */
static gboolean
_ch_hex_is (char ch)
{
	const gboolean is_hex_digit = g_ascii_isxdigit (ch);

	return is_hex_digit;
}
/* Numeric value (0..15) of the hexadecimal digit @ch.
 * The caller must pass a valid hex digit; this is only asserted. */
static guint8
_ch_hex_get (char ch)
{
	nm_assert (_ch_hex_is (ch));

	/* decimal digits sort below all hex letters in ASCII */
	if (ch <= '9')
		return ch - '0';

	/* masking with 0x4F drops the 0x20 case bit, so that 'a'..'f'
	 * are folded onto 'A'..'F' before computing the offset. */
	return (ch & 0x4F) - 'A' + 10;
}
/* Lazily set up the output buffer for unescaping.
 *
 * If *@str is already set, nothing happens. Otherwise a new GString is
 * allocated and the first @i characters of @value are copied into it.
 *
 * Unescaping usually does not make a string longer, so a fixed buffer of
 * (strlen(value)+CONST) bytes looks tempting. However, with $'\Ux' escapes
 * the maximum length is some (FACTOR*strlen(value) + CONST), which is non
 * trivial to get right in all cases and would mean provisioning for a very
 * unlikely extreme. Hence a growable GString is used, with strlen(value)
 * as a reasonable initial size guess. */
static void
_gstr_init (GString **str, const char *value, gsize i)
{
	GString *buf;

	nm_assert (str);
	nm_assert (value);

	if (*str)
		return;

	buf = g_string_new_len (NULL, strlen (value) + 3);
	if (i > 0)
		g_string_append_len (buf, value, i);
	*str = buf;
}
/* svUnescape:
 * @value: the text following "FOO=" on a line; must not contain a newline
 *   (this is only asserted).
 * @to_free: (out): set to the buffer the caller has to free, or to %NULL
 *   when nothing was allocated.
 *
 * Interprets @value as the right-hand side of a bash-style assignment:
 * backslash escapes, single quotes, double quotes and $'' ANSI-C quoting
 * are resolved and adjacent parts are joined. Shell expansion inside double
 * quotes (` and $), unquoted shell metacharacters and unterminated
 * quotes/escapes are rejected.
 *
 * Returns: the unescaped value, or %NULL (with *@to_free set to %NULL) if
 *   the line is invalid.
 *
 * NOTE(review): this listing appears to interleave lines of the previous
 * implementation (the g_strchomp() copy into @s and the idx_rd/idx_wr
 * in-place shifting loop) with the new parser -- confirm against the real
 * file before relying on the exact statement order shown here. */
const char *
svUnescape (const char *value, char **to_free)
{
size_t len, idx_rd = 0, idx_wr = 0;
char c;
char *s;
gsize i, j;
nm_auto_free_gstring GString *str = NULL;
/* we handle bash syntax here (note that ifup has #!/bin/bash.
* Thus, see https://www.gnu.org/software/bash/manual/html_node/Quoting.html#Quoting */
/* @value shall start with the first character after "FOO=" */
nm_assert (value);
nm_assert (to_free);
/* TODO: avoid copying the string if there is nothing to do. */
s = g_strchomp (g_strdup (value));
*to_free = s;
/* we don't expect any newlines. They must be filtered out before-hand.
* We also don't support line continuation. */
nm_assert (!NM_STRCHAR_ANY (value, ch, ch == '\n'));
len = strlen (s);
if (len < 2) {
if (s[0] == '\\')
s[0] = '\0';
return s;
/* Scan @value from left to right. @str stays NULL until the first quote
* or escape is encountered (see _gstr_init()); until then plain characters
* are only skipped over by advancing @i. */
i = 0;
while (TRUE) {
if (value[i] == '\0')
goto out_value;
if ( g_ascii_isspace (value[i])
|| value[i] == ';') {
gboolean has_semicolon = (value[i] == ';');
/* starting with space is only allowed, if the entire
* string consists of spaces (possibly terminated by a comment).
* This disallows for example
* LANG=C ls -1
* LANG= ls -1
* but allows
* LANG= #comment
*
* As a special case, we also allow one trailing semicolon, as long
* it is only followed by whitespace or a #-comment.
* FOO=;
* FOO=a;
* FOO=b ; #hallo
*/
j = i + 1;
while ( g_ascii_isspace (value[j])
|| ( !has_semicolon
&& (has_semicolon = (value[j] == ';'))))
j++;
if (!NM_IN_SET (value[j], '\0', '#'))
goto out_error;
goto out_value;
}
if (value[i] == '\\') {
/* backslash escape */
_gstr_init (&str, value, i);
i++;
if (G_UNLIKELY (value[i] == '\0')) {
/* we don't support line continuation */
goto out_error;
}
g_string_append_c (str, value[i]);
i++;
goto loop1_next;
}
if (value[i] == '\'') {
/* single quotes */
_gstr_init (&str, value, i);
i++;
j = i;
while (TRUE) {
if (value[j] == '\0') {
/* unterminated single quote. We don't support line continuation */
goto out_error;
}
if (value[j] == '\'')
break;
j++;
}
/* everything between the quotes is taken literally */
g_string_append_len (str, &value[i], j - i);
i = j + 1;
goto loop1_next;
}
if (value[i] == '"') {
/* double quotes */
_gstr_init (&str, value, i);
i++;
while (TRUE) {
if (value[i] == '"') {
i++;
break;
}
if (value[i] == '\0') {
/* unterminated double quote. We don't support line continuation. */
goto out_error;
}
if (NM_IN_SET (value[i], '`', '$')) {
/* we don't support shell expansion. */
goto out_error;
}
if (value[i] == '\\') {
i++;
if (value[i] == '\0') {
/* we don't support line continuation */
goto out_error;
}
if (!NM_IN_SET (value[i], '$', '`', '"', '\\')) {
/* TODO: svEscape() is not yet ready to handle properly treating
* double quotes. */
//g_string_append_c (str, '\\');
}
}
g_string_append_c (str, value[i]);
i++;
}
goto loop1_next;
}
if ( value[i] == '$'
&& value[i + 1] == '\'') {
/* ANSI-C Quoting */
_gstr_init (&str, value, i);
i += 2;
while (TRUE) {
char ch;
if (value[i] == '\'') {
i++;
break;
}
if (value[i] == '\0') {
/* unterminated $'...' quote. We don't support line continuation. */
goto out_error;
}
if (value[i] == '\\') {
i++;
if (value[i] == '\0') {
/* we don't support line continuation */
goto out_error;
}
switch (value[i]) {
case 'a': ch = '\a'; break;
case 'b': ch = '\b'; break;
case 'e': ch = '\e'; break;
case 'E': ch = '\E'; break;
case 'f': ch = '\f'; break;
case 'n': ch = '\n'; break;
case 'r': ch = '\r'; break;
case 't': ch = '\t'; break;
case 'v': ch = '\v'; break;
case '?': ch = '\?'; break;
case '"': ch = '"'; break;
case '\\': ch = '\\'; break;
case '\'': ch = '\''; break;
default:
if (_ch_octal_is (value[i])) {
guint v;
/* octal escape: one to three octal digits */
v = _ch_octal_get (value[i]);
i++;
if (_ch_octal_is (value[i])) {
v = (v * 8) + _ch_octal_get (value[i]);
i++;
if (_ch_octal_is (value[i])) {
v = (v * 8) + _ch_octal_get (value[i]);
i++;
}
}
/* like bash, we cut too large numbers off. E.g. A=$'\772' becomes 0xfa */
g_string_append_c (str, (guint8) v);
} else if (NM_IN_SET (value[i], 'x', 'u', 'U')) {
const char escape_type = value[i];
int max_digits = escape_type == 'x' ? 2 : escape_type == 'u' ? 4 : 8;
guint64 v;
i++;
if (!_ch_hex_is (value[i])) {
/* missing hex value after "\x" escape. This is treated like no escaping. */
g_string_append_c (str, '\\');
g_string_append_c (str, escape_type);
} else {
v = _ch_hex_get (value[i]);
i++;
/* the first digit was consumed above; read up to
* max_digits hex digits in total. */
while (--max_digits > 0) {
if (!_ch_hex_is (value[i]))
break;
v = v * 16 + _ch_hex_get (value[i]);
i++;
}
if (escape_type == 'x')
g_string_append_c (str, v);
else {
/* we treat the unicode escapes as utf-8 encoded values. */
g_string_append_unichar (str, v);
}
}
} else {
/* unknown escape: keep the backslash and the character as-is */
g_string_append_c (str, '\\');
g_string_append_c (str, value[i]);
i++;
}
/* the default branch already appended its output and advanced @i */
goto loop_ansic_next;
}
} else
ch = value[i];
g_string_append_c (str, ch);
i++;
loop_ansic_next: ;
}
goto loop1_next;
}
if (NM_IN_SET (value[i], '|', '&', '(', ')', '<', '>')) {
/* shell metacharacters are not supported without quoting.
* Note that ';' is already handled above. */
goto out_error;
}
/* an unquoted, regular character. Just consume it directly. */
if (str)
g_string_append_c (str, value[i]);
i++;
loop1_next: ;
}
if ((s[0] == '"' || s[0] == '\'') && s[0] == s[len-1]) {
if (len == 2) {
s[0] = '\0';
return s;
}
if (len == 3) {
if (s[1] == '\\') {
s[0] = '\0';
} else {
s[0] = s[1];
s[1] = '\0';
}
return s;
}
s[--len] = '\0';
idx_rd = 1;
nm_assert_not_reached ();
out_value:
/* The value ends at index @i. Either hand out the buffer that was built
* up in @str, or -- if no quoting/escaping was seen -- a pointer into
* @value or a truncated copy of it. */
if (str) {
*to_free = g_string_free (str, FALSE);
str = NULL;
return *to_free;
} else if (i == 0) {
*to_free = NULL;
/* we could just return "", but I prefer returning a
* pointer into @value for consistency. Thus, seek to the
* end. */
while (value[0])
value++;
return value;
} else if (value[i] != '\0') {
*to_free = g_strndup (value, i);
return *to_free;
} else {
/* seek for the first escape... */
char *p = strchr (s, '\\');
if (!p)
return s;
if (p[1] == '\0') {
p[0] = '\0';
return s;
}
idx_wr = idx_rd = (p - s);
*to_free = NULL;
return value;
}
/* idx_rd points to the first escape. Walk the string and shift the
* characters from idx_rd to idx_wr.
*/
while ((c = s[idx_rd++])) {
if (c == '\\') {
if (s[idx_rd] == '\0') {
s[idx_wr] = '\0';
return s;
}
s[idx_wr++] = s[idx_rd++];
continue;
}
s[idx_wr++] = c;
}
s[idx_wr] = '\0';
return s;
out_error:
/* invalid input: there is no value and nothing for the caller to free */
*to_free = NULL;
return NULL;
}
/*****************************************************************************/
@ -539,7 +779,7 @@ svSetValue (shvarFile *s, const char *key, const char *value)
current = last;
oldval = svUnescape (oldval, &oldval_free);
if (!nm_streq (oldval, value)) {
if (!nm_streq0 (oldval, value)) {
g_free (current->data);
current->data = line_construct (key, value);
s->modified = TRUE;

View file

@ -365,8 +365,8 @@ test_read_variables_corner_cases (void)
/* ===== CONNECTION SETTING ===== */
s_con = nm_connection_get_setting_connection (connection);
g_assert (s_con);
g_assert_cmpstr (nm_setting_connection_get_id (s_con), ==, "\"");
g_assert_cmpstr (nm_setting_connection_get_zone (s_con), ==, "'");
g_assert_cmpstr (nm_setting_connection_get_id (s_con), ==, "System test-variables-corner-cases-1");
g_assert_cmpstr (nm_setting_connection_get_zone (s_con), ==, NULL);
g_assert_cmpint (nm_setting_connection_get_timestamp (s_con), ==, 0);
g_assert (nm_setting_connection_get_autoconnect (s_con));
@ -451,7 +451,7 @@ test_read_unrecognized (void)
/* ===== CONNECTION SETTING ===== */
s_con = nm_connection_get_setting_connection (connection);
g_assert (s_con);
g_assert_cmpstr (nm_setting_connection_get_id (s_con), ==, "U Can't Touch This");
g_assert_cmpstr (nm_setting_connection_get_id (s_con), ==, "System test-unrecognized");
g_assert_cmpint (nm_setting_connection_get_timestamp (s_con), ==, expected_timestamp);
g_object_unref (connection);
@ -8695,71 +8695,276 @@ test_read_team_port_empty_config (void)
g_object_unref (connection);
}
/*****************************************************************************/
/* Test wrapper around svUnescape() that additionally checks the relation
 * between the returned pointer and *@to_free: if something was allocated,
 * the result must be that very allocation; otherwise the result must be
 * NULL or point into @str (up to and including its terminating NUL). */
static const char *
_svUnescape (const char *str, char **to_free)
{
	const char *result;

	g_assert (str);
	g_assert (to_free);

	result = svUnescape (str, to_free);

	if (*to_free) {
		g_assert (result == *to_free);
		return result;
	}

	g_assert (   result == NULL
	          || (result >= str && result <= strchr (str, '\0')));
	return result;
}
/* One test vector for svUnescape(). */
typedef struct {
/* the escaped input text handed to svUnescape() */
const char *val;
/* the expected unescaped result; NULL where the input is expected to be rejected */
const char *exp;
/* entries with this flag unset are skipped when test strings are
* built by concatenating several entries */
bool can_concat:1;
/* the entry ends in an open escape sequence; when concatenating, a space
* is appended after it so that it does not merge with what follows */
bool needs_ascii_separator:1;
} UnescapeTestData;
/* Unescape @str and assert, via g_assert_cmpstr(), that the result equals
 * @expected.
 *
 * NOTE(review): the listing shows two signature lines and two unescape
 * calls here; presumably the test_svUnescape_assert()/svUnescape() lines
 * are the pre-change version and the do_svUnescape_assert()/_svUnescape()
 * lines the post-change version -- confirm against the real file. */
static void
test_svUnescape_assert (const char *str)
do_svUnescape_assert (const char *str, const char *expected)
{
/* released automatically when leaving the scope (gs_free) */
gs_free char *to_free = NULL;
const char *s;
s = svUnescape (str, &to_free);
s = _svUnescape (str, &to_free);
g_assert_cmpstr (s, ==, expected);
}
/* Append one test vector to the strings under construction: @data->val goes
 * to @str_val and @data->exp to @str_exp.
 *
 * If @honor_needs_ascii_separator is set and the vector is flagged with
 * needs_ascii_separator, a single space is appended to both strings
 * afterwards. */
static void
do_svUnescape_combine_ansi_append (GString *str_val, GString *str_exp, const UnescapeTestData *data, gboolean honor_needs_ascii_separator)
{
	const gboolean add_separator = honor_needs_ascii_separator && data->needs_ascii_separator;

	g_string_append (str_val, data->val);
	g_string_append (str_exp, data->exp);

	if (!add_separator)
		return;

	/* the string has an open escape sequence. When combining it with
	 * another sequence, the two must not merge into something
	 * different. For example "\xa" + "a" must not result in "\xaa".
	 * Instead, a space is put in between to get "\xa a". */
	g_string_append (str_val, " ");
	g_string_append (str_exp, " ");
}
/* Build a $'...' quoted test input in @str_val together with the expected
 * result in @str_exp. Both strings are reset first.
 *
 * With @idx >= 0, exactly the entry @data_ansi[@idx] is wrapped.
 * With @idx < 0, -@idx randomly picked entries are joined; entries that
 * are not marked can_concat are skipped and another one is drawn instead.
 * The separator flag is honored for all but the last joined entry. */
static void
do_svUnescape_combine_ansi (GString *str_val, GString *str_exp, const UnescapeTestData *data_ansi, gsize data_len, gssize idx)
{
	gsize remaining;
	gsize pick;

	g_string_set_size (str_val, 0);
	g_string_set_size (str_exp, 0);

	g_string_append (str_val, "$'");

	if (idx >= 0) {
		g_assert_cmpint (idx, <, data_len);
		do_svUnescape_combine_ansi_append (str_val, str_exp, &data_ansi[idx], FALSE);
	} else {
		remaining = -idx;
		while (remaining > 0) {
			pick = nmtst_get_rand_int () % data_len;
			if (!data_ansi[pick].can_concat) {
				/* not suitable for joining; draw again */
				continue;
			}
			do_svUnescape_combine_ansi_append (str_val, str_exp, &data_ansi[pick], remaining > 1);
			remaining--;
		}
	}

	g_string_append (str_val, "'");
}
/* Unit test for svUnescape(): checks a table of complete values
 * (data_full), a table of snippets that are wrapped into $'...'
 * (data_ansi), and randomly concatenated combinations of both.
 *
 * NOTE(review): this listing appears to interleave lines of the previous
 * random-string test (GRand based, calling test_svUnescape_assert()) with
 * the new table driven test -- confirm against the real file. */
static void
test_svUnescape (void)
{
int len, repeat, i, k;
GRand *r = g_rand_new ();
guint32 seed = g_random_int ();
/* V0: entry that is never used for concatenation.
* V1: entry that may be concatenated if it has a non-NULL expected value.
* V2: entry that may be concatenated, but ends in an open escape sequence
* and thus needs a separator after it. */
#define V0(v_value, v_expected) { .val = ""v_value"", .exp = v_expected, .can_concat = FALSE, }
#define V1(v_value, v_expected) { .val = ""v_value"", .exp = v_expected, .can_concat = !!v_expected, }
#define V2(v_value, v_expected) { .val = ""v_value"", .exp = v_expected, .can_concat = TRUE, .needs_ascii_separator = TRUE, }
/* complete values, passed to svUnescape() as they are */
const UnescapeTestData data_full[] = {
V1 ("", ""),
V0 ("'", NULL),
V1 ("'x'", "x"),
V1 ("' '", " "),
V1 ("'x'", "x"),
V0 ("\"", NULL),
V0 ("\\", NULL),
V0 (" ", ""),
V0 (" ", ""),
V0 ("a; #", "a"),
V0 (" ; #", ""),
V0 ("; ", ""),
V0 ("; ;", NULL),
V0 (" ; a #", NULL),
V0 (" ; a;; #", NULL),
V0 ("a; ; #", NULL),
V0 ("\t # ", ""),
V0 ("\t #a", ""),
V0 ("\t #a\r", ""),
V0 ("\r", ""),
V0 ("ab\r", "ab"),
V0 ("a'b'\r ", "ab"),
V0 ("a'b' \r", "ab"),
V0 ("a#b", "a#b"),
V0 ("#b", "#b"),
V1 ("\'some string\'", "some string"),
V0 ("Bob outside LAN", NULL),
V1 ("x", "x"),
V1 ("'{ \"device\": \"team0\", \"link_watch\": { \"name\": \"ethtool\" } }'",
"{ \"device\": \"team0\", \"link_watch\": { \"name\": \"ethtool\" } }"),
V1 ("x\"\"b", "xb"),
V1 ("x\"c\"b", "xcb"),
V1 ("\"c\"b", "cb"),
V1 ("\"c\"\\'b", "c'b"),
V1 ("$''", ""),
V1 ("$'\\n'", "\n"),
V0 ("$'\\'", NULL),
V1 ("$'\\x'", "\\x"),
V1 ("$'\\xa'", "\xa"),
V0 ("$'\\x0'", ""),
V1 ("$'\\x12'", "\x12"),
V1 ("$'\\x12A'", "\x12""A"),
V1 ("$'\\x12t'", "\x12t"),
V1 ("\"aa\\\"\"", "aa\""),
V1 ("\"aa\\\"b\"c", "aa\"bc"),
V1 ("\"aa\\\"\"b", "aa\"b"),
};
const UnescapeTestData data_ansi[] = {
/* strings inside $''. They cannot be compared directly, but must
* be wrapped by do_svUnescape_combine_ansi(). */
V1 ("", ""),
V1 ("a", "a"),
V1 ("b", "b"),
V1 ("x", "x"),
V1 (" ", " "),
V1 ("\\a", "\a"),
V1 ("\\b", "\b"),
V1 ("\\e", "\e"),
V1 ("\\E", "\E"),
V1 ("\\f", "\f"),
V1 ("\\n", "\n"),
V1 ("\\r", "\r"),
V1 ("\\t", "\t"),
V1 ("\\v", "\v"),
V1 ("\\\\", "\\"),
V1 ("\\'", "'"),
V1 ("\\\"", "\""),
V1 ("\\?", "\?"),
V1 ("\\?", "?"),
V2 ("\\8", "\\8"),
V2 ("\\1", "\1"),
V1 ("\\1A", "\1A"),
V1 ("\\18", "\18"),
V2 ("\\01", "\1"),
V1 ("\\001", "\1"),
V0 ("\\008", ""),
V1 ("\\018", "\0018"),
V0 ("\\08", ""),
V1 ("\\18", "\0018"),
V1 ("\\x", "\\x"),
V2 ("\\xa", "\xa"),
V1 ("\\x12", "\x12"),
V1 ("\\x12A", "\x12""A"),
V1 ("\\x12a", "\x12""a"),
V1 ("\\x12t", "\x12t"),
V1 ("\\x1a", "\x1a"),
V1 ("\\x1A", "\x1A"),
V1 ("\\ut", "\\ut"),
V2 ("\\ua", "\xa"),
V1 ("\\uat", "\xat"),
V2 ("\\uab", "\xc2\xab"),
V1 ("\\uabt", "\xc2\xabt"),
V2 ("\\uabc", "\xe0\xaa\xbc"),
V1 ("\\uabct", "\xe0\xaa\xbct"),
V2 ("\\uabcd", "\xea\xaf\x8d"),
V1 ("\\uabcdt", "\xea\xaf\x8dt"),
V2 ("\\uabcde", "\xea\xaf\x8d""e"),
V1 ("\\uabcdet", "\xea\xaf\x8d""et"),
V1 ("\\Ut", "\\Ut"),
V2 ("\\Ua", "\xa"),
V1 ("\\Uat", "\xat"),
V2 ("\\Uab", "\xc2\xab"),
V1 ("\\Uabt", "\xc2\xabt"),
V2 ("\\Uabc", "\xe0\xaa\xbc"),
V1 ("\\Uabct", "\xe0\xaa\xbct"),
V2 ("\\Uabcd", "\xea\xaf\x8d"),
V1 ("\\Uabcdt", "\xea\xaf\x8dt"),
V2 ("\\Uabcde", "\xf2\xab\xb3\x9e"),
V1 ("\\Uabcdet", "\xf2\xab\xb3\x9et"),
V2 ("\\Uabcde0", "\xf8\xaa\xbc\xb7\xa0"),
V1 ("\\Uabcde0t", "\xf8\xaa\xbc\xb7\xa0t"),
V2 ("\\Uabcde01", "\xfc\x8a\xaf\x8d\xb8\x81"),
V1 ("\\Uabcde01t", "\xfc\x8a\xaf\x8d\xb8\x81t"),
V2 ("\\U0abcde01", "\xfc\x8a\xaf\x8d\xb8\x81"),
V1 ("\\U0abcde01t", "\xfc\x8a\xaf\x8d\xb8\x81t"),
V1 ("\\U00abcde01", "\xf8\xaa\xbc\xb7\xa0""1"),
V1 ("\\U00abcde01t", "\xf8\xaa\xbc\xb7\xa0""1t"),
g_rand_set_seed (r, seed);
/* control-x sequence is not supported */
V1 ("\\c", "\\c"),
V1 ("\\c1", "\\c1"),
};
#undef V0
#undef V1
#undef V2
gsize i;
nm_auto_free_gstring GString *str_val = g_string_new (NULL);
nm_auto_free_gstring GString *str_val2 = g_string_new (NULL);
nm_auto_free_gstring GString *str_exp = g_string_new (NULL);
nm_auto_free_gstring GString *str_exp2 = g_string_new (NULL);
test_svUnescape_assert ("");
test_svUnescape_assert ("'");
test_svUnescape_assert ("\"");
test_svUnescape_assert ("\\");
test_svUnescape_assert ("x");
test_svUnescape_assert (" ");
test_svUnescape_assert ("' '");
test_svUnescape_assert ("'x'");
test_svUnescape_assert ("\'some string\'");
test_svUnescape_assert ("Bob outside LAN");
test_svUnescape_assert ("{ \"device\": \"team0\", \"link_watch\": { \"name\": \"ethtool\" } }");
/* two adjacent single-quoted parts are joined into one value */
do_svUnescape_assert ( "' '' '", " ");
for (len = 1; len < 25; len++) {
char *s = g_new0 (char, len+1);
/* every complete value must unescape to its expected result */
for (i = 0; i < G_N_ELEMENTS (data_full); i++)
do_svUnescape_assert (data_full[i].val, data_full[i].exp);
for (repeat = 0; repeat < MAX (4*len, 20); repeat++) {
/* fill the entire string with random. */
for (i = 0; i < len; i++)
s[i] = g_rand_int (r);
/* randomly place escape characters into the string */
k = g_rand_int (r) % (len);
while (k-- > 0)
s[g_rand_int (r) % len] = '\\';
if (len > 1) {
/* quote the string. */
k = g_rand_int (r) % (10);
if (k < 4) {
char quote = k < 2 ? '"' : '\'';
s[0] = quote;
s[len-1] = quote;
}
}
/*g_message (">>%s<<", s);*/
test_svUnescape_assert (s);
}
g_free (s);
/* every ANSI-C snippet, wrapped individually into $'...' */
for (i = 0; i < G_N_ELEMENTS (data_ansi); i++) {
do_svUnescape_combine_ansi (str_val, str_exp, data_ansi, G_N_ELEMENTS (data_ansi), i);
do_svUnescape_assert (str_val->str, str_exp->str);
}
/* different values can be just concatenated... */
for (i = 0; i < 200; i++) {
gsize num_concat = (nmtst_get_rand_int () % 5) + 2;
g_string_set_size (str_val, 0);
g_string_set_size (str_exp, 0);
while (num_concat > 0) {
gsize idx;
if ((nmtst_get_rand_int () % 3 == 0)) {
/* NOTE(review): the combination built into str_val2/str_exp2 is not
* appended to str_val/str_exp anywhere in the visible code, and
* num_concat is not decremented on this path -- looks like the
* random $'...' part never takes part in the check; confirm. */
do_svUnescape_combine_ansi (str_val2, str_exp2, data_ansi, G_N_ELEMENTS (data_ansi), -((int) ((nmtst_get_rand_int () % 5) + 1)));
continue;
}
idx = nmtst_get_rand_int () % G_N_ELEMENTS (data_full);
if (!data_full[idx].can_concat)
continue;
g_string_append (str_val, data_full[idx].val);
g_string_append (str_exp, data_full[idx].exp);
num_concat--;
}
/* optionally add trailing whitespace; only the input changes, the
* expected value stays the same */
switch (nmtst_get_rand_int () % 3) {
case 0:
g_string_append (str_val, " ");
break;
case 1:
g_string_append (str_val, " ");
break;
}
/* optionally add a trailing #-comment; again only the input changes */
switch (nmtst_get_rand_int () % 3) {
case 0:
g_string_append (str_val, " #");
break;
case 1:
g_string_append (str_val, " #foo");
break;
}
do_svUnescape_assert (str_val->str, str_exp->str);
}
g_rand_free (r);
}
/*****************************************************************************/
static void
test_read_vlan_trailing_spaces (void)
{