Don't return SPACE tokens unless strictly needed.

This reverts the unconditional return of SPACE tokens from the lexer
that was introduced in commit 48b94da099.

That commit seemed useful because it kept the lexer simpler, but the
presence of SPACE tokens is causing lots of extra complication for the
parser itself (productions that are redundant except for whitespace
differences, several productions that are buggy in the case of extra
whitespace, etc.).

Of course, we'd prefer to never have any whitespace token, but that's
not possible with the need to distinguish between "#define foo()" and
"#define foo ()". So we'll accept a little bit of pain in the lexer,
(enough state to support this special-case token), in exchange for
keeping most of the parser blissfully ignorant of whether tokens are
separated by whitespace or not.

This change does mean that our output now differs from that of "gcc -E",
but only in whitespace. So we test with "diff -w" now to ignore those
differences.
This commit is contained in:
Carl Worth 2010-05-14 17:08:45 -07:00
parent 4eb2ccf261
commit 81f01432bd
3 changed files with 34 additions and 19 deletions

View file

@ -32,6 +32,9 @@
%option reentrant noyywrap
%option extra-type="glcpp_parser_t *"
%x ST_DEFINE
%x ST_DEFVAL
SPACE [[:space:]]
NONSPACE [^[:space:]]
NEWLINE [\n]
@ -52,9 +55,31 @@ TOKEN [^[:space:](),]+
* "#define foo()" from "#define foo ()".
*/
{HASH}define{HSPACE}* {
BEGIN ST_DEFINE;
return DEFINE;
}
<ST_DEFINE>{IDENTIFIER} {
BEGIN ST_DEFVAL;
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
<ST_DEFVAL>\n {
BEGIN INITIAL;
return NEWLINE;
}
<ST_DEFVAL>{HSPACE}+ {
BEGIN INITIAL;
return SPACE;
}
<ST_DEFVAL>"(" {
BEGIN INITIAL;
return '(';
}
{IDENTIFIER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
switch (glcpp_parser_macro_type (yyextra, yylval.str))
@ -84,8 +109,6 @@ TOKEN [^[:space:](),]+
return NEWLINE;
}
{HSPACE}+ {
return SPACE;
}
{SPACE}+
%%

View file

@ -135,7 +135,6 @@ content:
| '(' { printf ("("); }
| ')' { printf (")"); }
| ',' { printf (","); }
| SPACE { printf (" "); }
;
macro:
@ -156,10 +155,6 @@ argument_list:
$$ = _argument_list_create (parser);
_argument_list_append ($$, $1);
}
| argument_list ',' SPACE argument {
_argument_list_append ($1, $4);
$$ = $1;
}
| argument_list ',' argument {
_argument_list_append ($1, $3);
$$ = $1;
@ -179,12 +174,6 @@ argument:
talloc_free ($2);
$$ = $1;
}
| argument SPACE word {
_string_list_append_item ($1, " ");
_string_list_append_item ($1, $3);
talloc_free ($3);
$$ = $1;
}
| argument '(' argument ')' {
_string_list_append_item ($1, "(");
_string_list_append_list ($1, $3);
@ -209,8 +198,8 @@ directive:
string_list_t *list = _string_list_create (parser);
_define_function_macro (parser, $2, $4, list);
}
| DEFINE IDENTIFIER '(' parameter_list ')' SPACE replacement_list {
_define_function_macro (parser, $2, $4, $7);
| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list {
_define_function_macro (parser, $2, $4, $6);
}
| UNDEF FUNC_MACRO {
string_list_t *replacement = hash_table_find (parser->defines, $2);
@ -256,7 +245,6 @@ replacement_word:
| '(' { $$ = xtalloc_strdup (parser, "("); }
| ')' { $$ = xtalloc_strdup (parser, ")"); }
| ',' { $$ = xtalloc_strdup (parser, ","); }
| SPACE { $$ = xtalloc_strdup (parser, " "); }
;
parameter_list:
@ -373,8 +361,11 @@ _print_string_list (string_list_t *list)
if (list == NULL)
return;
for (node = list->head; node; node = node->next)
for (node = list->head; node; node = node->next) {
printf ("%s", node->str);
if (node->next)
printf (" ");
}
}
argument_list_t *
@ -623,6 +614,7 @@ _expand_function_macro (glcpp_parser_t *parser,
argument_list_t *arguments)
{
string_list_t *result;
macro_t *macro;
result = _string_list_create (parser);

View file

@ -5,5 +5,5 @@ for test in *.c; do
../glcpp < $test > $test.out
gcc -E $test -o $test.gcc
grep -v '^#' < $test.gcc > $test.expected
diff -u $test.expected $test.out
diff -w -u $test.expected $test.out
done