Pass through literal space values from replacement lists.

This makes test 15 pass and also dramatically simplifies the lexer.

Previously the lexer used a CONTROL start state so that SPACE tokens
were emitted only on text lines. That is not actually what we want:
SPACE tokens are needed in replacement lists as well. Instead of a
lexer state, we now simply set a "space_tokens" flag whenever we
start constructing a pp_tokens list, and clear the flag whenever we
see a '#' introducing a directive.

Much cleaner this way.
This commit is contained in:
Carl Worth 2010-05-25 16:59:02 -07:00
parent b1854fdfb6
commit f34a0009dd
3 changed files with 48 additions and 94 deletions

View file

@ -32,21 +32,6 @@
%option reentrant noyywrap
%option extra-type="glcpp_parser_t *"
/* This lexer has two states:
*
* The CONTROL state is for control lines (directives)
* It lexes exactly as specified in the C99 specification.
*
* The INITIAL state is for input lines. In this state, we
* make the OTHER token much more broad in that it now
* includes tokens consisting entirely of whitespace. This
* allows us to pass text through verbatim. It avoids the
* "inadvertent token pasting" problem that would occur if we
* just printed tokens, while also avoiding excess whitespace
* insertion in the output.*/
%x CONTROL
SPACE [[:space:]]
NONSPACE [^[:space:]]
NEWLINE [\n]
@ -63,97 +48,68 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]?
%%
{HASH}define{HSPACE}+/{IDENTIFIER}"(" {
BEGIN CONTROL;
yyextra->space_tokens = 0;
return HASH_DEFINE_FUNC;
}
{HASH}define {
BEGIN CONTROL;
yyextra->space_tokens = 0;
return HASH_DEFINE_OBJ;
}
{HASH}undef {
BEGIN CONTROL;
yyextra->space_tokens = 0;
return HASH_UNDEF;
}
{HASH} {
BEGIN CONTROL;
yyextra->space_tokens = 0;
return HASH;
}
<CONTROL>{IDENTIFIER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
<CONTROL>"<<" {
return LEFT_SHIFT;
}
<CONTROL>">>" {
return RIGHT_SHIFT;
}
<CONTROL>"<=" {
return LESS_OR_EQUAL;
}
<CONTROL>">=" {
return GREATER_OR_EQUAL;
}
<CONTROL>"==" {
return EQUAL;
}
<CONTROL>"!=" {
return NOT_EQUAL;
}
<CONTROL>"&&" {
return AND;
}
<CONTROL>"||" {
return OR;
}
<CONTROL>"##" {
return PASTE;
}
<CONTROL>{PUNCTUATION} {
return yytext[0];
}
<CONTROL>{OTHER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return OTHER;
}
<CONTROL>{HSPACE}+
<CONTROL>\n {
BEGIN INITIAL;
return NEWLINE;
}
{IDENTIFIER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
"(" {
return '(';
"<<" {
return LEFT_SHIFT;
}
")" {
return ')';
">>" {
return RIGHT_SHIFT;
}
"," {
return ',';
"<=" {
return LESS_OR_EQUAL;
}
">=" {
return GREATER_OR_EQUAL;
}
"==" {
return EQUAL;
}
"!=" {
return NOT_EQUAL;
}
"&&" {
return AND;
}
"||" {
return OR;
}
"##" {
return PASTE;
}
{PUNCTUATION} {
return yytext[0];
}
{OTHER}+ {
@ -162,17 +118,14 @@ HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]?
}
{HSPACE}+ {
yylval.str = xtalloc_strdup (yyextra, yytext);
return SPACE;
if (yyextra->space_tokens) {
yylval.str = xtalloc_strdup (yyextra, yytext);
return SPACE;
}
}
\n {
return NEWLINE;
}
. {
yylval.str = xtalloc_strdup (yyextra, yytext);
return OTHER;
}
%%

View file

@ -160,7 +160,7 @@ line:
;
control_line:
HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE {
HASH_DEFINE_OBJ IDENTIFIER replacement_list NEWLINE {
_define_object_macro (parser, $2, $3);
}
| HASH_DEFINE_FUNC IDENTIFIER '(' ')' replacement_list NEWLINE {
@ -212,6 +212,7 @@ replacement_list:
pp_tokens:
preprocessing_token {
parser->space_tokens = 1;
$$ = _token_list_create (parser);
_token_list_append ($$, $1);
talloc_unlink (parser, $1);
@ -234,7 +235,7 @@ preprocessing_token:
$$ = _token_create_str (parser, OTHER, $1);
}
| SPACE {
$$ = _token_create_str (parser, OTHER, $1);
$$ = _token_create_str (parser, SPACE, $1);
}
;
@ -494,6 +495,7 @@ _token_print (token_t *token)
switch (token->type) {
case IDENTIFIER:
case OTHER:
case SPACE:
printf ("%s", token->value.str);
break;
case LEFT_SHIFT:
@ -589,6 +591,7 @@ glcpp_parser_create (void)
parser->defines = hash_table_ctor (32, hash_table_string_hash,
hash_table_string_compare);
parser->active = _string_list_create (parser);
parser->space_tokens = 1;
parser->expansions = NULL;
parser->just_printed_separator = 1;
@ -835,9 +838,6 @@ _glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
for (node = list->head; node; node = node->next) {
if (_glcpp_parser_print_expanded_token (parser, node->token))
_glcpp_parser_print_expanded_function (parser, &node);
if (node->next)
printf (" ");
}
}

View file

@ -126,6 +126,7 @@ struct glcpp_parser {
yyscan_t scanner;
struct hash_table *defines;
string_list_t *active;
int space_tokens;
expansion_node_t *expansions;
int just_printed_separator;
int need_newline;