Rewrite macro handling to support function-like macro invocation in macro values

The rewrite here discards the functions that did direct, recursive
expansion of macro values. Instead, the parser now pushes the macro
definition string over to a stack of buffers for the lexer. This way,
macro expansion gets access to all parsing machinery.

This isn't a small change, but the result is simpler than before (I
think). It passes the entire test suite, including the four tests
added with the previous commit that were failing before.
This commit is contained in:
Carl Worth 2010-05-18 22:10:04 -07:00
parent d476db38fe
commit a807fb72c4
4 changed files with 343 additions and 271 deletions

View file

@ -27,34 +27,15 @@
#include "glcpp.h"
#include "glcpp-parse.h"
/* Yes, a macro with a return statement in it is evil. But surely no
* more evil than all the code generation happening with flex in the
* first place. */
#define LEXIFY_IDENTIFIER do { \
yylval.str = xtalloc_strdup (yyextra, yytext); \
switch (glcpp_parser_macro_type (yyextra, yylval.str)) \
{ \
case MACRO_TYPE_UNDEFINED: \
return IDENTIFIER; \
break; \
case MACRO_TYPE_OBJECT: \
return OBJ_MACRO; \
break; \
case MACRO_TYPE_FUNCTION: \
return FUNC_MACRO; \
break; \
} \
} while (0)
%}
%option reentrant noyywrap
%option extra-type="glcpp_parser_t *"
%x ST_DEFINE
%x ST_DEFVAL_START
%x ST_DEFVAL
%x ST_DEFINE_OBJ_OR_FUNC
%x ST_DEFINE_PARAMETER
%x ST_DEFINE_VALUE
%x ST_UNDEF
%x ST_UNDEF_END
@ -75,12 +56,14 @@ TOKEN [^[:space:](),]+
<ST_UNDEF>{IDENTIFIER} {
BEGIN ST_UNDEF_END;
LEXIFY_IDENTIFIER;
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
<ST_UNDEF_END>{HSPACE}*
<ST_UNDEF_END>\n {
BEGIN INITIAL;
return NEWLINE;
}
/* We use the ST_DEFINE and ST_DEFVAL states so that we can
@ -94,48 +77,73 @@ TOKEN [^[:space:](),]+
}
<ST_DEFINE>{IDENTIFIER} {
BEGIN ST_DEFVAL_START;
BEGIN ST_DEFINE_OBJ_OR_FUNC;
yylval.str = xtalloc_strdup (yyextra, yytext);
return IDENTIFIER;
}
<ST_DEFVAL_START>\n {
<ST_DEFINE_OBJ_OR_FUNC>\n {
BEGIN INITIAL;
return NEWLINE;
yylval.str = xtalloc_strdup (yyextra, "");
return REPLACEMENT;
}
<ST_DEFVAL_START>{HSPACE}+ {
BEGIN ST_DEFVAL;
return SPACE;
<ST_DEFINE_OBJ_OR_FUNC>{HSPACE}+ {
BEGIN ST_DEFINE_VALUE;
}
<ST_DEFVAL_START>"(" {
BEGIN ST_DEFVAL;
<ST_DEFINE_OBJ_OR_FUNC>"(" {
BEGIN ST_DEFINE_PARAMETER;
return '(';
}
<ST_DEFVAL>{IDENTIFIER} {
LEXIFY_IDENTIFIER;
}
<ST_DEFVAL>[(),] {
return yytext[0];
}
<ST_DEFVAL>{TOKEN} {
<ST_DEFINE_PARAMETER>{IDENTIFIER} {
yylval.str = xtalloc_strdup (yyextra, yytext);
return TOKEN;
return IDENTIFIER;
}
<ST_DEFVAL>\n {
<ST_DEFINE_PARAMETER>"," {
return ',';
}
<ST_DEFINE_PARAMETER>")" {
BEGIN ST_DEFINE_VALUE;
return ')';
}
<ST_DEFINE_PARAMETER>{HSPACE}+
<ST_DEFINE_VALUE>.*\n {
BEGIN INITIAL;
return NEWLINE;
yylval.str = xtalloc_strndup (yyextra, yytext, strlen (yytext) - 1);
return REPLACEMENT;
}
<ST_DEFVAL>{HSPACE}+
{IDENTIFIER} {
LEXIFY_IDENTIFIER;
int parameter_index;
yylval.str = xtalloc_strdup (yyextra, yytext);
switch (glcpp_parser_classify_token (yyextra, yylval.str,
&parameter_index))
{
case TOKEN_CLASS_ARGUMENT:
talloc_free (yylval.str);
/* We don't return a value here since the
* current token will be replaced by new
* tokens. */
glcpp_parser_push_expansion_argument (yyextra,
parameter_index);
break;
case TOKEN_CLASS_IDENTIFIER:
return IDENTIFIER;
break;
case TOKEN_CLASS_FUNC_MACRO:
return FUNC_MACRO;
break;
case TOKEN_CLASS_OBJ_MACRO:
return OBJ_MACRO;
break;
}
}
[(),] {
@ -153,4 +161,54 @@ TOKEN [^[:space:](),]+
{HSPACE}+
<<EOF>> {
int done;
done = glcpp_lex_stack_pop (yyextra->lex_stack);
if (done)
yyterminate ();
glcpp_parser_pop_expansion (yyextra);
}
%%
/* Start lexing from 'string', remembering the buffer that was being
 * scanned so that glcpp_lex_stack_pop can return to it later.
 *
 * The record for the interrupted buffer is allocated as a talloc
 * child of 'stack' and becomes the new top of the stack. */
void
glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string)
{
	void *scanner = stack->parser->scanner;
	/* 'yyg' looks unused, but flex's reentrant YY_CURRENT_BUFFER
	 * macro expands to an expression that refers to it by name. */
	struct yyguts_t *yyg = (struct yyguts_t*) scanner;
	glcpp_lex_node_t *saved = xtalloc (stack, glcpp_lex_node_t);

	/* Record the interrupted buffer on top of the stack... */
	saved->buffer = YY_CURRENT_BUFFER;
	saved->next = stack->head;
	stack->head = saved;

	/* ...and make a fresh buffer over 'string' the current one. */
	yy_scan_string (string, scanner);
}
/* Discard the buffer currently being scanned and resume the one most
 * recently saved by glcpp_lex_stack_push.
 *
 * Returns 1 if the stack is empty (nothing to resume; no buffer is
 * touched), 0 after successfully switching back. */
int
glcpp_lex_stack_pop (glcpp_lex_stack_t *stack)
{
	void *scanner = stack->parser->scanner;
	/* Needed by name for flex's reentrant YY_CURRENT_BUFFER macro. */
	struct yyguts_t *yyg = (struct yyguts_t*) scanner;
	glcpp_lex_node_t *top = stack->head;

	if (! top)
		return 1;

	stack->head = top->next;

	/* Drop the exhausted buffer, then reinstate the saved one. */
	yy_delete_buffer (YY_CURRENT_BUFFER, scanner);
	yy_switch_to_buffer ((YY_BUFFER_STATE) top->buffer, scanner);

	talloc_free (top);

	return 0;
}

View file

@ -25,41 +25,29 @@
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <talloc.h>
#include "glcpp.h"
#define YYLEX_PARAM parser->scanner
typedef struct {
int is_function;
string_list_t *parameters;
string_list_t *replacements;
} macro_t;
struct glcpp_parser {
yyscan_t scanner;
struct hash_table *defines;
};
void
yyerror (void *scanner, const char *error);
void
_define_object_macro (glcpp_parser_t *parser,
const char *macro,
string_list_t *replacements);
const char *replacement);
void
_define_function_macro (glcpp_parser_t *parser,
const char *macro,
string_list_t *parameters,
string_list_t *replacements);
const char *replacement);
string_list_t *
void
_expand_object_macro (glcpp_parser_t *parser, const char *identifier);
string_list_t *
void
_expand_function_macro (glcpp_parser_t *parser,
const char *identifier,
argument_list_t *arguments);
@ -76,12 +64,6 @@ _string_list_append_item (string_list_t *list, const char *str);
void
_string_list_append_list (string_list_t *list, string_list_t *tail);
void
_string_list_push (string_list_t *list, const char *str);
void
_string_list_pop (string_list_t *list);
int
_string_list_contains (string_list_t *list, const char *member, int *index);
@ -111,9 +93,9 @@ _argument_list_member_at (argument_list_t *list, int index);
%parse-param {glcpp_parser_t *parser}
%lex-param {void *scanner}
%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO SPACE TOKEN UNDEF
%type <str> FUNC_MACRO IDENTIFIER identifier_perhaps_macro OBJ_MACRO replacement_word TOKEN word
%type <string_list> argument macro parameter_list replacement_list
%token DEFINE FUNC_MACRO IDENTIFIER NEWLINE OBJ_MACRO REPLACEMENT TOKEN UNDEF
%type <str> FUNC_MACRO IDENTIFIER OBJ_MACRO REPLACEMENT TOKEN word
%type <string_list> argument macro parameter_list
%type <argument_list> argument_list
/* Hard to remove shift/reduce conflicts documented as follows:
@ -145,21 +127,21 @@ content:
printf ("%s", $1);
talloc_free ($1);
}
| macro {
_print_string_list ($1);
| directive {
printf ("\n");
}
| directive_with_newline { printf ("\n"); }
| '(' { printf ("("); }
| ')' { printf (")"); }
| ',' { printf (","); }
| macro
;
macro:
FUNC_MACRO '(' argument_list ')' {
$$ = _expand_function_macro (parser, $1, $3);
_expand_function_macro (parser, $1, $3);
}
| OBJ_MACRO {
$$ = _expand_object_macro (parser, $1);
_expand_object_macro (parser, $1);
talloc_free ($1);
}
;
@ -184,7 +166,7 @@ argument:
_string_list_append_item ($$, $1);
}
| macro {
$$ = $1;
$$ = _string_list_create (parser);
}
| argument word {
_string_list_append_item ($1, $2);
@ -199,93 +181,42 @@ argument:
}
;
directive_with_newline:
directive NEWLINE
;
directive:
DEFINE IDENTIFIER {
string_list_t *list = _string_list_create (parser);
_define_object_macro (parser, $2, list);
DEFINE IDENTIFIER REPLACEMENT {
_define_object_macro (parser, $2, $3);
}
| DEFINE IDENTIFIER SPACE replacement_list {
_define_object_macro (parser, $2, $4);
}
| DEFINE IDENTIFIER '(' parameter_list ')' {
string_list_t *list = _string_list_create (parser);
_define_function_macro (parser, $2, $4, list);
}
| DEFINE IDENTIFIER '(' parameter_list ')' replacement_list {
| DEFINE IDENTIFIER '(' parameter_list ')' REPLACEMENT {
_define_function_macro (parser, $2, $4, $6);
}
| UNDEF FUNC_MACRO {
string_list_t *replacement = hash_table_find (parser->defines, $2);
if (replacement) {
| UNDEF IDENTIFIER {
string_list_t *macro = hash_table_find (parser->defines, $2);
if (macro) {
/* XXX: Need hash table to support a real way
* to remove an element rather than prefixing
* a new node with data of NULL like this. */
hash_table_insert (parser->defines, NULL, $2);
talloc_free (replacement);
talloc_free (macro);
}
talloc_free ($2);
}
| UNDEF OBJ_MACRO {
string_list_t *replacement = hash_table_find (parser->defines, $2);
if (replacement) {
/* XXX: Need hash table to support a real way
* to remove an element rather than prefixing
* a new node with data of NULL like this. */
hash_table_insert (parser->defines, NULL, $2);
talloc_free (replacement);
}
talloc_free ($2);
}
;
replacement_list:
replacement_word {
$$ = _string_list_create (parser);
_string_list_append_item ($$, $1);
talloc_free ($1);
}
| replacement_list replacement_word {
_string_list_append_item ($1, $2);
talloc_free ($2);
$$ = $1;
}
;
replacement_word:
word { $$ = $1; }
| FUNC_MACRO { $$ = $1; }
| OBJ_MACRO { $$ = $1; }
| '(' { $$ = xtalloc_strdup (parser, "("); }
| ')' { $$ = xtalloc_strdup (parser, ")"); }
| ',' { $$ = xtalloc_strdup (parser, ","); }
;
parameter_list:
/* empty */ {
$$ = _string_list_create (parser);
}
| identifier_perhaps_macro {
| IDENTIFIER {
$$ = _string_list_create (parser);
_string_list_append_item ($$, $1);
talloc_free ($1);
}
| parameter_list ',' identifier_perhaps_macro {
| parameter_list ',' IDENTIFIER {
_string_list_append_item ($1, $3);
talloc_free ($3);
$$ = $1;
}
;
identifier_perhaps_macro:
IDENTIFIER { $$ = $1; }
| FUNC_MACRO { $$ = $1; }
| OBJ_MACRO { $$ = $1; }
;
word:
IDENTIFIER { $$ = $1; }
| TOKEN { $$ = $1; }
@ -336,45 +267,6 @@ _string_list_append_item (string_list_t *list, const char *str)
list->tail = node;
}
void
_string_list_push (string_list_t *list, const char *str)
{
string_node_t *node;
node = xtalloc (list, string_node_t);
node->str = xtalloc_strdup (node, str);
node->next = list->head;
if (list->tail == NULL) {
list->tail = node;
}
list->head = node;
}
void
_string_list_pop (string_list_t *list)
{
string_node_t *node;
node = list->head;
if (node == NULL) {
fprintf (stderr, "Internal error: _string_list_pop called on an empty list.\n");
exit (1);
}
list->head = node->next;
if (list->tail == node) {
assert (node->next == NULL);
list->tail = NULL;
}
talloc_free (node);
}
int
_string_list_contains (string_list_t *list, const char *member, int *index)
{
@ -512,6 +404,11 @@ glcpp_parser_create (void)
yylex_init_extra (parser, &parser->scanner);
parser->defines = hash_table_ctor (32, hash_table_string_hash,
hash_table_string_compare);
parser->expansions = NULL;
parser->lex_stack = xtalloc (parser, glcpp_lex_stack_t);
parser->lex_stack->parser = parser;
parser->lex_stack->head = NULL;
return parser;
}
@ -530,26 +427,46 @@ glcpp_parser_destroy (glcpp_parser_t *parser)
talloc_free (parser);
}
macro_type_t
glcpp_parser_macro_type (glcpp_parser_t *parser, const char *identifier)
token_class_t
glcpp_parser_classify_token (glcpp_parser_t *parser,
const char *identifier,
int *parameter_index)
{
macro_t *macro;
/* First we check if we are currently expanding a
* function-like macro, and if so, whether the parameter list
* contains a parameter matching this token name. */
if (parser->expansions &&
parser->expansions->macro &&
parser->expansions->macro->parameters)
{
string_list_t *list;
list = parser->expansions->macro->parameters;
if (_string_list_contains (list, identifier, parameter_index))
return TOKEN_CLASS_ARGUMENT;
}
/* If not a function-like macro parameter, we next check if
* this token is a macro itself. */
macro = hash_table_find (parser->defines, identifier);
if (macro == NULL)
return MACRO_TYPE_UNDEFINED;
return TOKEN_CLASS_IDENTIFIER;
if (macro->is_function)
return MACRO_TYPE_FUNCTION;
return TOKEN_CLASS_FUNC_MACRO;
else
return MACRO_TYPE_OBJECT;
return TOKEN_CLASS_OBJ_MACRO;
}
void
_define_object_macro (glcpp_parser_t *parser,
const char *identifier,
string_list_t *replacements)
const char *replacement)
{
macro_t *macro;
@ -557,7 +474,8 @@ _define_object_macro (glcpp_parser_t *parser,
macro->is_function = 0;
macro->parameters = NULL;
macro->replacements = talloc_steal (macro, replacements);
macro->identifier = talloc_strdup (macro, identifier);
macro->replacement = talloc_steal (macro, replacement);
hash_table_insert (parser->defines, macro, identifier);
}
@ -566,7 +484,7 @@ void
_define_function_macro (glcpp_parser_t *parser,
const char *identifier,
string_list_t *parameters,
string_list_t *replacements)
const char *replacement)
{
macro_t *macro;
@ -574,101 +492,126 @@ _define_function_macro (glcpp_parser_t *parser,
macro->is_function = 1;
macro->parameters = talloc_steal (macro, parameters);
macro->replacements = talloc_steal (macro, replacements);
macro->identifier = talloc_strdup (macro, identifier);
macro->replacement = talloc_steal (macro, replacement);
hash_table_insert (parser->defines, macro, identifier);
}
static string_list_t *
_expand_macro_recursive (glcpp_parser_t *parser,
const char *token,
string_list_t *active,
string_list_t *parameters,
argument_list_t *arguments);
static string_list_t *
_expand_string_list_recursive (glcpp_parser_t *parser,
string_list_t *list,
string_list_t *active,
string_list_t *parameters,
argument_list_t *arguments)
static void
_glcpp_parser_push_expansion_internal (glcpp_parser_t *parser,
macro_t *macro,
argument_list_t *arguments,
const char * replacement)
{
string_list_t *result;
string_list_t *child;
const char *token;
expansion_node_t *node;
node = xtalloc (parser, expansion_node_t);
node->macro = macro;
node->arguments = arguments;
node->next = parser->expansions;
parser->expansions = node;
glcpp_lex_stack_push (parser->lex_stack, replacement);
}
/* Begin expanding 'macro': record it (with the 'arguments' of this
 * invocation) on the parser's expansion stack and feed the macro's
 * replacement text to the lexer. */
void
glcpp_parser_push_expansion_macro (glcpp_parser_t *parser,
				   macro_t *macro,
				   argument_list_t *arguments)
{
	const char *text = macro->replacement;

	_glcpp_parser_push_expansion_internal (parser, macro, arguments, text);
}
void
glcpp_parser_push_expansion_argument (glcpp_parser_t *parser,
int argument_index)
{
argument_list_t *arguments;
string_list_t *argument;
string_node_t *node;
int index;
char *argument_str, *s;
int length;
result = _string_list_create (parser);
arguments = parser->expansions->arguments;
for (node = list->head ; node ; node = node->next) {
token = node->str;
argument = _argument_list_member_at (arguments, argument_index);
/* Don't expand this macro if it's on the active
* stack, (meaning we're already in the process of
* expanding it). */
if (_string_list_contains (active, token, NULL)) {
_string_list_append_item (result, token);
continue;
}
length = 0;
for (node = argument->head; node; node = node->next)
length += strlen (node->str) + 1;
if (_string_list_contains (parameters, token, &index)) {
string_list_t *argument;
argument_str = xtalloc_size (parser, length);
argument = _argument_list_member_at (arguments, index);
child = _expand_string_list_recursive (parser, argument,
active, NULL, NULL);
_string_list_append_list (result, child);
} else {
child = _expand_macro_recursive (parser, token,
active, parameters,
arguments);
_string_list_append_list (result, child);
*argument_str = '\0';
s = argument_str;
for (node = argument->head; node; node = node->next) {
strcpy (s, node->str);
s += strlen (node->str);
if (node->next) {
*s = ' ';
s++;
*s = '\0';
}
}
return result;
_glcpp_parser_push_expansion_internal (parser, NULL, NULL,
argument_str);
}
static string_list_t *
_expand_macro_recursive (glcpp_parser_t *parser,
const char *token,
string_list_t *active,
string_list_t *parameters,
argument_list_t *arguments)
/* The lexer calls this when it exhausts a string. */
void
glcpp_parser_pop_expansion (glcpp_parser_t *parser)
{
macro_t *macro;
string_list_t *replacements;
string_list_t *result;
expansion_node_t *node;
if (active == NULL)
active = _string_list_create (NULL);
node = parser->expansions;
_string_list_push (active, token);
macro = hash_table_find (parser->defines, token);
if (macro == NULL) {
string_list_t *result;
result = _string_list_create (parser);
_string_list_append_item (result, token);
return result;
if (node == NULL) {
fprintf (stderr, "Internal error: _expansion_list_pop called on an empty list.\n");
exit (1);
}
replacements = macro->replacements;
parser->expansions = node->next;
result = _expand_string_list_recursive (parser, replacements,
active, parameters, arguments);
_string_list_pop (active);
if (_string_list_length (active) == 0)
talloc_free (active);
return result;
talloc_free (node);
}
string_list_t *
/* Report whether a macro named 'member' is anywhere on the parser's
 * stack of in-progress expansions.
 *
 * Nodes with no macro attached are skipped. Returns 1 when a
 * matching macro is found, 0 otherwise. */
int
glcpp_parser_is_expanding (glcpp_parser_t *parser, const char *member)
{
	expansion_node_t *cur = parser->expansions;

	while (cur != NULL) {
		if (cur->macro != NULL &&
		    strcmp (cur->macro->identifier, member) == 0)
			return 1;
		cur = cur->next;
	}

	return 0;
}
/* Expand 'macro' (whose name is 'token') by pushing it as a new
 * expansion, unless that macro is already in the middle of being
 * expanded, in which case the bare name is printed to stdout
 * instead so that expansion cannot recurse. */
static void
_expand_macro (glcpp_parser_t *parser,
	       const char *token,
	       macro_t *macro,
	       argument_list_t *arguments)
{
	if (! glcpp_parser_is_expanding (parser, token)) {
		glcpp_parser_push_expansion_macro (parser, macro, arguments);
		return;
	}

	/* Already expanding this name: emit it untouched. */
	printf ("%s", token);
}
void
_expand_object_macro (glcpp_parser_t *parser, const char *identifier)
{
macro_t *macro;
@ -676,11 +619,10 @@ _expand_object_macro (glcpp_parser_t *parser, const char *identifier)
macro = hash_table_find (parser->defines, identifier);
assert (! macro->is_function);
return _expand_macro_recursive (parser, identifier, NULL,
NULL, NULL);
_expand_macro (parser, identifier, macro, NULL);
}
string_list_t *
void
_expand_function_macro (glcpp_parser_t *parser,
const char *identifier,
argument_list_t *arguments)
@ -698,9 +640,8 @@ _expand_function_macro (glcpp_parser_t *parser,
identifier,
_argument_list_length (arguments),
_string_list_length (macro->parameters));
return NULL;
return;
}
return _expand_macro_recursive (parser, identifier, NULL,
macro->parameters, arguments);
_expand_macro (parser, identifier, macro, arguments);
}

77
glcpp.h
View file

@ -24,11 +24,13 @@
#ifndef GLCPP_H
#define GLCPP_H
#include <talloc.h>
#include "hash_table.h"
#define yyscan_t void*
/* Some data types used for parser value. */
/* Some data types used for parser values. */
typedef struct string_node {
const char *str;
@ -52,6 +54,56 @@ typedef struct argument_list {
typedef struct glcpp_parser glcpp_parser_t;
/* Support for temporarily lexing/parsing tokens from a string. */
/* One saved lexer buffer on a glcpp_lex_stack_t. */
typedef struct glcpp_lex_node {
/* The lexer buffer that was current when this node was pushed. Kept
 * as an opaque pointer here and cast back to YY_BUFFER_STATE when
 * the lexer switches back to it. */
void *buffer;
/* The node that was on top of the stack before this one, or NULL. */
struct glcpp_lex_node *next;
} glcpp_lex_node_t;
/* Stack of lexer buffers that have been interrupted in order to lex
 * from a string. */
typedef struct {
/* Owning parser; the push/pop functions reach the scanner through
 * it. */
glcpp_parser_t *parser;
/* Most recently saved buffer, or NULL when nothing is saved. */
glcpp_lex_node_t *head;
} glcpp_lex_stack_t;
void
glcpp_lex_stack_push (glcpp_lex_stack_t *stack, const char *string);
int
glcpp_lex_stack_pop (glcpp_lex_stack_t *stack);
/* Classification of an identifier, as returned by
 * glcpp_parser_classify_token. */
typedef enum {
/* Names a parameter of the function-like macro currently being
 * expanded. */
TOKEN_CLASS_ARGUMENT,
/* Not a parameter and not defined as a macro. */
TOKEN_CLASS_IDENTIFIER,
/* Defined as a function-like macro. */
TOKEN_CLASS_FUNC_MACRO,
/* Defined as an object-like macro. */
TOKEN_CLASS_OBJ_MACRO
} token_class_t;
token_class_t
glcpp_parser_classify_token (glcpp_parser_t *parser,
const char *identifier,
int *parameter_index);
/* Definition of a single macro, as stored in the parser's "defines"
 * hash table under the macro's name. */
typedef struct {
/* Non-zero for a function-like macro, zero for an object-like one. */
int is_function;
/* Parameter names of a function-like macro; NULL for an object-like
 * macro. */
string_list_t *parameters;
/* The macro's own name, used to detect that this macro is already
 * being expanded. */
const char *identifier;
/* Replacement text kept as a single unparsed string; it is handed to
 * the lexer as a new input buffer when the macro is expanded. */
const char *replacement;
} macro_t;
/* One entry on the parser's stack of in-progress expansions. */
typedef struct expansion_node {
/* Macro being expanded, or NULL when this entry was pushed for the
 * text of a macro argument rather than for a macro body. */
macro_t *macro;
/* Arguments of the invocation being expanded; NULL for object-like
 * macros and for argument entries. */
argument_list_t *arguments;
/* The entry that was on top when this one was pushed, or NULL. */
struct expansion_node *next;
} expansion_node_t;
/* Complete state of one preprocessor instance. */
struct glcpp_parser {
/* Handle of the reentrant lexer; the parser itself is registered as
 * the lexer's "extra" data. */
yyscan_t scanner;
/* Maps a macro name to its macro_t definition. */
struct hash_table *defines;
/* Top of the stack of expansions currently in progress; NULL when
 * nothing is being expanded. */
expansion_node_t *expansions;
/* Lexer buffers suspended while replacement text is being lexed. */
glcpp_lex_stack_t *lex_stack;
};
glcpp_parser_t *
glcpp_parser_create (void);
@ -61,15 +113,17 @@ glcpp_parser_parse (glcpp_parser_t *parser);
void
glcpp_parser_destroy (glcpp_parser_t *parser);
typedef enum {
MACRO_TYPE_UNDEFINED,
MACRO_TYPE_OBJECT,
MACRO_TYPE_FUNCTION
} macro_type_t;
void
glcpp_parser_push_expansion_macro (glcpp_parser_t *parser,
macro_t *macro,
argument_list_t *arguments);
macro_type_t
glcpp_parser_macro_type (glcpp_parser_t *parser,
const char *identifier);
void
glcpp_parser_push_expansion_argument (glcpp_parser_t *parser,
int argument_index);
void
glcpp_parser_pop_expansion (glcpp_parser_t *parser);
/* Generated by glcpp-lex.l to glcpp-lex.c */
@ -91,10 +145,15 @@ yyparse (glcpp_parser_t *parser);
#define xtalloc(ctx, type) (type *)xtalloc_named_const(ctx, sizeof(type), #type)
#define xtalloc_size(ctx, size) xtalloc_named_const(ctx, size, __location__)
void *
xtalloc_named_const (const void *context, size_t size, const char *name);
char *
xtalloc_strdup (const void *t, const char *p);
char *
xtalloc_strndup (const void *t, const char *p, size_t n);
#endif

View file

@ -50,3 +50,17 @@ xtalloc_strdup (const void *t, const char *p)
return ret;
}
/* talloc_strndup that never returns NULL: on allocation failure it
 * prints "Out of memory." to stderr and exits with status 1.
 *
 * t: talloc context for the copy; p: source string; n: maximum
 * number of characters to copy. */
char *
xtalloc_strndup (const void *t, const char *p, size_t n)
{
	char *copy = talloc_strndup (t, p, n);

	if (copy != NULL)
		return copy;

	fprintf (stderr, "Out of memory.\n");
	exit (1);
}