From b21727a3fe3912511d60a24247a4baaf3544a0db Mon Sep 17 00:00:00 2001 From: Aleksey Chichenkov Date: Mon, 28 Jan 2019 15:08:59 +0300 Subject: [PATCH] first commit --- README.md | 84 + documentation/lemon.html | 987 +++++++ examples/calculator-c.y | 45 + examples/calculator-js.js | 917 ++++++ examples/calculator-js.out | 102 + examples/calculator-js.y | 74 + lemon-src/lemon-js | Bin 0 -> 86608 bytes lemon-src/lemon-js.c | 5442 +++++++++++++++++++++++++++++++++++ lemon-src/lemon.c | 5436 ++++++++++++++++++++++++++++++++++ lemon-src/lempar.js | 775 +++++ main.js | 54 + package-lock.json | 223 ++ package.json | 14 + parsers/filters/lexer.js | 1922 +++++++++++++ parsers/filters/parser.js | 1152 ++++++++ parsers/filters/parser.out | 155 + parsers/filters/parser.y | 152 + process.js | 370 +++ tests/out_test_and.json | 153 + tests/out_tree_address.json | 69 + 20 files changed, 18126 insertions(+) create mode 100644 README.md create mode 100644 documentation/lemon.html create mode 100644 examples/calculator-c.y create mode 100644 examples/calculator-js.js create mode 100644 examples/calculator-js.out create mode 100644 examples/calculator-js.y create mode 100755 lemon-src/lemon-js create mode 100644 lemon-src/lemon-js.c create mode 100644 lemon-src/lemon.c create mode 100644 lemon-src/lempar.js create mode 100644 main.js create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 parsers/filters/lexer.js create mode 100644 parsers/filters/parser.js create mode 100644 parsers/filters/parser.out create mode 100644 parsers/filters/parser.y create mode 100644 process.js create mode 100644 tests/out_test_and.json create mode 100644 tests/out_tree_address.json diff --git a/README.md b/README.md new file mode 100644 index 0000000..d687c88 --- /dev/null +++ b/README.md @@ -0,0 +1,84 @@ +# LEMON.JS - LALR(1) Parser Generator for JavaScript + +Lemon.JS is an LALR(1) parser generator for JavaScript based on Lemon parser generator for C included in SQLite 
package distribution. + +## Parser Code Base + +Files `lemon.c`, `lempar.c`, `lemon.html` are extracted from SQLite v3.17.0. Original parser generator code is slightly fixed to produce JavaScript compatible statements. Parser template translated from C to JavaScript. Source comments mostly not touched to keep it easy diff against original file. + +Both original C version and patched JS version are included for side by side comparison for reference. + +## Installation + +Compile lemon-js.c with any C compiler and place it anywhere with lempar.js side by side. + +## Compilation + +Prerequisites: C compiler, for example GCC. + +```bash +gcc -o lemon-js -O2 lemon-js.c +``` + +## Usage + +```bash +lemon-js .y +``` + +See http://www.hwaci.com/sw/lemon/lemon.html for more details. + +## Special Directives + +See lemon.html for additional documentation. + +- %name - Set parser class name (default is "Parse") +- %include - Include code in the beginning of file (useful for imports) +- %code - Include code in the end of file (useful for exports or main code) +- %token_destructor - Define code which will be executed on token destruction. +- %default_destructor +- %token_prefix - Define token name prefix. +- %syntax_error - Define custom error handler for syntax errors. +- %parse_accept - Define handler for all accepted tokens. +- %parse_failure - Define handler for parse errors. +- %stack_overflow - Define handler for stack overflow. +- %extra_argument - **NOT SUPPORTED** +- %token_type - **NOT SUPPORTED** +- %default_type - **NOT SUPPORTED** +- %stack_size - Set default stack size. +- %start_symbol +- %left - Set left associative tokens. +- %right - Set right associative tokens. +- %nonassoc - Set non associative tokens. +- %destructor - Define custom parser destructor. +- %type - **NOT SUPPORTED** +- %fallback - Define fallback logic for tokens. +- %wildcard - Define WILDCARD token. 
+- %token_class - **NOT SUPPORTED** + +Notes: + +- some expressions, for example, regular expression `/\*/` could break lemon parser in `%code` or `%include` sections. +- the best place to put something like `module.exports = ParserName;` or `export default ParserName;` is in `%code` section. + +## TODO + +- add some tests for different options +- document variables +- YYNOERRORRECOVERY ? +- YYERRORSYMBOL ? +- rename methods, variables, get rid of YY prefixes? +- enable asserts, could be useful for testing + +## Alternative Lexers + +- https://github.com/tantaman/lexed.js +- https://github.com/aaditmshah/lexer +- https://github.com/YuhangGe/jslex + +## Alternative Parsers + +- https://github.com/sormy/flex-js +- http://jscc.brobston.com +- http://zaach.github.io/jison/ +- https://pegjs.org diff --git a/documentation/lemon.html b/documentation/lemon.html new file mode 100644 index 0000000..114526f --- /dev/null +++ b/documentation/lemon.html @@ -0,0 +1,987 @@ + + +The Lemon Parser Generator + + +

The Lemon Parser Generator

+ +

Lemon is an LALR(1) parser generator for C. +It does the same job as "bison" and "yacc". +But lemon is not a bison or yacc clone. Lemon +uses a different grammar syntax which is designed to +reduce the number of coding errors. Lemon also uses a +parsing engine that is faster than yacc and +bison and which is both reentrant and threadsafe. +(Update: Since the previous sentence was written, bison +has also been updated so that it too can generate a +reentrant and threadsafe parser.) +Lemon also implements features that can be used +to eliminate resource leaks, making it suitable for use +in long-running programs such as graphical user interfaces +or embedded controllers.

+ +

This document is an introduction to the Lemon +parser generator.

+ +

Theory of Operation

+ +

The main goal of Lemon is to translate a context free grammar (CFG) +for a particular language into C code that implements a parser for +that language. +The program has two inputs: +

+Typically, only the grammar specification is supplied by the programmer. +Lemon comes with a default parser template which works fine for most +applications. But the user is free to substitute a different parser +template if desired.

+ +

Depending on command-line options, Lemon will generate between +one and three files of outputs. +

+By default, all three of these output files are generated. +The header file is suppressed if the "-m" command-line option is +used and the report file is omitted when "-q" is selected.

+ +

The grammar specification file uses a ".y" suffix, by convention. +In the examples used in this document, we'll assume the name of the +grammar file is "gram.y". A typical use of Lemon would be the +following command: +

+   lemon gram.y
+
+This command will generate three output files named "gram.c", +"gram.h" and "gram.out". +The first is C code to implement the parser. The second +is the header file that defines numerical values for all +terminal symbols, and the last is the report that explains +the states used by the parser automaton.

+ +

Command Line Options

+ +

The behavior of Lemon can be modified using command-line options. +You can obtain a list of the available command-line options together +with a brief explanation of what each does by typing +

+   lemon -?
+
+As of this writing, the following command-line options are supported: + + +

The Parser Interface

+ +

Lemon doesn't generate a complete, working program. It only generates +a few subroutines that implement a parser. This section describes +the interface to those subroutines. It is up to the programmer to +call these subroutines in an appropriate way in order to produce a +complete system.

+ +

Before a program begins using a Lemon-generated parser, the program +must first create the parser. +A new parser is created as follows: +

+   void *pParser = ParseAlloc( malloc );
+
+The ParseAlloc() routine allocates and initializes a new parser and +returns a pointer to it. +The actual data structure used to represent a parser is opaque — +its internal structure is not visible or usable by the calling routine. +For this reason, the ParseAlloc() routine returns a pointer to void +rather than a pointer to some particular structure. +The sole argument to the ParseAlloc() routine is a pointer to the +subroutine used to allocate memory. Typically this means malloc().

+ +

After a program is finished using a parser, it can reclaim all +memory allocated by that parser by calling +

+   ParseFree(pParser, free);
+
+The first argument is the same pointer returned by ParseAlloc(). The +second argument is a pointer to the function used to release bulk +memory back to the system.

+ +

After a parser has been allocated using ParseAlloc(), the programmer +must supply the parser with a sequence of tokens (terminal symbols) to +be parsed. This is accomplished by calling the following function +once for each token: +

+   Parse(pParser, hTokenID, sTokenData, pArg);
+
+The first argument to the Parse() routine is the pointer returned by +ParseAlloc(). +The second argument is a small positive integer that tells the parser the +type of the next token in the data stream. +There is one token type for each terminal symbol in the grammar. +The gram.h file generated by Lemon contains #define statements that +map symbolic terminal symbol names into appropriate integer values. +A value of 0 for the second argument is a special flag to the +parser to indicate that the end of input has been reached. +The third argument is the value of the given token. By default, +the type of the third argument is integer, but the grammar will +usually redefine this type to be some kind of structure. +Typically the second argument will be a broad category of tokens +such as "identifier" or "number" and the third argument will +be the name of the identifier or the value of the number.

+ +

The Parse() function may have either three or four arguments, +depending on the grammar. If the grammar specification file requests +it (via the extra_argument directive), +the Parse() function will have a fourth parameter that can be +of any type chosen by the programmer. The parser doesn't do anything +with this argument except to pass it through to action routines. +This is a convenient mechanism for passing state information down +to the action routines without having to use global variables.

+ +

A typical use of a Lemon parser might look something like the +following: +

+   01 ParseTree *ParseFile(const char *zFilename){
+   02    Tokenizer *pTokenizer;
+   03    void *pParser;
+   04    Token sToken;
+   05    int hTokenId;
+   06    ParserState sState;
+   07
+   08    pTokenizer = TokenizerCreate(zFilename);
+   09    pParser = ParseAlloc( malloc );
+   10    InitParserState(&sState);
+   11    while( GetNextToken(pTokenizer, &hTokenId, &sToken) ){
+   12       Parse(pParser, hTokenId, sToken, &sState);
+   13    }
+   14    Parse(pParser, 0, sToken, &sState);
+   15    ParseFree(pParser, free );
+   16    TokenizerFree(pTokenizer);
+   17    return sState.treeRoot;
+   18 }
+
+This example shows a user-written routine that parses a file of +text and returns a pointer to the parse tree. +(All error-handling code is omitted from this example to keep it +simple.) +We assume the existence of some kind of tokenizer which is created +using TokenizerCreate() on line 8 and deleted by TokenizerFree() +on line 16. The GetNextToken() function on line 11 retrieves the +next token from the input file and puts its type in the +integer variable hTokenId. The sToken variable is assumed to be +some kind of structure that contains details about each token, +such as its complete text, what line it occurs on, etc.

+ +

This example also assumes the existence of a structure of type +ParserState that holds state information about a particular parse. +An instance of such a structure is created on line 6 and initialized +on line 10. A pointer to this structure is passed into the Parse() +routine as the optional 4th argument. +The action routine specified by the grammar for the parser can use +the ParserState structure to hold whatever information is useful and +appropriate. In the example, we note that the treeRoot field of +the ParserState structure is left pointing to the root of the parse +tree.

+ +

The core of this example as it relates to Lemon is as follows: +

+   ParseFile(){
+      pParser = ParseAlloc( malloc );
+      while( GetNextToken(pTokenizer,&hTokenId, &sToken) ){
+         Parse(pParser, hTokenId, sToken);
+      }
+      Parse(pParser, 0, sToken);
+      ParseFree(pParser, free );
+   }
+
+Basically, what a program has to do to use a Lemon-generated parser +is first create the parser, then send it lots of tokens obtained by +tokenizing an input source. When the end of input is reached, the +Parse() routine should be called one last time with a token type +of 0. This step is necessary to inform the parser that the end of +input has been reached. Finally, we reclaim memory used by the +parser by calling ParseFree().

+ +

There is one other interface routine that should be mentioned +before we move on. +The ParseTrace() function can be used to generate debugging output +from the parser. A prototype for this routine is as follows: +

+   ParseTrace(FILE *stream, char *zPrefix);
+
+After this routine is called, a short (one-line) message is written +to the designated output stream every time the parser changes states +or calls an action routine. Each such message is prefaced using +the text given by zPrefix. This debugging output can be turned off +by calling ParseTrace() again with a first argument of NULL (0).

+ +

Differences With YACC and BISON

+ +

Programmers who have previously used the yacc or bison parser +generator will notice several important differences between yacc and/or +bison and Lemon. +

+These differences may cause some initial confusion for programmers +with prior yacc and bison experience. +But after years of experience using Lemon, I firmly +believe that the Lemon way of doing things is better.

+ +

Updated as of 2016-02-16: +The text above was written in the 1990s. +We are told that Bison has lately been enhanced to support the +tokenizer-calls-parser paradigm used by Lemon, and to obviate the +need for global variables.

+ +

Input File Syntax

+ +

The main purpose of the grammar specification file for Lemon is +to define the grammar for the parser. But the input file also +specifies additional information Lemon requires to do its job. +Most of the work in using Lemon is in writing an appropriate +grammar file.

+ +

The grammar file for lemon is, for the most part, free format. +It does not have sections or divisions like yacc or bison. Any +declaration can occur at any point in the file. +Lemon ignores whitespace (except where it is needed to separate +tokens) and it honors the same commenting conventions as C and C++.

+ +

Terminals and Nonterminals

+ +

A terminal symbol (token) is any string of alphanumeric +and/or underscore characters +that begins with an upper case letter. +A terminal can contain lowercase letters after the first character, +but the usual convention is to make terminals all upper case. +A nonterminal, on the other hand, is any string of alphanumeric +and underscore characters that begins with a lower case letter. +Again, the usual convention is to make nonterminals use all lower +case letters.

+ +

In Lemon, terminal and nonterminal symbols do not need to +be declared or identified in a separate section of the grammar file. +Lemon is able to generate a list of all terminals and nonterminals +by examining the grammar rules, and it can always distinguish a +terminal from a nonterminal by checking the case of the first +character of the name.

+ +

Yacc and bison allow terminal symbols to have either alphanumeric +names or to be individual characters included in single quotes, like +this: ')' or '$'. Lemon does not allow this alternative form for +terminal symbols. With Lemon, all symbols, terminals and nonterminals, +must have alphanumeric names.

+ +

Grammar Rules

+ +

The main component of a Lemon grammar file is a sequence of grammar +rules. +Each grammar rule consists of a nonterminal symbol followed by +the special symbol "::=" and then a list of terminals and/or nonterminals. +The rule is terminated by a period. +The list of terminals and nonterminals on the right-hand side of the +rule can be empty. +Rules can occur in any order, except that the left-hand side of the +first rule is assumed to be the start symbol for the grammar (unless +specified otherwise using the %start directive described below.) +A typical sequence of grammar rules might look something like this: +

+  expr ::= expr PLUS expr.
+  expr ::= expr TIMES expr.
+  expr ::= LPAREN expr RPAREN.
+  expr ::= VALUE.
+
+

+ +

There is one non-terminal in this example, "expr", and five +terminal symbols or tokens: "PLUS", "TIMES", "LPAREN", +"RPAREN" and "VALUE".

+ +

Like yacc and bison, Lemon allows the grammar to specify a block +of C code that will be executed whenever a grammar rule is reduced +by the parser. +In Lemon, this action is specified by putting the C code (contained +within curly braces {...}) immediately after the +period that closes the rule. +For example: +

+  expr ::= expr PLUS expr.   { printf("Doing an addition...\n"); }
+
+

+ +

In order to be useful, grammar actions must normally be linked to +their associated grammar rules. +In yacc and bison, this is accomplished by embedding a "$$" in the +action to stand for the value of the left-hand side of the rule and +symbols "$1", "$2", and so forth to stand for the value of +the terminal or nonterminal at position 1, 2 and so forth on the +right-hand side of the rule. +This idea is very powerful, but it is also very error-prone. The +single most common source of errors in a yacc or bison grammar is +to miscount the number of symbols on the right-hand side of a grammar +rule and say "$7" when you really mean "$8".

+ +

Lemon avoids the need to count grammar symbols by assigning symbolic +names to each symbol in a grammar rule and then using those symbolic +names in the action. +In yacc or bison, one would write this: +

+  expr -> expr PLUS expr  { $$ = $1 + $3; };
+
+But in Lemon, the same rule becomes the following: +
+  expr(A) ::= expr(B) PLUS expr(C).  { A = B+C; }
+
+In the Lemon rule, any symbol in parentheses after a grammar rule +symbol becomes a place holder for that symbol in the grammar rule. +This place holder can then be used in the associated C action to +stand for the value of that symbol.

+ +

The Lemon notation for linking a grammar rule with its reduce +action is superior to yacc/bison on several counts. +First, as mentioned above, the Lemon method avoids the need to +count grammar symbols. +Secondly, if a terminal or nonterminal in a Lemon grammar rule +includes a linking symbol in parentheses but that linking symbol +is not actually used in the reduce action, then an error message +is generated. +For example, the rule +

+  expr(A) ::= expr(B) PLUS expr(C).  { A = B; }
+
+will generate an error because the linking symbol "C" is used +in the grammar rule but not in the reduce action.

+ +

The Lemon notation for linking grammar rules to reduce actions +also facilitates the use of destructors for reclaiming memory +allocated by the values of terminals and nonterminals on the +right-hand side of a rule.

+ + +

Precedence Rules

+ +

Lemon resolves parsing ambiguities in exactly the same way as +yacc and bison. A shift-reduce conflict is resolved in favor +of the shift, and a reduce-reduce conflict is resolved by reducing +whichever rule comes first in the grammar file.

+ +

Just like in +yacc and bison, Lemon allows a measure of control +over the resolution of parsing conflicts using precedence rules. +A precedence value can be assigned to any terminal symbol +using the +%left, +%right or +%nonassoc directives. Terminal symbols +mentioned in earlier directives have a lower precedence than +terminal symbols mentioned in later directives. For example:

+ +

+   %left AND.
+   %left OR.
+   %nonassoc EQ NE GT GE LT LE.
+   %left PLUS MINUS.
+   %left TIMES DIVIDE MOD.
+   %right EXP NOT.
+

+ +

In the preceding sequence of directives, the AND operator is +defined to have the lowest precedence. The OR operator is one +precedence level higher. And so forth. Hence, the grammar would +attempt to group the ambiguous expression +

+     a AND b OR c
+
+like this +
+     a AND (b OR c).
+
+The associativity (left, right or nonassoc) is used to determine +the grouping when the precedence is the same. AND is left-associative +in our example, so +
+     a AND b AND c
+
+is parsed like this +
+     (a AND b) AND c.
+
+The EXP operator is right-associative, though, so +
+     a EXP b EXP c
+
+is parsed like this +
+     a EXP (b EXP c).
+
+The nonassoc precedence is used for non-associative operators. +So +
+     a EQ b EQ c
+
+is an error.

+ +

The precedence of non-terminals is transferred to rules as follows: +The precedence of a grammar rule is equal to the precedence of the +left-most terminal symbol in the rule for which a precedence is +defined. This is normally what you want, but in those cases where +you want the precedence of a grammar rule to be something different, +you can specify an alternative precedence symbol by putting the +symbol in square brackets after the period at the end of the rule and +before any C-code. For example:

+ +

+   expr ::= MINUS expr.  [NOT]
+

+ +

This rule has a precedence equal to that of the NOT symbol, not the +MINUS symbol as would have been the case by default.

+ +

With the knowledge of how precedence is assigned to terminal +symbols and individual +grammar rules, we can now explain precisely how parsing conflicts +are resolved in Lemon. Shift-reduce conflicts are resolved +as follows: +

+Reduce-reduce conflicts are resolved this way: + + +

Special Directives

+ +

The input grammar to Lemon consists of grammar rules and special +directives. We've described all the grammar rules, so now we'll +talk about the special directives.

+ +

Directives in lemon can occur in any order. You can put them before +the grammar rules, or after the grammar rules, or in the midst of the +grammar rules. It doesn't matter. The relative order of +directives used to assign precedence to terminals is important, but +other than that, the order of directives in Lemon is arbitrary.

+ +

Lemon supports the following special directives: +

+Each of these directives will be described separately in the +following sections:

+ + +

The %code directive

+ +

The %code directive is used to specify additional C code that +is added to the end of the main output file. This is similar to +the %include directive except that %include +is inserted at the beginning of the main output file.

+ +

%code is typically used to include some action routines or perhaps +a tokenizer or even the "main()" function +as part of the output file.

+ + +

The %default_destructor directive

+ +

The %default_destructor directive specifies a destructor to +use for non-terminals that do not have their own destructor +specified by a separate %destructor directive. See the documentation +on the %destructor directive below for +additional information.

+ +

In some grammars, many different non-terminal symbols have the +same datatype and hence the same destructor. This directive is +a convenient way to specify the same destructor for all those +non-terminals using a single statement.

+ + +

The %default_type directive

+ +

The %default_type directive specifies the datatype of non-terminal +symbols that do not have their own datatype defined using a separate +%type directive. +

+ + +

The %destructor directive

+ +

The %destructor directive is used to specify a destructor for +a non-terminal symbol. +(See also the %token_destructor +directive which is used to specify a destructor for terminal symbols.)

+ +

A non-terminal's destructor is called to dispose of the +non-terminal's value whenever the non-terminal is popped from +the stack. This includes all of the following circumstances: +

+The destructor can do whatever it wants with the value of +the non-terminal, but its design is to deallocate memory +or other resources held by that non-terminal.

+ +

Consider an example: +

+   %type nt {void*}
+   %destructor nt { free($$); }
+   nt(A) ::= ID NUM.   { A = malloc( 100 ); }
+
+This example is a bit contrived but it serves to illustrate how +destructors work. The example shows a non-terminal named +"nt" that holds values of type "void*". When the rule for +an "nt" reduces, it sets the value of the non-terminal to +space obtained from malloc(). Later, when the nt non-terminal +is popped from the stack, the destructor will fire and call +free() on this malloced space, thus avoiding a memory leak. +(Note that the symbol "$$" in the destructor code is replaced +by the value of the non-terminal.)

+ +

It is important to note that the value of a non-terminal is passed +to the destructor whenever the non-terminal is removed from the +stack, unless the non-terminal is used in a C-code action. If +the non-terminal is used by C-code, then it is assumed that the +C-code will take care of destroying it. +More commonly, the value is used to build some +larger structure and we don't want to destroy it, which is why +the destructor is not called in this circumstance.

+ +

Destructors help avoid memory leaks by automatically freeing +allocated objects when they go out of scope. +To do the same using yacc or bison is much more difficult.

+ + +

The %extra_argument directive

+ +The %extra_argument directive instructs Lemon to add a 4th parameter +to the parameter list of the Parse() function it generates. Lemon +doesn't do anything itself with this extra argument, but it does +make the argument available to C-code action routines, destructors, +and so forth. For example, if the grammar file contains:

+ +

+    %extra_argument { MyStruct *pAbc }
+

+ +

Then the Parse() function generated will have a 4th parameter +of type "MyStruct*" and all action routines will have access to +a variable named "pAbc" that is the value of the 4th parameter +in the most recent call to Parse().

+ + +

The %fallback directive

+ +

The %fallback directive specifies an alternative meaning for one +or more tokens. The alternative meaning is tried if the original token +would have generated a syntax error. + +

The %fallback directive was added to support robust parsing of SQL +syntax in SQLite. +The SQL language contains a large assortment of keywords, each of which +appears as a different token to the language parser. SQL contains so +many keywords, that it can be difficult for programmers to keep up with +them all. Programmers will, therefore, sometimes mistakenly use an +obscure language keyword for an identifier. The %fallback directive +provides a mechanism to tell the parser: "If you are unable to parse +this keyword, try treating it as an identifier instead." + +

The syntax of %fallback is as follows: + +

+%fallback ID TOKEN... . +
+ +

In words, the %fallback directive is followed by a list of token names +terminated by a period. The first token name is the fallback token - the +token to which all the other tokens fall back to. The second and subsequent +arguments are tokens which fall back to the token identified by the first +argument. + + +

The %ifdef, %ifndef, and %endif directives.

+ +

The %ifdef, %ifndef, and %endif directives are similar to +#ifdef, #ifndef, and #endif in the C-preprocessor, just not as general. +Each of these directives must begin at the left margin. No whitespace +is allowed between the "%" and the directive name. + +

Grammar text in between "%ifdef MACRO" and the next nested "%endif" is +ignored unless the "-DMACRO" command-line option is used. Grammar text +between "%ifndef MACRO" and the next nested "%endif" is included except when +the "-DMACRO" command-line option is used. + +

Note that the argument to %ifdef and %ifndef must be a single +preprocessor symbol name, not a general expression. There is no "%else" +directive. + + + +

The %include directive

+ +

The %include directive specifies C code that is included at the +top of the generated parser. You can include any text you want -- +the Lemon parser generator copies it blindly. If you have multiple +%include directives in your grammar file, their values are concatenated +so that all %include code ultimately appears near the top of the +generated parser, in the same order as it appeared in the grammar.

+ +

The %include directive is very handy for getting some extra #include +preprocessor statements at the beginning of the generated parser. +For example:

+ +

+   %include {#include <unistd.h>}
+

+ +

This might be needed, for example, if some of the C actions in the +grammar call functions that are prototyped in unistd.h.

+ + +

The %left directive

+ +The %left directive is used (along with the %right and +%nonassoc directives) to declare precedences of +terminal symbols. Every terminal symbol whose name appears after +a %left directive but before the next period (".") is +given the same left-associative precedence value. Subsequent +%left directives have higher precedence. For example:

+ +

+   %left AND.
+   %left OR.
+   %nonassoc EQ NE GT GE LT LE.
+   %left PLUS MINUS.
+   %left TIMES DIVIDE MOD.
+   %right EXP NOT.
+

+ +

Note the period that terminates each %left, %right or %nonassoc +directive.

+ +

LALR(1) grammars can get into a situation where they require +a large amount of stack space if you make heavy use of right-associative +operators. For this reason, it is recommended that you use %left +rather than %right whenever possible.

+ + +

The %name directive

+ +

By default, the functions generated by Lemon all begin with the +five-character string "Parse". You can change this string to something +different using the %name directive. For instance:

+ +

+   %name Abcde
+

+ +

Putting this directive in the grammar file will cause Lemon to generate +functions named +

+The %name directive allows you to generate two or more different +parsers and link them all into the same executable. +

+ + +

The %nonassoc directive

+ +

This directive is used to assign non-associative precedence to +one or more terminal symbols. See the section on +precedence rules +or on the %left directive for additional information.

+ + +

The %parse_accept directive

+ +

The %parse_accept directive specifies a block of C code that is +executed whenever the parser accepts its input string. To "accept" +an input string means that the parser was able to process all tokens +without error.

+ +

For example:

+ +

+   %parse_accept {
+      printf("parsing complete!\n");
+   }
+

+ + +

The %parse_failure directive

+ +

The %parse_failure directive specifies a block of C code that +is executed whenever the parser fails completely. This code is not +executed until the parser has tried and failed to resolve an input +error using its usual error recovery strategy. The routine is +only invoked when parsing is unable to continue.

+ +

+   %parse_failure {
+     fprintf(stderr,"Giving up.  Parser is hopelessly lost...\n");
+   }
+

+ + +

The %right directive

+ +

This directive is used to assign right-associative precedence to +one or more terminal symbols. See the section on +precedence rules +or on the %left directive for additional information.

+ + +

The %stack_overflow directive

+ +

The %stack_overflow directive specifies a block of C code that +is executed if the parser's internal stack ever overflows. Typically +this just prints an error message. After a stack overflow, the parser +will be unable to continue and must be reset.

+ +

+   %stack_overflow {
+     fprintf(stderr,"Giving up.  Parser stack overflow\n");
+   }
+

+ +

You can help prevent parser stack overflows by avoiding the use +of right recursion and right-precedence operators in your grammar. +Use left recursion and left-precedence operators instead, to +encourage rules to reduce sooner and keep the stack size down. +For example, do rules like this: +

+   list ::= list element.      // left-recursion.  Good!
+   list ::= .
+
+Not like this: +
+   list ::= element list.      // right-recursion.  Bad!
+   list ::= .
+
+ + +

The %stack_size directive

+ +

If stack overflow is a problem and you can't resolve the trouble +by using left-recursion, then you might want to increase the size +of the parser's stack using this directive. Put a positive integer +after the %stack_size directive and Lemon will generate a parser +with a stack of the requested size. The default value is 100.

+ +

+   %stack_size 2000
+

+ + +

The %start_symbol directive

+ +

By default, the start-symbol for the grammar that Lemon generates +is the first non-terminal that appears in the grammar file. But you +can choose a different start-symbol using the %start_symbol directive.

+ +

+   %start_symbol  prog
+

+ + +

The %token_destructor directive

+ +

The %destructor directive assigns a destructor to a non-terminal +symbol. (See the description of the %destructor directive above.) +This directive does the same thing for all terminal symbols.

+ +

Unlike non-terminal symbols which may each have a different data type +for their values, terminals all use the same data type (defined by +the %token_type directive) and so they use a common destructor. Other +than that, the token destructor works just like the non-terminal +destructors.

+ + +

The %token_prefix directive

+ +

Lemon generates #defines that assign small integer constants +to each terminal symbol in the grammar. If desired, Lemon will +add a prefix specified by this directive +to each of the #defines it generates. +So if the default output of Lemon looked like this: +

+    #define AND              1
+    #define MINUS            2
+    #define OR               3
+    #define PLUS             4
+
+You can insert a statement into the grammar like this: +
+    %token_prefix    TOKEN_
+
+to cause Lemon to produce these symbols instead: +
+    #define TOKEN_AND        1
+    #define TOKEN_MINUS      2
+    #define TOKEN_OR         3
+    #define TOKEN_PLUS       4
+
+ + +

The %token_type and %type directives

+ +

These directives are used to specify the data types for values +on the parser's stack associated with terminal and non-terminal +symbols. The values of all terminal symbols must be of the same +type. This turns out to be the same data type as the 3rd parameter +to the Parse() function generated by Lemon. Typically, you will +make the value of a terminal symbol be a pointer to some kind of +token structure. Like this:

+ +

+   %token_type    {Token*}
+

+ +

If the data type of terminals is not specified, the default value +is "void*".

+ +

Non-terminal symbols can each have their own data types. Typically +the data type of a non-terminal is a pointer to the root of a parse-tree +structure that contains all information about that non-terminal. +For example:

+ +

+   %type   expr  {Expr*}
+

+ +

Each entry on the parser's stack is actually a union containing +instances of all data types for every non-terminal and terminal symbol. +Lemon will automatically use the correct element of this union depending +on what the corresponding non-terminal or terminal symbol is. But +the grammar designer should keep in mind that the size of the union +will be the size of its largest element. So if you have a single +non-terminal whose data type requires 1K of storage, then your 100 +entry parser stack will require 100K of heap space. If you are willing +and able to pay that price, fine. You just need to know.

+ + +

The %wildcard directive

+ +

The %wildcard directive is followed by a single token name and a +period. This directive specifies that the identified token should +match any input token. + +

When the generated parser has the choice of matching an input against +the wildcard token and some other token, the other token is always used. +The wildcard token is only matched if there are no other alternatives. + +

Error Processing

+ +

After extensive experimentation over several years, it has been +discovered that the error recovery strategy used by yacc is about +as good as it gets. And so that is what Lemon uses.

+ +

When a Lemon-generated parser encounters a syntax error, it +first invokes the code specified by the %syntax_error directive, if +any. It then enters its error recovery strategy. The error recovery +strategy is to begin popping the parser's stack until it enters a +state where it is permitted to shift a special non-terminal symbol +named "error". It then shifts this non-terminal and continues +parsing. But the %syntax_error routine will not be called again +until at least three new tokens have been successfully shifted.

+ +

If the parser pops its stack until the stack is empty, and it still +is unable to shift the error symbol, then the %parse_failure routine +is invoked and the parser resets itself to its start state, ready +to begin parsing a new file. This is what will happen at the very +first syntax error, of course, if there are no instances of the +"error" non-terminal in your grammar.

+ + + diff --git a/examples/calculator-c.y b/examples/calculator-c.y new file mode 100644 index 0000000..60d7dd9 --- /dev/null +++ b/examples/calculator-c.y @@ -0,0 +1,45 @@ +%token_type {int} + +%left PLUS MINUS. +%left DIVIDE TIMES. + +%include { + #include + #include + #include + #include "calculator-c.h" +} + +%code { + int main() + { + void* pParser = ParseAlloc(malloc); + ParseTrace(stderr, "> "); + Parse(pParser, INTEGER, 1); + Parse(pParser, PLUS, 0); + Parse(pParser, INTEGER, 2); + Parse(pParser, TIMES, 0); + Parse(pParser, INTEGER, 10); + Parse(pParser, DIVIDE, 0); + Parse(pParser, INTEGER, 2); + Parse(pParser, 0, 0); + ParseFree(pParser, free); + } +} + +%syntax_error { + fprintf(stderr, "Syntax error\n"); +} + +program ::= expr(A). { printf("Result=%d\n", A); } +expr(A) ::= expr(B) MINUS expr(C). { A = B - C; } +expr(A) ::= expr(B) PLUS expr(C). { A = B + C; } +expr(A) ::= expr(B) TIMES expr(C). { A = B * C; } +expr(A) ::= expr(B) DIVIDE expr(C). { + if (C != 0) { + A = B / C; + } else { + fprintf(stderr, "Divide by zero\n"); + } +} +expr(A) ::= INTEGER(B). { A = B; } diff --git a/examples/calculator-js.js b/examples/calculator-js.js new file mode 100644 index 0000000..110cde4 --- /dev/null +++ b/examples/calculator-js.js @@ -0,0 +1,917 @@ +/* +** 2000-05-29 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +** Based on SQLite distribution v3.17.0 +** Adopted for JavaScript by Artem Butusov +** +************************************************************************* +** Driver template for the LEMON parser generator. +** +** The "lemon" program processes an LALR(1) input grammar file, then uses +** this template to construct a parser. The "lemon" program inserts text +** at each "%%" line. 
Also, any "P-a-r-s-e" identifer prefix (without the +** interstitial "-" characters) contained in this template is changed into +** the value of the %name directive from the grammar. Otherwise, the content +** of this template is copied straight through into the generate parser +** source file. +** +** The following is the concatenation of all %include directives from the +** input grammar file: +*/ +/************ Begin %include sections from the grammar ************************/ +// line 8 "examples/calculator-js.y" + + // include something +// line 33 "examples/calculator-js.js" +/**************** End of %include directives **********************************/ +function Parser() { +/* These constants specify the various numeric values for terminal symbols +** in a format understandable to "makeheaders". +***************** Begin makeheaders token definitions *************************/ +this.TOKEN_PLUS = 1; +this.TOKEN_MINUS = 2; +this.TOKEN_DIVIDE = 3; +this.TOKEN_TIMES = 4; +this.TOKEN_INTEGER = 5; +/**************** End makeheaders token definitions ***************************/ + +/* The next sections is a series of control #defines. +** various aspects of the generated parser. +** YYNOCODE is a number of type YYCODETYPE that is not used for +** any terminal or nonterminal symbol. +** YYFALLBACK If defined, this indicates that one or more tokens +** (also known as: "terminal symbols") have fall-back +** values which should be used if the original symbol +** would not parse. This permits keywords to sometimes +** be used as identifiers, for example. +** YYSTACKDEPTH is the maximum depth of the parser's stack. If +** zero the stack is dynamically sized using realloc() +** YYERRORSYMBOL is the code number of the error symbol. If not +** defined, then do no error processing. +** YYNSTATE the combined number of states. 
+** YYNRULE the number of rules in the grammar +** YY_MAX_SHIFT Maximum value for shift actions +** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions +** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions +** YY_MIN_REDUCE Maximum value for reduce actions +** YY_ERROR_ACTION The yy_action[] code for syntax error +** YY_ACCEPT_ACTION The yy_action[] code for accept +** YY_NO_ACTION The yy_action[] code for no-op +*/ +/************* Begin control #defines *****************************************/ +this.YYNOCODE = 10; +this.YYSTACKDEPTH = 100; +this.YYFALLBACK = false; +this.YYNSTATE = 8; +this.YYNRULE = 6; +this.YY_MAX_SHIFT = 7; +this.YY_MIN_SHIFTREDUCE = 11; +this.YY_MAX_SHIFTREDUCE = 16; +this.YY_MIN_REDUCE = 17; +this.YY_MAX_REDUCE = 22; +this.YY_ERROR_ACTION = 23; +this.YY_ACCEPT_ACTION = 24; +this.YY_NO_ACTION = 25; +/************* End control #defines *******************************************/ + +/* Define the yytestcase() macro to be a no-op if is not already defined +** otherwise. +** +** Applications can choose to define yytestcase() in the %include section +** to a macro that can assist in verifying code coverage. For production +** code the yytestcase() macro should be turned off. But it is useful +** for testing. +*/ +if (!this.yytestcase) { + this.yytestcase = function () {}; +} + + +/* Next are the tables used to determine what action to take based on the +** current state and lookahead token. These tables are used to implement +** functions that take a state number and lookahead value and return an +** action integer. +** +** Suppose the action integer is N. Then the action is determined as +** follows +** +** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead +** token onto the stack and goto state N. +** +** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then +** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE. 
+** +** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE +** and YY_MAX_REDUCE +** +** N == YY_ERROR_ACTION A syntax error has occurred. +** +** N == YY_ACCEPT_ACTION The parser accepts its input. +** +** N == YY_NO_ACTION No such action. Denotes unused +** slots in the yy_action[] table. +** +** The action table is constructed as a single large table named yy_action[]. +** Given state S and lookahead X, the action is computed as either: +** +** (A) N = yy_action[ yy_shift_ofst[S] + X ] +** (B) N = yy_default[S] +** +** The (A) formula is preferred. The B formula is used instead if: +** (1) The yy_shift_ofst[S]+X value is out of range, or +** (2) yy_lookahead[yy_shift_ofst[S]+X] is not equal to X, or +** (3) yy_shift_ofst[S] equal YY_SHIFT_USE_DFLT. +** (Implementation note: YY_SHIFT_USE_DFLT is chosen so that +** YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X. +** Hence only tests (1) and (2) need to be evaluated.) +** +** The formulas above are for computing the action when the lookahead is +** a terminal symbol. If the lookahead is a non-terminal (as occurs after +** a reduce action) then the yy_reduce_ofst[] array is used in place of +** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of +** YY_SHIFT_USE_DFLT. +** +** The following are the tables generated in this section: +** +** yy_action[] A single table containing all actions. +** yy_lookahead[] A table containing the lookahead for each entry in +** yy_action. Used to detect hash collisions. +** yy_shift_ofst[] For each state, the offset into yy_action for +** shifting terminals. +** yy_reduce_ofst[] For each state, the offset into yy_action for +** shifting non-terminals after a reduce. +** yy_default[] Default action for each state. 
+** +*********** Begin parsing tables **********************************************/ +this.yy_action = [ + /* 0 */ 17, 3, 4, 1, 2, 24, 5, 1, 2, 15, + /* 10 */ 16, 14, 19, 19, 6, 7, +]; +this.yy_lookahead = [ + /* 0 */ 0, 1, 2, 3, 4, 7, 8, 3, 4, 8, + /* 10 */ 5, 8, 9, 9, 8, 8, +]; +this.YY_SHIFT_USE_DFLT = 16; +this.YY_SHIFT_COUNT = 7; +this.YY_SHIFT_MIN = 0; +this.YY_SHIFT_MAX = 5; +this.yy_shift_ofst = [ + /* 0 */ 5, 5, 5, 5, 5, 0, 4, 4, +]; +this.YY_REDUCE_USE_DFLT = -3; +this.YY_REDUCE_COUNT = 4; +this.YY_REDUCE_MIN = -2; +this.YY_REDUCE_MAX = 7; +this.yy_reduce_ofst = [ + /* 0 */ -2, 1, 3, 6, 7, +]; +this.yy_default = [ + /* 0 */ 23, 23, 23, 23, 23, 23, 19, 18, +]; +/********** End of lemon-generated parsing tables *****************************/ + +/* The next table maps tokens (terminal symbols) into fallback tokens. +** If a construct like the following: +** +** %fallback ID X Y Z. +** +** appears in the grammar, then ID becomes a fallback token for X, Y, +** and Z. Whenever one of the tokens X, Y, or Z is input to the parser +** but it does not parse, the type of the token is changed to ID and +** the parse is retried before an error is thrown. +** +** This feature can be used, for example, to cause some keywords in a language +** to revert to identifiers if they keyword does not apply in the context where +** it appears. +*/ +this.yyFallback = [ +]; + +/* The following structure represents a single element of the +** parser's stack. Information stored includes: +** +** + The state number for the parser at this level of the stack. +** +** + The value of the token stored at this level of the stack. +** (In other words, the "major" token.) +** +** + The semantic value stored at this level of the stack. This is +** the information used by the action routines in the grammar. +** It is sometimes called the "minor" token. 
+** +** After the "shift" half of a SHIFTREDUCE action, the stateno field +** actually contains the reduce action for the second half of the +** SHIFTREDUCE. +*/ +//{ +// stateno, /* The state-number, or reduce action in SHIFTREDUCE */ +// major, /* The major token value. This is the code +// ** number for the token at this stack level */ +// minor, /* The user-supplied minor token value. This +// ** is the value of the token */ +//} + +/* The state of the parser is completely contained in an instance of +** the following structure */ +this.yyhwm = 0; /* High-water mark of the stack */ +this.yyerrcnt = -1; /* Shifts left before out of the error */ +this.yystack = null; /* The parser's stack */ +this.yyidx = -1; /* Stack index of current element in the stack */ + +this.yyTraceCallback = null; +this.yyTracePrompt = ""; + +/* +** Turn parser tracing on by giving a stream to which to write the trace +** and a prompt to preface each trace message. Tracing is turned off +** by making either argument NULL +** +** Inputs: +**
    +**
  • A callback to which trace output should be written. +** If NULL, then tracing is turned off. +**
  • A prefix string written at the beginning of every +** line of trace output. Default is "". +**
+** +** Outputs: +** None. +*/ +this.setTraceCallback = function (callback, prompt) { + this.yyTraceCallback = callback; + this.yyTracePrompt = prompt || ""; +} + +this.trace = function (message) { + this.yyTraceCallback(this.yyTracePrompt + message + "\n"); +} + +/* For tracing shifts, the names of all terminals and nonterminals +** are required. The following table supplies these names */ +this.yyTokenName = [ + "$", "PLUS", "MINUS", "DIVIDE", + "TIMES", "INTEGER", "error", "program", + "expr", +]; + +/* For tracing reduce actions, the names of all rules are required. +*/ +this.yyRuleName = [ + /* 0 */ "program ::= expr", + /* 1 */ "expr ::= expr MINUS expr", + /* 2 */ "expr ::= expr PLUS expr", + /* 3 */ "expr ::= expr TIMES expr", + /* 4 */ "expr ::= expr DIVIDE expr", + /* 5 */ "expr ::= INTEGER", +]; +/* +** Try to increase the size of the parser stack. Return the number +** of errors. Return 0 on success. +*/ +this.yyGrowStack = function () { + // fix me: yystksz*2 + 100 + this.yystack.push({ + stateno: undefined, + major: undefined, + minor: undefined + }); +} + +/* Initialize a new parser that has already been allocated. +*/ +this.init = function () { + this.yyhwm = 0; + this.yyerrcnt = -1; + this.yyidx = 0; + if (this.YYSTACKDEPTH <= 0) { + this.yystack = []; + this.yyGrowStack(); + } else { + this.yystack = new Array(this.YYSTACKDEPTH); + for (var i = 0; i < this.YYSTACKDEPTH; i++) { + this.yystack[i] = { + stateno: undefined, + major: undefined, + minor: undefined + }; + } + } + var yytos = this.yystack[0]; + yytos.stateno = 0; + yytos.major = 0; +} + +/* The following function deletes the "minor type" or semantic value +** associated with a symbol. The symbol can be either a terminal +** or nonterminal. "yymajor" is the symbol code, and "yypminor" is +** a pointer to the value to be deleted. The code used to do the +** deletions is derived from the %destructor and/or %token_destructor +** directives of the input grammar. 
+*/ +this.yy_destructor = function ( + yymajor, /* Type code for object to destroy */ + yyminor /* The object to be destroyed */ +) { + switch (yymajor) { + /* Here is inserted the actions which take place when a + ** terminal or non-terminal is destroyed. This can happen + ** when the symbol is popped from the stack during a + ** reduce or during error processing or when a parser is + ** being destroyed before it is finished parsing. + ** + ** Note: during a reduce, the only symbols destroyed are those + ** which appear on the RHS of the rule, but which are *not* used + ** inside the C code. + */ +/********* Begin destructor definitions ***************************************/ +/********* End destructor definitions *****************************************/ + default: break; /* If no destructor action specified: do nothing */ + } +} + +/* +** Pop the parser's stack once. +** +** If there is a destructor routine associated with the token which +** is popped from the stack, then call it. +*/ +this.yy_pop_parser_stack = function () { + // assert( pParser->yytos!=0 ); + // assert( pParser->yytos > pParser->yystack ); + var yytos = this.yystack[this.yyidx]; + + if (this.yyTraceCallback) { + this.trace("Popping " + this.yyTokenName[yytos.major]); + } + this.yy_destructor(yytos.major, yytos.minor); + + this.yyidx--; +} + +/* +** Clear all secondary memory allocations from the parser +*/ +this.finalize = function () { + while (this.yyidx > 0) { + this.yy_pop_parser_stack(); + } + this.yystack = null; +} + +/* +** Return the peak depth of the stack for a parser. +*/ +this.getStackPeak = function () { + return this.yyhwm; +} + +/* +** Find the appropriate action for a parser given the terminal +** look-ahead token iLookAhead. 
+*/ +this.yy_find_shift_action = function ( + iLookAhead /* The look-ahead token */ +) { + var yytos = this.yystack[this.yyidx]; + var stateno = yytos.stateno; + + if (stateno >= this.YY_MIN_REDUCE) { + return stateno; + } + + // assert( stateno <= YY_SHIFT_COUNT ); + + do { + var i = this.yy_shift_ofst[stateno]; + // assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; + if (i < 0 || i >= this.yy_action.length || this.yy_lookahead[i] != iLookAhead) { + if (this.YYFALLBACK) { + var iFallback; /* Fallback token */ + if ((iLookAhead < this.yyFallback.length) + && (iFallback = this.yyFallback[iLookAhead]) != 0 + ) { + if (this.yyTraceCallback) { + this.trace("FALLBACK " + this.yyTokenName[iLookAhead] + " => " + this.yyTokenName[iFallback]); + } + } + // assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */ + iLookAhead = iFallback; + continue; + } + + if (this.YYWILDCARD) { + var j = i - iLookAhead + this.YYWILDCARD; + var cond1 = (this.YY_SHIFT_MIN + this.YYWILDCARD) < 0 ? j >= 0 : true; + var cond2 = (this.YY_SHIFT_MAX + this.YYWILDCARD) >= this.yy_action.length ? j < this.yy_action.length : true; + if (cond1 && cond2 && this.yy_lookahead[j] == this.YYWILDCARD && iLookAhead > 0) { + if (this.yyTraceCallback) { + this.trace("WILDCARD " + this.yyTokenName[iLookAhead] + " => " + this.yyTokenName[this.YYWILDCARD]); + } + return this.yy_action[j]; + } + } + + return this.yy_default[stateno]; + } else { + return this.yy_action[i]; + } + } while (true); +} + +/* +** Find the appropriate action for a parser given the non-terminal +** look-ahead token iLookAhead. 
+*/ +this.yy_find_reduce_action = function ( + stateno, /* Current state number */ + iLookAhead /* The look-ahead token */ +) { + if (this.YYERRORSYMBOL) { + if (stateno > this.YY_REDUCE_COUNT) { + return this.yy_default[stateno]; + } + } else { + // assert( stateno<=YY_REDUCE_COUNT ); + } + + var i = this.yy_reduce_ofst[stateno]; + // assert( i!=YY_REDUCE_USE_DFLT ); + // assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; + + if (this.YYERRORSYMBOL) { + if (i < 0 || i >= this.yy_action.length || this.yy_lookahead[i] != iLookAhead) { + return this.yy_default[stateno]; + } + } else { + // assert( i>=0 && i 0) { + this.yy_pop_parser_stack(); + } + /* Here code is inserted which will execute if the parser + ** stack every overflows */ +/******** Begin %stack_overflow code ******************************************/ +/******** End %stack_overflow code ********************************************/ +} + +/* +** Print tracing information for a SHIFT action +*/ +this.yyTraceShift = function (yyNewState) { + if (this.yyTraceCallback) { + var yytos = this.yystack[this.yyidx]; + if (yyNewState < this.YYNSTATE) { + this.trace("Shift '" + this.yyTokenName[yytos.major] + "', go to state " + yyNewState); + } else { + this.trace("Shift '" + this.yyTokenName[yytos.major] + "'"); + } + } +} + +/* +** Perform a shift action. 
+*/ +this.yy_shift = function ( + yyNewState, /* The new state to shift in */ + yyMajor, /* The major token to shift in */ + yyMinor /* The minor token to shift in */ +) { + this.yyidx++; + + if (this.yyidx > this.yyhwm) { + this.yyhwm++; + // assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) ); + } + + if (this.YYSTACKDEPTH > 0) { + if (this.yyidx >= this.YYSTACKDEPTH) { + this.yyidx--; + this.yyStackOverflow(); + return; + } + } else { + if (this.yyidx >= this.yystack.length) { + this.yyGrowStack(); + } + } + + if (yyNewState > this.YY_MAX_SHIFT) { + yyNewState += this.YY_MIN_REDUCE - this.YY_MIN_SHIFTREDUCE; + } + + var yytos = this.yystack[this.yyidx]; + yytos.stateno = yyNewState; + yytos.major = yyMajor; + yytos.minor = yyMinor; + + this.yyTraceShift(yyNewState); +} + +/* The following table contains information about every rule that +** is used during the reduce. +*/ +//{ +// lhs, /* Symbol on the left-hand side of the rule */ +// nrhs, /* Number of right-hand side symbols in the rule */ +//} +this.yyRuleInfo = [ + { lhs: 7, nrhs: 1 }, + { lhs: 8, nrhs: 3 }, + { lhs: 8, nrhs: 3 }, + { lhs: 8, nrhs: 3 }, + { lhs: 8, nrhs: 3 }, + { lhs: 8, nrhs: 1 }, +]; + +/* +** Perform a reduce action and the shift that must immediately +** follow the reduce. +*/ +this.yy_reduce = function ( + yyruleno /* Number of the rule by which to reduce */ +){ + var yymsp = this.yystack[this.yyidx]; /* The top of the parser's stack */ + + if (yyruleno < this.yyRuleName.length) { + var yysize = this.yyRuleInfo[yyruleno].nrhs; + var ruleName = this.yyRuleName[yyruleno]; + var newStateNo = this.yystack[this.yyidx - yysize].stateno; + if (this.yyTraceCallback) { + this.trace("Reduce [" + ruleName + "], go to state " + newStateNo + "."); + } + } + + /* Check that the stack is large enough to grow by a single entry + ** if the RHS of the rule is empty. 
This ensures that there is room + ** enough on the stack to push the LHS value */ + if (this.yyRuleInfo[yyruleno].nrhs == 0) { + if (this.yyidx > this.yyhwm) { + this.yyhwm++; + // assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack)); + } + if (this.YYSTACKDEPTH > 0) { + if (this.yyidx >= this.YYSTACKDEPTH - 1) { + this.yyStackOverflow(); + return; + } + } else { + if (this.yyidx >= this.yystack.length - 1) { + this.yyGrowStack(); + yymsp = this.yystack[this.yyidx]; + } + } + } + + var yylhsminor; + switch (yyruleno) { + /* Beginning here are the reduction cases. A typical example + ** follows: + ** case 0: + ** #line + ** { ... } // User supplied code + ** #line + ** break; + */ +/********** Begin reduce actions **********************************************/ + case 0: /* program ::= expr */ +// line 63 "examples/calculator-js.y" +{ console.log("Result=" + this.yystack[this.yyidx + 0].minor); } +// line 602 "examples/calculator-js.js" + break; + case 1: /* expr ::= expr MINUS expr */ +// line 64 "examples/calculator-js.y" +{ yylhsminor = this.yystack[this.yyidx + -2].minor - this.yystack[this.yyidx + 0].minor; } +// line 607 "examples/calculator-js.js" + this.yystack[this.yyidx + -2].minor = yylhsminor; + break; + case 2: /* expr ::= expr PLUS expr */ +// line 65 "examples/calculator-js.y" +{ yylhsminor = this.yystack[this.yyidx + -2].minor + this.yystack[this.yyidx + 0].minor; } +// line 613 "examples/calculator-js.js" + this.yystack[this.yyidx + -2].minor = yylhsminor; + break; + case 3: /* expr ::= expr TIMES expr */ +// line 66 "examples/calculator-js.y" +{ yylhsminor = this.yystack[this.yyidx + -2].minor * this.yystack[this.yyidx + 0].minor; } +// line 619 "examples/calculator-js.js" + this.yystack[this.yyidx + -2].minor = yylhsminor; + break; + case 4: /* expr ::= expr DIVIDE expr */ +// line 67 "examples/calculator-js.y" +{ + if (this.yystack[this.yyidx + 0].minor != 0) { + yylhsminor = this.yystack[this.yyidx + -2].minor / 
this.yystack[this.yyidx + 0].minor; + } else { + throw new Error("Divide by zero"); + } +} +// line 631 "examples/calculator-js.js" + this.yystack[this.yyidx + -2].minor = yylhsminor; + break; + case 5: /* expr ::= INTEGER */ +// line 74 "examples/calculator-js.y" +{ yylhsminor = this.yystack[this.yyidx + 0].minor; } +// line 637 "examples/calculator-js.js" + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + default: + break; +/********** End reduce actions ************************************************/ + }; + // assert( yyruleno this.YY_MAX_SHIFT) { + yyact += this.YY_MIN_REDUCE - this.YY_MIN_SHIFTREDUCE; + } + this.yyidx -= yysize - 1; + yymsp = this.yystack[this.yyidx]; + yymsp.stateno = yyact; + yymsp.major = yygoto; + this.yyTraceShift(yyact); + } else { + // assert( yyact == YY_ACCEPT_ACTION ); + this.yyidx -= yysize; + this.yy_accept(); + } +} + +/* +** The following code executes when the parse fails +*/ +this.yy_parse_failed = function () { + if (this.yyTraceCallback) { + this.trace("Fail!"); + } + while (this.yyidx > 0) { + this.yy_pop_parser_stack(); + } + /* Here code is inserted which will be executed whenever the + ** parser fails */ +/************ Begin %parse_failure code ***************************************/ +/************ End %parse_failure code *****************************************/ +} + +/* +** The following code executes when a syntax error first occurs. 
+*/ +this.yy_syntax_error = function ( + yymajor, /* The major type of the error token */ + yyminor /* The minor type of the error token */ +) { + var TOKEN = yyminor; +/************ Begin %syntax_error code ****************************************/ +// line 59 "examples/calculator-js.y" + + console.log("Syntax error"); +// line 696 "examples/calculator-js.js" +/************ End %syntax_error code ******************************************/ +} + +/* +** The following is executed when the parser accepts +*/ +this.yy_accept = function () { + if (this.yyTraceCallback) { + this.trace("Accept!"); + } + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt = -1; + } + // assert( yypParser->yytos==yypParser->yystack ); + /* Here code is inserted which will be executed whenever the + ** parser accepts */ +/*********** Begin %parse_accept code *****************************************/ +/*********** End %parse_accept code *******************************************/ +} + +/* The main parser program. +** The first argument is a pointer to a structure obtained from +** "ParserAlloc" which describes the current state of the parser. +** The second argument is the major token number. The third is +** the minor token. The fourth optional argument is whatever the +** user wants (and specified in the grammar) and is available for +** use by the action routines. +** +** Inputs: +**
    +**
  • A pointer to the parser (an opaque structure.) +**
  • The major token number. +**
  • The minor token number. +**
  • An option argument of a grammar-specified type. +**
+** +** Outputs: +** None. +*/ +this.parse = function ( + yymajor, /* The major token code number */ + yyminor /* The value for the token */ +) { + var yyact; /* The parser action. */ + var yyendofinput; /* True if we are at the end of input */ + var yyerrorhit = 0; /* True if yymajor has invoked an error */ + + //assert( yypParser->yytos!=0 ); + + if (yymajor === undefined || yymajor === null) { + yymajor = 0; + } + + yyendofinput = yymajor == 0; + + if (this.yyTraceCallback) { + this.trace("Input '" + this.yyTokenName[yymajor] + "'"); + } + + do { + yyact = this.yy_find_shift_action(yymajor); + if (yyact <= this.YY_MAX_SHIFTREDUCE) { // check me? + this.yy_shift(yyact, yymajor, yyminor); + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt--; + } + yymajor = this.YYNOCODE; + } else if (yyact <= this.YY_MAX_REDUCE) { // check me? + this.yy_reduce(yyact - this.YY_MIN_REDUCE); // check me? + } else { + // assert( yyact == YY_ERROR_ACTION ); + if (this.yyTraceCallback) { + this.trace("Syntax Error!"); + } + if (this.YYERRORSYMBOL) { + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol. + ** + ** * Set the error count to three. + ** + ** * Begin accepting and shifting new tokens. No new error + ** processing will occur until three tokens have been + ** shifted successfully. 
+ ** + */ + if (this.yyerrcnt < 0) { + this.yy_syntax_error(yymajor, yyminor); + } + var yymx = this.yystack[this.yyidx].major; + if (yymx == this.YYERRORSYMBOL || yyerrorhit) { + if (this.yyTraceCallback) { + this.trace("Discard input token " + this.yyTokenName[yymajor]); + } + this.yy_destructor(yymajor, yyminor); + yymajor = this.YYNOCODE; + } else { + while (this.yyidx >= 0 + && yymx != this.YYERRORSYMBOL + && (yyact = this.yy_find_reduce_action( + this.yystack[this.yyidx].stateno, + this.YYERRORSYMBOL)) >= this.YY_MIN_REDUCE // check me? + ) { + this.yy_pop_parser_stack(); + } + if (this.yyidx < 0 || yymajor == 0) { + this.yy_destructor(yymajor, yyminor); + this.yy_parse_failed(); + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt = -1; + } + yymajor = this.YYNOCODE; + } else if (yymx != this.YYERRORSYMBOL) { + this.yy_shift(yyact, this.YYERRORSYMBOL, yyminor); // check me? + } + } + this.yyerrcnt = 3; + yyerrorhit = 1; + } else if (this.YYNOERRORRECOVERY) { + /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to + ** do any kind of error recovery. Instead, simply invoke the syntax + ** error routine and continue going as if nothing had happened. + ** + ** Applications can set this macro (for example inside %include) if + ** they intend to abandon the parse upon the first syntax error seen. + */ + this.yy_syntax_error(yymajor, yyminor); + this.yy_destructor(yymajor, yyminor); + yymajor = this.YYNOCODE; + } else { /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. + ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted. 
+ */ + if (this.yyerrcnt <= 0) { + this.yy_syntax_error(yymajor, yyminor); + } + this.yyerrcnt = 3; + this.yy_destructor(yymajor, yyminor); + if (yyendofinput) { + this.yy_parse_failed(); + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt = -1; + } + } + yymajor = this.YYNOCODE; + } + } + } while (yymajor != this.YYNOCODE && this.yyidx > 0); + + if (this.yyTraceCallback) { + var remainingTokens = []; + for (var i = 1; i <= this.yyidx; i++) { + remainingTokens.push(this.yyTokenName[this.yystack[i].major]); + } + this.trace("Return. Stack=[" + remainingTokens.join(" ") + "]"); + } +} + +this.init(); + +} // function Parser() +// line 12 "examples/calculator-js.y" + + var Lexer = require('../lexer/lexer'); + + var parser = new Parser(); + + parser.setTraceCallback(function (value) { + process.stdout.write(value); + }, '> '); + + var lexer = new Lexer(); + + lexer.addRule(/\d+/, function (value) { + return { major: parser.TOKEN_INTEGER, minor: parseInt(value, 10) }; + }); + lexer.addRule('+', function (value) { + return { major: parser.TOKEN_PLUS, minor: null }; + }); + lexer.addRule('-', function (value) { + return { major: parser.TOKEN_MINUS, minor: null }; + }); + lexer.addRule('/', function (value) { + return { major: parser.TOKEN_DIVIDE, minor: null }; + }); + lexer.addRule('*', function (value) { + return { major: parser.TOKEN_TIMES, minor: null }; + }); + lexer.addRule(/\s+/, function () {}); + + var data = ''; + + process.stdin.on('data', function (chunk) { + data += chunk; + }); + + process.stdin.on('end', function () { + var token; + + lexer.setInput(data); + + while (token = lexer.lex()) { + parser.parse(token.major, token.minor); + } + + parser.parse(); + }); +// line 918 "examples/calculator-js.js" diff --git a/examples/calculator-js.out b/examples/calculator-js.out new file mode 100644 index 0000000..ef48d8f --- /dev/null +++ b/examples/calculator-js.out @@ -0,0 +1,102 @@ +State 0: + program ::= * expr + expr ::= * expr MINUS expr + expr ::= * expr PLUS 
expr + expr ::= * expr TIMES expr + expr ::= * expr DIVIDE expr + expr ::= * INTEGER + + INTEGER shift-reduce 5 expr ::= INTEGER + program accept + expr shift 5 + +State 1: + expr ::= * expr MINUS expr + expr ::= * expr PLUS expr + expr ::= * expr TIMES expr + expr ::= * expr DIVIDE expr + expr ::= expr DIVIDE * expr + expr ::= * INTEGER + + INTEGER shift-reduce 5 expr ::= INTEGER + expr shift-reduce 4 expr ::= expr DIVIDE expr + +State 2: + expr ::= * expr MINUS expr + expr ::= * expr PLUS expr + expr ::= * expr TIMES expr + expr ::= expr TIMES * expr + expr ::= * expr DIVIDE expr + expr ::= * INTEGER + + INTEGER shift-reduce 5 expr ::= INTEGER + expr shift-reduce 3 expr ::= expr TIMES expr + +State 3: + expr ::= * expr MINUS expr + expr ::= * expr PLUS expr + expr ::= expr PLUS * expr + expr ::= * expr TIMES expr + expr ::= * expr DIVIDE expr + expr ::= * INTEGER + + INTEGER shift-reduce 5 expr ::= INTEGER + expr shift 6 + +State 4: + expr ::= * expr MINUS expr + expr ::= expr MINUS * expr + expr ::= * expr PLUS expr + expr ::= * expr TIMES expr + expr ::= * expr DIVIDE expr + expr ::= * INTEGER + + INTEGER shift-reduce 5 expr ::= INTEGER + expr shift 7 + +State 5: + (0) program ::= expr * + expr ::= expr * MINUS expr + expr ::= expr * PLUS expr + expr ::= expr * TIMES expr + expr ::= expr * DIVIDE expr + + $ reduce 0 program ::= expr + PLUS shift 3 + MINUS shift 4 + DIVIDE shift 1 + TIMES shift 2 + +State 6: + expr ::= expr * MINUS expr + expr ::= expr * PLUS expr + (2) expr ::= expr PLUS expr * + expr ::= expr * TIMES expr + expr ::= expr * DIVIDE expr + + DIVIDE shift 1 + TIMES shift 2 + {default} reduce 2 expr ::= expr PLUS expr + +State 7: + expr ::= expr * MINUS expr + (1) expr ::= expr MINUS expr * + expr ::= expr * PLUS expr + expr ::= expr * TIMES expr + expr ::= expr * DIVIDE expr + + DIVIDE shift 1 + TIMES shift 2 + {default} reduce 1 expr ::= expr MINUS expr + +---------------------------------------------------- +Symbols: + 0: $: + 1: PLUS + 2: MINUS 
+ 3: DIVIDE + 4: TIMES + 5: INTEGER + 6: error: + 7: program: INTEGER + 8: expr: INTEGER diff --git a/examples/calculator-js.y b/examples/calculator-js.y new file mode 100644 index 0000000..a49217f --- /dev/null +++ b/examples/calculator-js.y @@ -0,0 +1,74 @@ +%name Parser + +%token_prefix TOKEN_ + +%left PLUS MINUS. +%left DIVIDE TIMES. + +%include { + // include something +} + +%code { + var Lexer = require('../lexer/lexer'); + + var parser = new Parser(); + + parser.setTraceCallback(function (value) { + process.stdout.write(value); + }, '> '); + + var lexer = new Lexer(); + + lexer.addRule(/\d+/, function (value) { + return { major: parser.TOKEN_INTEGER, minor: parseInt(value, 10) }; + }); + lexer.addRule('+', function (value) { + return { major: parser.TOKEN_PLUS, minor: null }; + }); + lexer.addRule('-', function (value) { + return { major: parser.TOKEN_MINUS, minor: null }; + }); + lexer.addRule('/', function (value) { + return { major: parser.TOKEN_DIVIDE, minor: null }; + }); + lexer.addRule('*', function (value) { + return { major: parser.TOKEN_TIMES, minor: null }; + }); + lexer.addRule(/\s+/, function () {}); + + var data = ''; + + process.stdin.on('data', function (chunk) { + data += chunk; + }); + + process.stdin.on('end', function () { + var token; + + lexer.setInput(data); + + while (token = lexer.lex()) { + parser.parse(token.major, token.minor); + } + + parser.parse(); + }); +} + +%syntax_error { + console.log("Syntax error"); +} + +program ::= expr(A). { console.log("Result=" + A); } +expr(A) ::= expr(B) MINUS expr(C). { A = B - C; } +expr(A) ::= expr(B) PLUS expr(C). { A = B + C; } +expr(A) ::= expr(B) TIMES expr(C). { A = B * C; } +expr(A) ::= expr(B) DIVIDE expr(C). { + if (C != 0) { + A = B / C; + } else { + throw new Error("Divide by zero"); + } +} +expr(A) ::= INTEGER(B). 
{ A = B; } diff --git a/lemon-src/lemon-js b/lemon-src/lemon-js new file mode 100755 index 0000000000000000000000000000000000000000..72d006f3977aca03e6873975c5874157022d5bd4 GIT binary patch literal 86608 zcmeFaeSB2K^}xLg2`n1CQ9+}{678Zv-V!C6Ekv_q19xGgQ9-F_K?4*DBEqhO0wH#j z%I)Vf-E5*f&+p&o^D(X5 zJ2Pj_oH=vm%$YN1?k=r}jPB<3dQyMgJy&@om0RHCl(_}D<2qoFGyCsOI`zz~r`p@?!gqNtyVA)|Ju~YGZ|n=CWu8MiKT8xb9*<9egSd{OtM+Q6_(>r@+cz)ko2(^8VZjmHg84B^fCG zQqNSq>!_C^-GTUj|JUuF=X$rjQ}HFwr9bs_yXzS?d*X6wp=frA;)RbH? zGSzRvFR){-nV<*}vRzcFybo!~lkztg{;cZ0cUqqxx46fz&lq>hh-qUN z77OeU{u3OkUrxgch2#f5HB+KU>dSv`kagCVvpvP&`KtViQ^0@o6#3J@lm8X?|8ffW z!>7pKcZ&S)pCbSHQ{w zo+AJJQ_x>~3i$d{^qg;O_xGpR%rhcaiZ`c>ZyU{Kiw{KX!`z z%2VX$oPz$mQ@{^C1-#7rucH6-Q{;bmiu@I);E$-qi}O1Djb6)j~d7kOf zX|rd0CQqIpy{&rI^XE&;(&x#Og*DPuI&#NtGv|2jo}OaPylK-s zVE<&!TPdU^V-Xn6Xy<&W}!;H@>ob_MLO4jlb>o+0$s| zdw1M9$8Bb^1NxPKvYt5q>v)rN_jG=$yz5iov%Uh#!IMpitUFn1?MbbZXV0A3TP&7u zBQBEn-~&HnS<3Q++;ksR-raMpn~t47vzOrejw4=adm}Ap14ZgQo>teMNaa6M;N;IJ zaw?|sQ|XOe@^AXwf#1?4{}xxSw|B|ka>#+VyX4>SnUmkzCBMqWbEHdttBc2XS!X)? 
z9(VBMcF7;_;>qihpZBQ)U(hB0pGTei!Y=s}T|6aS@=IJidYAl`BMzRbF8Q{LXJVK9 zDK4HFUGmR$`!%;qzV70w>5{*~#j~tS{^tKV^sMNTA93+K(k1^n7tfPj@-KA9{h2QL zAs5ecUGlfPcs6#)FLn96rAz)47ti)C`RBQOu)E}6?&i04$xj)$N4n$}yYSnGIr1Ym zo@;_D2JOzsKgVI2`q$3LzaS&OH6!1hTB`I&M*gK4@F~47ZHhga`pYYH=;=v@^X8P~ zXXGm*)U8mEkuUaa>aQ>(KV_p!SxH8|^f~ocmXR-Qr~dSe{FE+`vZ{=H6ixbXVn+UH z>0FOzN=Ck{#i_p;8TmP>OrCQy@=<*0znYBvGt#*pPhCd7mXW_KBR{1}rFca~{+Su@ zk7VSZm688gM*i6u`A=r#%bp_j_e@59-&7{g=Q8ro&B$-e$QPR@^|vu2|GZQt&n+4G z=V#<^&&dBqM*i-M{LGDuoslnljnrRjMt=WPCeI@o`S}_79#=n$>|B_U@5{)~w3~7> z^1qb&Pcr<>VtYhI7u z&=~F3zD19(_pMXiYA^nP?DheB_*d&+#+#&PNLgZcJAVUqNLpN1i7hIm;@rl|BU zBrPtfM3qYaMAG7lN|dSek0mWGs6?Sk|4`E6dP?N0^baI0E~iAUO3#zDxSA3kmHxh@ z#l@6pJxRuZ+a)corNnNPzFE@ZQc7%5>FXseuB1exN{^MaxR4UhsPrgFi|Z)ym`ZYCFZL1MUobmQDTZppD$@~6(y=v`YcI{izrd1(x*vU zTtkUMmF^~KaS0{zRr=T&q{X$F$W`f2B`vO@gh!?KNm^V&iPjU+{|_XcFX`PXy+hLC z0!nOA>1~o0*H5BRr8i4jTt10sRC=wX#nqE|Or=*zT3kGd6)OD?NsDVIQAav>Q}D*% zgy8rIH|WM^`aPdWT!3yCPs{asrs`(a0P*SQM!nxheo>=tWhbew8yCC|fNo@OP>X1lJa=zl~nLVjDw#&t<@iJvieDuR{y4x>fxkTOUmqC z)Gma#lCW1I&ycZftt#nzE^zuIbZ@OdG?Hm%|BYdThW~(yyge%KVe&|PLc;!Vfuix# zExfgZt#ot_M&$CQnKua5a>h+46sj^HHbI3F={$OnKdC zu{R%YZ#QTA{SEuI<>#YUbn_N}AwzHWS!ax*R--qtfm+oYbaVW=Bk>>Pbc>xOudUXM zc0F!;B`117H@~f0#j9R}K7)RV4D^Do_1N?`YUBlTlOWd3v$U0GwS{i< zdSeOQjQV{>#9vUfE&eJ)*_!OD$7@PF#?ffDu@&MC9SJR~q0h08r8qZUGu5-Ohfy_^ z!c<98w|?ap&;RrJs((>`F4Fm#{>a&&zi#~N z6QLqvv?UvK>uGt{`iF$OBlP$>pY0b;8_jy)-PkAb((`&#E(7M%8a9H`4EYNXxOihX zBm3Kw*$pTxGVEVhsAo&aFB*$zs3&Eb+8_T)W$Q+k-#!g@t$>aKLY}{v3&ExTlmG#( zelq{YOMmAiDpcYDCozabQJYklIYD6Vmh9LZ^FjZ18fw^9+gCSBuOgFS?u!`GYU8o? 
zc0|9#vs9}n+7o|KL3)DPqw7P)Ui*6FO3lok~NU5#Q&J3NQ}&lL@NR=J@X8uU?T;R z_|_;Ae{~agM@c+LVujcPMf=0X*6-@Z2YP%@Yt{IOmEFv+!I5j#qjUy*Z14NKo?g%Y zRzSzwrwNm@x5*=LBnscNYbg^`qF4KFt$!V~dDgj4;QyrHi+?ANfhYB*liisedXs&% z?rqk$o{aVZ!2}n9uYLD)C5Nf{LSJ3!9n|&2E}wwXy$nc1@uO&O#`|YJY?Z$GWZBiw z*qj{#(0l8;=|fZVVcqnOcd=&#U*dbfrsy+!>gG9SnQzc2Uu>9plh4>PXtXajApXJO z__1!WT-_YQe>diuWm5)?niA{n^dM_y^V3f~_0+oQE3l>VG8QIg2&$B<)*4;J9r|lkx+uI#J>E^k*d4}jX^RAp|KsUS9mE5d7+NiB;M1jRmcB6H( zsP3wp%d|(E%e0kk@n&zJIX2iFYdyMw;WBalhmJX2}Fx^LgS^ql9=0M0To zuyq}c)k~K=UHPfI{8QZNVUF>^y-XO!_7H<4!ofIKy_y_A~zzTG#u6hOHa#t@@TUXZ1e+U-Bq@ zui4|7G;r;$Ebh+%m+EhZeTL*m%;HH*zRHF+t^RjZ567=Ui9aH5=68M1GtJsD!?b#n zoS<<+EnTSMpc(+(>U%GZB)=o_r>*SHLYxz~2R(TrdeQ1qp#`gLh>&RYoeUfkBS*xj z$XTb1_nsF4)s444`=j=7V?R%QV}%ylD_jVh{SJurceatJXxI~dyrN-$G$+VXpWL*< zp@~UlY|La)T3@*R6JZD&uVaPMOWhn2>w)DJg#dGjT8_`wjW_MxpFx~C z$g1%M<0rh@atX55%?UoeVVhRJUl^d|@C6y-Qr?(>Td2F@95bC~IxU(%<60Uwlw}uQC;kEj`ERDJR zIuJ3yYCo+oWC!wRwO!^Zz$-vr$_aYM*hC!*M}sb+N0csUek z(yWz`+@Z6>)mi>{(rZ`IT-UFBIN(*Vu7*N62djF;7B)eYFD)zm`1B zoqtZ-`XlMq1zsrlSbs#Q$!x!2lU9F+h@AC^6IxRsnWMfqC z-%WmVPmi|x46)qubt~kbD~4Gv`I1vWq7UyLq5IEFrlQ6QEG%obzfXsJYO7BV8V3a8 zb0CA(9IsxzB~txv#CvR<)vtg2=z!QeA>&X*QDejq2^-iHF>dku>^>M5+Um27*NpN0 zEJjXSxzo;t59V;cNsey%+Cty)VovS^ZAHLt`T_@+00O)9)!6dZ|`F7V{k~KdxJs9nJW)X!(XiDJ?&XH`dSiu>rBIQ2D$F zheIlQ#d`nQ*Q_6VRce_6~H`)9CF zk5mh_)=(ql+_PQ=t7EFLzMdSS)r*Bn3v`nnBz-bJ%CyyO<<>~U*buTJwOfMP%Hcg_ z%h49P%j=81Pw{)OM5{kWRbz+nqoEa+(sEw>$T02REc$_HcOUOBh&TGo^BK=PH0?6U z*XsYszpIO=s#}Zwg~8`!6-@S&cCqQ7d-1` z)8it^7hbA0iW@ct*j$JgfS-d2R0uH<&`=>qA+-@wv| zUh_0XpH&6bke__1W4?ch&dn@!LZ`hAtGj5^Y8g5qR*ht0x}rG5z#5;bnVPtr#_YCF zWF-=Y{GyA!8@wq}eN;@GHyzFU1!@-y0G->ut)a1Y##r-~Qcu{}AL0Mr6O}BN17zvU z#~@%f2ixBS=yKh-l?LA(YxW!A0L69!88PgzR`HH1>}+u`-y+5|{`=@!(?2r#N79wX zN4k+z$pMdeVfN7nN zweSMj*IX`!dvCGQx9G;<Xxy_;`Qta7W-K^+hac~YD`|(F`f=yW|{p%K^@-! 
zS)WJGPuUIFC5^G2lC2v@D8W;}4pw@HD(U0c{AZy?gaH|9#O|9Ekz(A<(K&<0mr}KZJ~j(%8O)%6&?tW_*9_(l z8kHaOnZevaqjIATqVEWzva=MTa~(vN3ZnP-v3{F3jW22(apU+HMnmltY)2-tk>S5H z^nex{ZjEhM8wfq#ec_qFgB>#y1~{ov85Hl}PxG*Eg@z3AnZDwNHF*r0;t%_VKYjryHey&tJPRF|PvIJQ(~Wg@9hG&X zoMCH>4%Dp`o+;#Kd+Mb0hFpgpy#+l^MtrSL53G%~PFg2sXpcmD`y{<3dldNPuVX*t zw*Qbqo&C*>_UnLKUBEwUNu$Q`%?RTo%Vl5d8+?) zz;$lFj$U-?|NpuD9&Y=WovQtZ6zc3VGTJYb_Vf8?jl8M;cWu9i>c8**(Eq(o|DTfO zkiU-p;~V}tnPw04)WxzH|Wn;*@A!3d3M$qVQ2Lnm1LC&urw>G3mwZC)!P)Q-gJ!-df5{=v^bk#Hck&4N5X+)vA0^v{UW=vpBuO( zV`*{sIc9m*pz?h2a7=)6A$;e;=L%yRZTqZI?ZW4xO<{3!hx|FoZ=~!Ss{&D42+;|x zUe4om<7kCBe53KQv9Y3|ZQ*XAUuU>rbwC(h^9qcsF#Si^nJ-)$jyJ($e;}Ek!smds z)I6d1+eFVs9%*P__()~o{n&@N8J6Ri^%!prJQxg=M1L5<>6)V^MsVPPWFG5tg_&~% zj(969|5X)%m$dpx5EV3z3J*RE1zyn_#*&fvL&}b_LIr({FuVW>&CDyS z$iBptRKt@plxFF~{c@64v_EkUNvzz&RDm;l*RjRwG>clqVYLccoVov)P-XRge~qk- zvfZ$X7XZOR{}%xaekl3{7pCmzbCY2uAG&!xZpLj!%47{#Kls>U{;Dba-UCdxdB1e&5xv%GWgBKRJ2YsXs5 zn(+c(`;(Gbr&Yq zt8}ngxhTujSmw5r6#@wrWhE#u%thmHB+wk~p~stj$v4f?JN_raKZe06+NqmaiHJGT zSJ|*L);Gby6`mANr8%jD46VL_bkHm>M0qDGQBajIdGhoXVY6GkX=n3Z71HIM5h-54J>mgC=DB*QK`7ydEO9#eN@b zDKDhiG!aVK1ZBe_ZZA9+y~47sDMy^J;d^7DIT=IAP&$4c@{#@aXD|S=#UEROKXwyo znCkLh)+e1d)`&H1SH6$PVOP0ThCro`KXwIWIA1sNkUy455G|`yuYDH^u)Cc7x5JHnRg#-zdFIJ;s!Du@9~6GZp^Q-+~$+Z zb8zr1jx~9lp150(D!I(0R@uK|&zhyTN@c6>E3AmRwV=e#7B5^&_P0~WjO-BtAzaHK zPUeeK{{pA}a;JWoy#ZGW_4gfei7~R1yna=EGy8ohX~}*i1>2JSU-wncFU;(}JLSLW zDF0TTXCMCK3TS;JNSj66z{JnY-w6W;Z{ zL5d%DiWfS?YoxfGV)%t~`!#%KTi}b%cCRGDa#a@LO|Nf^Ua1WX`Kz>n5&slz;CTOB zZQ!r`bwFwZA6MQqZQ!5%kE!&({NN=2MSmmDjIAiX3%qN%N6pf$2U&?Yt}gzLw10yl z&+)p+QJV1{Ab$l!92KlAY|c3r68jOiTUH@YDaf^-mU?D!J&VCDeizv+uOG z6L4U59F`23 zs4DN*>X%Bd;!BuoHM)5#8h2;(PBc?4F8_#C`isA!F%_A~Ytr(9-{0~1pOg0a2cS-9 zLFbpjB*)JmG1@cz{2!|=w0Y|hY2$S9@~;e{0+U;03WN+h!@$YlmOG#9E5(Mb-cr=q zu&w5FX28QlZ=mf}<*%53c2D3N8W*1K%rAFc?Ry=yirO%XX)>`M5M?KJ#IG=OBWCI4 zKvkM|qQGI0_#q<3@rZGP!yGvXaoBNLhdwH6YhD%`8*e-<{(eZS*s>xRcs-;=IGNp$ z&-{U#q@ZaOiNe_)VkFT2;N0Yp1WUs(L10px<%? 
z+(;GiPnhFy>F*oJ%3o@6Fu{56caWB%#&`lbEw=xH7k>tJs{^}MU@L)D<0S2EeEUoN zen{$D*|)jv^#aER!C^T#Y6V9xX|L0MMc0kR9Qxx6%3YOv*e$&BztKy9(PiXb|0&eV86T!Y5qQk~|CW2P~tnkz< zwv#yY2Jcl46UTlSTv$6F2Ai||rQvw3 z4;OJYjRoUZv7cD%_b$i_n((~$_5WldRA=3zdpF8SKKi1Fb?u;>3+UbPy)n0a3;LTC zXqet(6>J}1H@AO~p3m(Quzj*eL#bi|>kG_qd}%KFj2hS#t) z1h6En^0bD8s7u^R6@0{*wx-hXeJ~r)0p{7}$Nc z%Zhlcs4axN)?moPL@)hROiVmWt<-sxG^ne9F0KAi@>K&g{Lj6r;j%Q!wR8@YO2Y`& ziyW*^CsJ5%qc-!YQ!jcu^i2nuB1{I}Dt?xuGV<4ckZ;wSTJRilE7onz#-sQuB*AGeybk;0Y4XlqLovvMVvW$%2@f5JRWWhX`kI zYQ99n^b`$Zze3YBiUye}4h65W^>g&5ZdT-mh~2<`lscjs5;mnN`keW4PP*(RRaV9k zz+tE8xQwf+_IHYyPZhZ;R3!U@bipGT_@o@xF;B9a(f|t-Kpow4M=LQ@<*5115GVSR zbm%oo5@9D%FW??xgU##(X-q>>^+at*;INr}bGk5>!U#L^T(-6ka1bKQ#vO%#!Or6k z(hn6*G4KseEM7m7GEFv1viNmZUt{?9QV5HW$uJmwlk&i7yjYK4?crPko%Ri76mtMS z`Izjg)vBE0=a>A+X7+Ct24}y6g!FxYbm#p}E**8^Pi}<{%%M4M6^TE|WxrF_Bef3d z#`X9Y*5>Mgtt$ScgcA89cPxWS$Ggy!hP?1)`-rehaX!CGe*i_fyj7sj=@BW#?+ zG?Q|oL=MPeBquth=%Fx&t1xe8^x~9U4zhR4WH%?|v3-N~3f9$0EGz;Q+g)i+^|R64 z6Kf93$)!ZIWQWZwl24@c@j7Q8?i?q%0VVe}NwkQ0bxFP9FP zlcNE@k^er`jVtM4Y+r_dm!Th^n|+ChWT8q19i{bm>HnQR7bP9?ZA;yt+*5f`|%ldSeGmH9d zvDVc?K_f>1uL&C9yEk)KID)Jl(Hvz@uJzV^+Y>un)Tq`SZDl0evn;2Zs=rAb4BLfc z;@bV~hseVK(52*I%1@AoozUUPLlbZKb0zFVozaOSdCf*?UTvCwxJCMLKxjFawI*U< z?Zs6wL9!F1H^v4bus0`Ts!2spux_M(LF^OV{HX#pOTPgXOvfRQqYE1CU(>62d5Jf< z*_E^4O+}4iqdgsj9sdL|d$v)iuu9*00D_-_jtcWm{I&$--o!!}HpdhOtqJWfoTi{= z;GUfNh9}ueweQ&JOpg*fNX5W9EwUW~BY}@%d!CC>!5WwS^TS7#3xpN;+KoXg)VrM= zV*||c z$(8^!Ir>|yH`y7q(-mdEDdVa%Cd3!*!CS^4mCytoDhHO-rKEyp1!`mI#R*GQ3&936 zCL=VGdcgDmCQPhYMPOQ9?d-5wkw4ZPygR-GhuWL05JaJso&}@|L6a8VgktROB?O^u zbmyftk(lL*i4o&4+}iFC@;B+2vE91C8<;SD={uC|aK}x$2w)Q-Ge2fx0P7QNC^t-Yz_g-i(%%M&F|~HWXB!CY*?`Bh2p>f8OE(<6WLPv9)YTRxu;e zGu$KO9hIg8ZR}5UF)yQwqCeDV6xTbLgR4cv-SD_neBeoG(TcCh0?~^(N9bFY6pL8pEs7Oa&nAf`Pw$MdthB%uB70n)csl7e&FFuN3%U0+Eufw)}EGrVMwmc{qMQz-GarBgE z+=F0!2&~tbBU@32=1sZwIH(A6mw^?Tm0_~4p`9R+{$f*1O59IgYW$cfvRZeo}C<#(kIwN%51uH)ic8sKR zu_GfA0`#d8if>7~_pr?iEgK{{+BKov0BR*CR%w<1ebOvAMCkAf2(vK0W=I>3JJnK6 z>2m 
z!`5W4I&u@c0TfTtr$}|@eMh*w9;zRdB$t`pz zh?!U7O)P*b>`%}N`zZbJ=T47z*#qfJVrY83DE+yEX&<(&z=4Gx_vyeh3{cy@!CA*} z)}6NC`?5o_`W8G!jq0o)>fSBsdp7EB%9yBEXN9V~ zsdDbJDERJU@;U!;%d@K7_>bpkyglRoOgbLfHK}B8^0GuF``}=wAwJ>^v2}|-mw>ob z`|%17{xK}P?kpdSBHI@G$@MZlkF6G=Aj{rPT8YF#2qwPNpF46uthsKn|I7he{Z8^+ zB+)fR+eF`-i#$qne7L$9=&am&q=~JyXJl2(4p+A+!;xYA{v)9GzL!2Wi2YY|22lyo zQFa6D4O-VouxE~5y$M5@jayD`TS)h2an2fev-X=IE-6)SW`qjBgEkqsMfbjKtYxfb zKriiQ(-(0szrBtbSlt{lK21^6Fb^iWc69YI-McFx_N~&}(AsbX33a)ce&}YsUpSZc zd~$j5p7p}q3_H<2hbA0+O8K!uUXT>=VLyKrCv!@gg_Bo7S^P*l>!Gm=%BSIlw7)If zb(YCd=r(LPcBEKIq>Bne`kU5(xe&(C_0^6Yq`f2cEN^`o`Edl9vOgtscQbqn4?o zYu%#g`6F+UtCR=U+`{PE?~|W=D`h{Zo@7|tlNj+8V81nCILvb7pV$YjUU^rDF#TM1 z*G{0991}u^w))#;_3hD1((+1cb`1^HjW{iq9gJuBakj4WLLFeS59$`RpBC#EG#kY2 zjLI15uurbqxbsJ-6}!g193-lK8alR3?hb^_hty8oIB3n~TEcLD2a3e2O4rs=UA!6U zDd!LF{UjA1EAhL`KS($4&cXlRfTx|&_{B5wkhp{myrBO(Z9^741VQs4NG6iQ^})$o z)p(CQEtaKnSC5dXS=uC1^GF!+G1lP=8RO;H^m4A9dPn+WuhlK``eSbd%^Qg~EglF` zwF`zp-R$vHbm3!L7*CKnd`LE8)5SdOfr+CFnwH`m*9q2r1)Y1vLADu8LQw}_)Ga2L zAKWVe;yPUdgpg9yXCeWCi@^Cd&Yw$7SxU35A8&$J=6)ks{*|(zP z6Z;g~U3oEo@f;w+%7%Z7!DjV-o<($3eQz?cDRJ2#Xai90_SQA%`*^cpl@Wy(M$BWM zE*QT>d(M0{$K$uo)2Dh_Mt6SpbE(SIxOHcO6E%ejOKFEVeN#lfE%UPD= zG5UT)jy0dClci{g>{m(Y)qS_{XwJ$-JUKN(!KJQBoG!G+*B7QVobW?y_?MvMw9hGf27{Hrz7%#Q3v{|H=rVqyNPwO0O=h z1SDd#vUus;OG`-LG<5VFc9yo1SWynRmyK=bjC;TR*ng-aq5sKlLGOl;F+`(6h4E41 zB1KCb1j0}ur0rGMXk0@!@^KHsu&U9KkG=QPgjIT>c=rAdx)pGv31C*8m~!5V`>-YZ zA7sg2;(bKSE&(vLUZ&$w-9UZgzcSZZ<;x_Bs~4_DtwD|z@klTawnb-WOsQ5d!hyd% z;7&|?jdTb=ZDlv6G3I2Mhxy(;-kr&{mhO`qJD_%8DUAW`w^QA&U=Ym9Wzu#}>#yj1 zPM~$=jI6ub6D#JVzswWum#UiD$z3G%v#GC|NgWcW^qE@!UsPo24q4Bo^iUVNl8*|# zo#)r(_Mh3Gx|{qu;UJB4%xW`MXq&QurO$~^VJ-e*tN76sN+SN@v|6TG)#_iM82S1# zKN7!{is|ui&-2sHq>x$bS6-TOtKZdn^>L%0-@Zgp%NdQbVh-T}BIi%^aD6P7ITEp z_8uY>e;4I=wFZvCnb(VSPBLR>R0>JzOcV3LW8bX!R^s||xq0eE&-c`gwQZp>-u&1B zBNB2%BvLI((P!T&OW$0K?EH=!q$ymqiI>l-;*0mMUC~WQu`@V{hHN? 
zys!0oTea2m+RN(OVh09*e`(RS!^8ckTs6?dop8e4l)MnRA|AueSG00}4J2}7H#B0f zUyHMU!7*~RgsU*-n$ z>(;z(+!cS*X!11a@g3bG13!f(z51Blm!`r-PsTbLE83Pkt3!SX?cPUM9sfG9)2g*6$c%L`NAN8`h$~a-AJblo>X~#XE9O)@?_&3yU=e>nZJ`0_d z{R}nPPDHejZEyWJbJqQiI`KsXUKL$Y8x6iFo>Y~HZ~)A$p%?vf4NBA*(DDF?@lNDBuDiS0jp4Bnft$pR2!Z<+!-c=w7noI0d#*Y}P}>)QI1dQ;5c{*7 zZF7LuZts0os)KyJJ(5@I#CindU$jSOv5UJEmk$kIAUwciKb>AUvou;M^w(b9iS*JV zfFn;ruS2Xu7vu=JLXt!BYfeK;-xQLXSIUU6?S^licNW6o)Vhn4-F^|s42yk5YCMR{ z#|v^iVrnX2ek`-Hf(5?Pnn~>LP5HWTwod+In^b8{-2jbsFq}{n^s$~-VN?}{Tgvh! zD{Rat&>J?z&PtBLpA#?3=U^e6PfqkQU4k|^&pISLuCY1fB-C}p4F!w0kGR3_+9|+N z2|FAVuda}Yaftj_lcL*Ls^`#iuh&}di*j`!^7-)h{*vF<#MWsb%6hm7)mK|TUhJ7lamzUhcLuS}g$KH|nJlO>;#F|RLUxeXZ2 zKrT&CCB;QnHt+_`nle=QX}Y;EHyo%bi46@6+#VXZC1{itDrd1VzoSNqlZh*tY0`Z1 zAN1uY>(r`S&r;Iy=)LkNJ+OcoG?Q+n=QHZpC~awBgW$@sSO?Xm|0YLGJNuVFiyCll zl-TdQ^_Al*dopu@9|MmsA6;8$-G~Yf;$!HuC7GqxCbL&duuw)%=TkU=EwO!(>S+ZL zoa zg`7%p|KOZOSbP=cb%g|q$M#m3RSX=V+&`vH(3*!cwkBU$$E+pY(*0-Zwa7<)yyn=# z=%vWeU*X&u;oLCsCa~wlg$d_Axf{+6`~+933NVx@q94Au3#=S3q3)) zN~50O3-Yf}*Kj*UBKQr(GD7B%39LaCRwdE;dz^lRR2B5?+{1CP6RD{+$VZ(rgxXmTingiGoaxm_hj~*0om<8x+Y^%8IjCFg>e-xBQ@@SNEI}_}Yw&J6>q7FE=3V0vvwL+jI8iXjnPYXCkLiA0FtDLEQq&fF zPE>XBB4>WOU?{f~+(5Z!LxW55Ga6G3iIL%5AGmA$(v51%Wv|Aem!Zw*Z(`ivJ7saX z;etT%kJfuUd@u+V;yua2-`s<3E`)OlXI+)OP>`h7KjK?}#nXL7$SP|u+6L!)iru6e zo9jlNQzq9{tgK}) za`m}bOZ!nMW$FDtU%c5OTjgjEN6#e0-<#3JPVtF+k7K~^ftH|;Ioj%;;ut#{%a2V@ z(N0Hht(-|>$5{PL9D=K51F+xmN{lC-&^RGREXwY}?GbDE+DdaMYi)F7rExHso7V43 zC8jQViF`~7U&%<=i)h(mRTVX+#D|q71z#QUXN3Y>yseFhVd)E6LEIjMK##Ms!Wb27 z#b=TvyxDyt8Kh>BzZK4|~! 
z8PP`K6xZryDvz^5cgP+rBp(r}vI4qM~jz`jP2@|U*Hxs9PM zkCOx4?Ulw6CNr_zW!?3r-Q8uJ!V*s~)*Q}F(N@1- zXsfqsE47e!2fNL^p$k8i7Ev6h$x^89b?F9+M|^Ew*wA4?Vc4jG8{H89yf7m5pzIY0 zZl(*N3qPy0qVIa_JN}B1%a}!Ab513p5WY%2qmnCMvZw$siy=(>m}!`nXRMv%eawrZ zorx^+rEUo0JJAO&DY-U_Z$s^<=x&z58A>Ljcj0Yu&M87x76RMk9c5+JiNu~M_C4Nz z^D676_6xU&I~p5&E%QItJ9#Pl9b$?|MEmRJAohN;!SD4qB{JLx&xlhg+CPbO$2~;m z*`I_S*WZc6T_BO`w?d|SgmVGXfin{v$eL_!GLkYsciCJ1l0KLD96nJ`oF)6CgMgRw zmSy9!WuR4@j}Z3E7XYQPIP=B+B~dF~=9}`zDGu>{LUfn;T}&mSN;|p3eH0L2zO#0) zT->wj{TF5-8PleV2kpPoV_CjlqC9Ngm>)KKG}o&ZvANDN$5sXODy=>$ z71%8?zdqrD-Q9gE4I8^(H(zL^oe2>nQqJ!C=Fe3d=Rdu|UY;1C;LX8FLHgPp@(=MQ z&v5u7366#LtBxWj%AdVm<}O+`Q#Z9;{%@l`?pep!2KOW1b}E32m$ zn7Ocr>luLN@y-_mzK}h2zVMFqoh~A)5rZw1*jtf+->=obhjO!@r;)IA3l^e@1ONNp ztms9Rf$1e$y-q3kBmVZbtg*gWPTkSTTEpdJGXCPZgciqUdiWEu*LR1FceIr=ypR)n zEKTSl%EX@PB_XU0FJok+i>6Q%Hm^axH$_k9xMBz+w-|uTZ$;_WeTx6WUnZn4ImF?c zj9kN8(k+>S!MerSkX3&TN=a%<%pEIU?QNCj^n$Tgzx~3H3W6bwgpR`@;>E<0j}ye6 z6VGT~8@Z!A(4bRRonILgezXIYN;VRy#-he8NH{p$;1x zo(GqU-(d|i$LjkhRZYa+%qe6x!7hbECxAwfhmB1UQ+$Wdv8hn#7>UGP3YW-{3Nqh? z7HP{G<%^!{jAMsCL}kYg5;;COnvhdi6^1V%`db)0Em07XWIrOH?Viv`xA86*ue}HXTd$nS+MoOjVWhAlT03D$kUWa zISrb0Ypgg8wx*m0`XE*;LgxFlhrO>S#9giQzUz-h1{tS;P3*%Y^t<$(Sy-}7UOP&| ziQ5!#9r%@$G9kqvTpmFwv#{cU8C{^{91oLRP$;Q14#-_u;B<}?%9hRc4+|SFN6?jp zDf^YJFRSNgW39oT3tI8>u;%Tx_dMxHO@!Wr@k3Vft&G>Sm0*xvcA-c_nZ){)Bwk^F zI^xgJcuHCrOoqG->SbsdS9h=(-nWS_kl{^?7k$t9{Uf*x!^Y^6#6@JJ5!v zay{{RMmgR+k)cXsH|?yX9o>|LSA^8eB9!(&jnW+z2sDERWg^Q78dEpXrqI+&e-V;% z6rYFBW*r$@rPS!awRW}*X`)RX5wSntOMsrzos7w!7Nhk(BqtTB&3wZ*^1!onmPJFx zU%G=L=@T?Y7baHGkJNmZQ%ydLW_;ekOT|W6HAEeSm`iE{UB8J`$HcSx{(O<78On%E zxAfeRNT+)@IvfpGZ*YaVxjswQId7s6k>RSa=*}m2O zmCBR24FntyAoc}VzOpp%H8}KMdzn@*-#vB6$@0l9Xs!M=+BWqnH7MVk38R02gGQYI z=+7-&ID5wYJ7&%aK7Z#tjy6L6o7JjRHC>FdiL%g1<`B8z*H14iX~8 z+uD#;cX^iaFH4DTVv1@`?qy5Q5*LaMC10+M{aAVS_{b1B@VN9*TC#r(zQkorD*Lg= z!Ps%1O?l{aMpdlrPPR@7nTWxZ_k|J}NwK>xVo70Y%&Cz_aos{^xca>&Ffv5WbsCw? 
zIOn4N2p`L+!VHQK`ECsNN30>!se-c$W{MK<*mv!cRr-+JtR%k6{ArzJ_WJ;Q)Kt|7 zG1`|ILTO(mRm|Uu0%n5DhZMF(I1hI#Pup~*Yj;>!mTe0s)=~~I(E4*b6N#=~<>PvWU02VTT zZ#y0B?Lfo*lM7V!2gxAxkY!*C4L=ErkU78K>O8;|o-x+oD=Mo$62&`7@QbOXhUn40 zQn9VJl7P;jbvJZLi8Yr0Mjmp$ufnGQ%(Gg`{aTFLjA z*ZYkszWTQrcSh{LE|#iXuE*oi;6j7P=q9ci_C7cTVz}to$*^5mB^2V|>49_8Zu%|% zyojSa#MeBy8mCT4M)mc2u%94dpxn86U0r&<377ewU=Dsukd0jiC(KYvt7p$JJ7Q?AU@QpN%%{c}Em z-KqxX36(Q~Z$!2iwK?_@iz5N$WqDHZn~XcWjlX|5ezaSR@9^rm#$B^1g_Vu5kDR#q zPoq=$npU#Kgt4=)|g` zZ_?*Hl~Li}3-6F+=gSH%uHAp#ov{XtEX6DO1yuKC*y0eUz`rS*7=Z zf{xe&g+UablPaccY%ImT(Sj?rtdTdz?(W zg)D_P_GCvd+w3W1yS-Em#U2*>uCP;A>C59_u|vWrltXGPYp>=YC0xzrt5DOCduk*E zp)NK^9Zf&NotgCaS@6$0=Oy?2z=7!*G+MZ-ve*2n$VY|sJ$UB`V;Suz!Psw~BLo|K zG{GqeQKA~QPyC8ulrKKIC3?E#L#G70nhmu#X$`gCr__;i%cQcdl4he!!kYlZPK zXLTa6_m@&BA$lu4ZjNR!?s5cmHpn^3>lK||5jYUNEyyLCFyBCma0cy*&J*L)?9Nog zeQk%#nu6dx?TS-9L#CP^EW)urGkilGbm)w4fKaZnAEP4_Tu6c4lRVJJek=FgQc}9d ze(aa&@pD^@E$=#bd*@RBUh2cOktyDcxh=)dzks6Cu$FWhR{3gHUCi$eD;LU9=;`rv z3RB~$M$#%iJ=GD04+B-^N0>HfJ*@__0C7fhm{T%1MOTk>Nnc8&E3!_fj*mO)Ju6fj z8DplD%in=zI}>6_80m~)vS@{?S9aX#4A*~t;SSgLWVp~OnZt!%k>QfDGWEgt9F@)o zwa}|FNY9EswTgcP9#BS9Z|pn4hEl=N?F?3wQk2R>vej@2zcR+F_2*w2ug-df$#x|u z?6*K5y=UQ1_r5;^>2U3NN<(IKYAWv0mdnZ8R}~VH5afHYGMDT>2x-TAMQ?Z1&m@lW!mjq2t+Y>;{~9w3%@bSg$o~>? 
zp2$>_531zZ)2J`|Ku`->cete$)r4I^xub#pkm5+uPJ7oQX@)oo=s`{tyGrl?TCz7# z|Em|FS!^IeW?U>?Ieb%gnps|8{&P1um?z{d??D8nZjLJ??gTsa4GKv1*~&gN&lAfh z5+`sZ*(YU>d|Qw2;wuxm^Kepdcs3Z(WK3g*6Y3NA~N$sC8;&CQ8dj+mrAW!yp zQoX7Oe7yKdX?LvI=Y4MLVmIO2Z^YhJYu)>|Di*3b12XC?c}dMPc0ZZhia0kq%|(8R z1X`o$Ctksts@$uWLlI++Rp)GW%z8ZoQ!VDE|EQGxX~m6c6v$n6CQ){xn>l+?to zlshGS`}l}Y=K7~8zS|#x*f~y+h0_-Ms;e9o3$;riO|CN6DEeqD_SY|wavr4U@fV&E z_ONZmId0D9WL#VRXI_y3<&#NX=Ew^dVAvxNBk3+XuVMDuH>J8+w2dz!$^eG~@7HoE zsl1Q_^4;R2-5bQHCW2uXr@t=N%EI+<}{4P)wr02=8ZYB6?_V( zz8sQ?OU=sJET6k814QD6*!|REvJ(f|AVAzr-%F34b9yAUKB`%{6NsMYD{SZm#}Pcs zU;7=0O1~{5k&uy4A@aLqPW$+R*qdSX6DqM+AgbYA9KQ02F@#}|#h@vNuu#kJheD2(s>d&h*{>M3?-t=Mj4tuB4 z9QjRk9$G=9FA6F8&9J$E$jr8AU%4Yi(AgMHTyD%teLp~jeQ~}Y3f$pex;BDn@+}_r zySJ!Q#MAI9NDm#Z?XpjbXc}*KV%f9bhh=|AlNx{*_lxt~)n^93;sM(t90@4j3K*GC zEcibfQ#T@Bmx>8oaJ9{f9T`r7NGEUm^*lC~ehhLQO|xH99zn)oOU%uP3N4v?MI^rKw~U;RBO{;Oa_vXyip zqYQtpqTZG3YdE>dl}qPG*vfyhw&CBib~ zFjMg#!rx%;N)ZFPdLJH9Q0XiKCF? z*m7FKuiJ#8afR$p5(9W=a@W(-6jv^`q*#3!9E)=Q|J`|%$O72m><9Oyal{&#jroF} zr)+65U5}ldPlci{G2%LjL7TilM=-~<)fDomYkfyp%yCMlZ+e%r?z zx{wHI2`su84HX?xY3xmEDLuP1*NrRUJG`9bdS5=E|tdja#ci8rEyiIu^>;dXE<4> zuTWnUR-a9mGo3sYSM;&O74adpz4lcQ#r6oJo)4%oafpV@mYAZvuow3MCgTMtbkYFt zwytaEzF{6gKCk;a?1it4FM5~(k@%u;OLHq&rR9E=)=jwt89C@H2c8UDk)DOqS6oXN zcBFd+pL?F@<4pQ&;^5TpEk%s?BtXYwjVzPcdu!x#+Um91%5%I&2+-LVx^N?*R_n3q z!`8f3xi^v(F|H@h=&UeV@wFUd-n>0*T)!o3jI#Nuu`s``^^kO|pL}#s(PMY}v9tMw zy>xGh3OVBP$F$tgg$b@xRa(ROQISd_jxJ8jVdZhxF&L3t23^RD`izIvcAUt0>i1Up z^kssWDGVm9{yx~?#QQi=M~hU{ku%<38F$354sZwQx-M}?eX-Ru{85uQj#BYQi;$}W zwn;aXT$z01M{cJ$adq-8VwQ?qBmU^O#O`H&|Bv%8J|<#bOO)LuVxfz9o&G9Khd#lj zRA~-hA0|9UP6V{%QpZle1FLX9-@_CK{t{oHCNGvWZz#aRX8*n?$r(r;_^``m^H5z- z`wD=$+@R0*$nUbT`Qv~>HWT(!tlC7K;9k-5Ip)jb=!GfCmNeu0^fkH3mF_$-za>Xd zK1@pf4V33;_kRZwV?CTpq=i=RM_}y*j92PV?rbtDO~2nBMLXvFJPB~4tt#4L{-n-% zIvV4n_~<3Vl#u?@QQxj!z^}sb({ETgBBtM#RVt4U-{#vnm|+T&nnRz^SOkB+iU{6C zg7X$uv$gU$7DqpuV|}vmuxO!Wlw6CI69dV4kxo16&b&U~`_Ju^r~|X(@)G#A*6?Wx 
zHu?g`3(oyAuobmQs{VhJ#uHibqgi*aM!`-)o345T^yuGj-7JsN%dUkp`|lM=4w1pv z;)%Xe7q(QKkLJ&oDu6;zphw@(dHQrH6D{^}eI$4LtzF9;nKX z7>$Y*%K;1ChrKBgAaPxaXKU6#>&)@=EJCoUjw&-a;ZdeJW1q9?&AawX!q|tAyVir zxqARX0MQdIWizCD`W%U~$8vs?v}jY>F3#cz);T}A0Qh)~uUqVNzP^^*GHM3=m74kD zbop4K6a%j;eMM$n5g(1Z9Uc>ZT`_~gJTpd^#tV(WG`wsLN9=fum0bE4yXS8VkZDmwRTe%p`&(duPz#O(af zo08WZf1v%7A0f&sC(vP{Fn8fIBBH5^U%e^Ufkly(t7#l>l`Lhm#COYUfs`8MIr?)D z-7dY;4MN@BFOl;s5rCHRGI`klIV^naAP?5w3~M|3Blk+wG3_@%CBX$)WY`<(qK?~F ziAqe%m#9nmx{Uj27Onnino(}gM!u{rnzNrfKe?J1DB&jni7jAF+#_?%*n+Y^Hkggk z_uzGGkBEQBye?O+9K00?ydT}Bpyl_nfL45T^rn(4F8UCJ95@s^BNF(yOna!gtnQ>Y zz6-f^WNb$$@M`VP<<|v_my!=CI>e+FF~ra)5k0|&y5!R+oULm27s5dCZwvq+f)9yx zu+kde#6kC_jQ%2nj(Bq|p87RD_P6~-z@3XJ$nEv4B-^5=Ba`?!p_@+>x&-0k1Rd@l zE>$}7Gi-WMuz5v8WyySc| ze>(m~R$|dkLButBPU1dZ1i)y1g7N1l5n~gXi{9>1N~uhDGOTP{}^DAb!|94tgtaAPgGA{qD|&- zCO)nHL(v;mK9NoNoSFQ%?1zFq`fjF(^rk4Woqtkx2%J203!MDK*&Ez@gW}{D6Y0Tq z_n=?-USHd)+^ZQ_enU%{?J7)XBRR_OmLRgZQ#o~azw%V?e~uzKz~Hl-_E1phX1q~4 zXBA(;W=}VMrG8*g-LS@DkWi4hcALP7(fb$naO&E##}p3kDuoJuA)5+I{2E<%lwB&mGgJV8KphjM{jGU6P1im z{P-}fLB=mwcVZa&7S|MS{4V49tRwLcP->6+>u3!hZjIJ3#HkU@jvrqf)1dDS;YXI% zu!qh8l1Egq-^-`bozEBTwX?P`M_yDxe|^KoI_+MuONo_P0ZK$QEOry$6z4bi+IKti z9q$GwbrK)kI9irq`_Q9~3nsiogj~YXj9u}v60iNF%5Y{vM%>Mh&IRW^>t*U>YzfnL zhr$o`A2Un9{!_4b!g@VelTXsM?jwitU22v$Vmq~!8{0x-yw zobL#XlKh_JM~t^YoEV4wiysrOvuevOqzP>IEU!o&Klmh znCgFk+{&&;=ugZ;^(nn6t^s~zNap^_ZCSc6)+>ID=>1AoJkZ=J&~HfWHLJ}#(k%N+ zY56~?`W4#hmz7M(ZvbzK`4j7?2|kWO<3~^9u9s8>Nb@$y5>o#-#gtqWdrz=Q6sz%a z@IM#9?S%Y3XnOuJ471^>{5hjfMp-I@_nM4%VZQvel;z7qiN*k}z7$F31VUT6r7d)w zmz~LI)W}FUejamSTu4L(EWm;Kc<`GCh+RSEyipF(VF60k^6z?afM3M5Vcl9sk++DSTrNgLY~ z!=I8&lgS^NWI|@9O`$+Fr8K6&uCgwxtXucM^l zC;OpE0KX^{BHc-%N&DARP9_k?q@2TFRuaTQRne|bq7CGQTa#0?%jF(s+_xIV=g|ru z;(bDjwZc=AAwuLNe#IKDDi7tGTXe^-QTQfW#=68aMC@1^YqAp}dM`03FW*tLt5G^316okRvZ6a2O62Doox&}3KKVDKG_>h`PU2>v zomy{F^p_RwQshYf&yY3P+&PIL*hXH}IB^JhPW7u@E}TxuZO-em(;?uBej3_HF(`%T z9@b*G>r{7h)M~uelX;>;K16+&qsGr96uKN$DcbcH6gfrqJ6okYJg9_QsS75DV&zqV 
zt|mkOMz#2?x+oQTK_i;wPLlB95cUGiVI2xE=nXmF=c8%dRFC*(5mYL5=pWwy8r2lv z11%I#;seSExtH?zfl3!s`!jmu;hOMDmKPMQFFrBe8)bWGqvG>UbIi}0tH#9c*hk~NPA*B1w$B@P^(%djdEQJC*EX_Hv}LKclXW-&}6^+N`U?#npG~j7eSGWTagZ-YkG_* zU1N$D-SJu7lTtdGGIM@nw81Ls#BbkZJ|rjAjQ**EScdZXnciujEvk1>J`Y05>WhO< z7&}6Zs$$1Z+^Pq0pL&xZ&J9{X*77;CLvpCu@@%Qn*N86I9<4<0p?A90AIiR|#n8JzQzhT->I_a+8z;K^3 z#h73)r`UDZ?~+M*>9d=V74aGE`*!{T@HQTi%*JUsz9;?D9M>G8{dwXBA`IWuT;OT+ zww@mhBMr7(i6Trs-*<1NUi&7ktwauGVZ1;1aZWrR&c&&Y=!Jdgg{2ckyY>*ur}XST z5Y9v6QUyV;AjNuS4nO8(_F!^)aOAdJKB)PaKj()M9mJ&%Tl)TkHJYKHAlgTY4%R~Y ziPSoQHCcC2a_K*c4@I<%Z}2q+=%*+nHPzhMi9!}`3=pbM2KZHE1{;cYH4zV=;y+KU z~@f54;d6;-jj8Jf_Ti z!^ha`JFBM4Ck|ATir7kP_K z0%-DNOtbL`zG@d>nET^N5M5rq;^1{}Q@T9pNIA&s0$dweQQri}5%1WisJ8-jL&2 zBq#AYIy)s#h)SibiRAVAi~P86_7(m4p`w~6kerw0t!EixRGD~<>R36D1%X@{d2+EE z5YwNsXb}HM;t`B&=|3`pl~()#M<*FS zq^0B%X;G~MOa=zI%?Jd{tl;PlkRH82Px;%d)uoB08MP|7+2~ zHMv1(i9ULp9SbrO*iV_n7U6?jZ0M2qW=*2uX@*A3E&F}iT2(%QT2LT5>aiw|f45T^Dl~XFk3tMR zCK|V&lAuWWzM27h;Yu%}vT+@1vbDHW12rVLI+gyx#XPRz5nHDUDQchu^mfcy zxk;3vbL)m#ZR9XYVDJ?jqUO&T8mk))I@|{HCwQ$Ln0umb?r6@~;PHvUiGrd}hUlc+ z&wpy@Sr*~zhi76t&0M+k`~AO3)Dne~5RpWx?-H*_{*jhsee2885vC()&-eM5^>P06 zj*n8~$+jN};NI=G^5JbCnzvEcG|(-2N*;w}s{Ps1d}N{yH?3lyR*gUT3*iNKniu#G z89ogWxr#-nKu&J_9C@kR#IZg8iXJ-CEGB$iwz7D)+g3`8xHTiSxX$6- zL__L3*vb#re0g0F7`{ z>y9u@X(C9A{hO$MEp7a&XNel?Rg7esp>`ERS;eqNiyN(FR{#?^Bo~GE!q25+qXZJK z)3*XczChv?22+VTNR&x-7S)wX?I%23J(FrXiSG%T>UJY%T}87~&&0xCrxi+9{nra9 z%$L|Xkv^_|QZ$LI0TkWw?O(S8vXf6rZw3!a3nlk>GLWg2NU&VgZ%5)9`Zp?HrVe?Dz1U8V zFlUq;Ka`o{l_X7{XH6RuRGayo2`CL0_=aZ6USQ{KbW{G@i59e}ujt!B=bd%|ypvd` z)iV?HC$J8+?wZ+QM2-OvCvvJ{d%UzSh4%PG?W##hc~2_ka{?1#l{aiA<$iC<-!kwc z2PEoCCHgB*qAN_Id%TGrB@rHyr=bpLbvXs`jS5*z7#}31ey=`Pimd2fl%iYXp=%4X z&r7#0RTw9gsMwR}f|FQ0INFqA@I|TAKg5Yh~UtN%o5r7o3RcBfJ=OQkOP6RGJ-Mye}Pso$4M{lD8L>wV#%UVhJkd$Ir~ z7q`NBW|~go*RR-`KZC8U`S$sDNiQnMoBnI5^mnJyU;8fUMFDuz_omVx5Tjn%I3Mz) zFE+@3b6AD?3vCHT2xQa(vZ#e6mxO5;ax!CMGlDD0|^yd5> z=kWOzOowTKyqkILzvj3&$J+%Babr1YJFE4p;j>M#ID3A z_` 
zAdWMmr|^6>^lWhGNp@hwpHmA)d~Of|-(l*UEUJ0zap1oa$WtKU_WjkLdva{&Jfk>7 znm<|o;j_C&uR!kd_z2?g?bXBUTSjI+H!|x}Ib$HkkK26%Hk8dY2Lmu#>#|!znm^N| zevgkI9Hh;>4M4qDG(lrTDC%lGz!oQqWpYx7#LRdG1 z6SbiV$qAoGwQLez#UA0S?H=KU^+Wh}%h)5VuVCxv8E3Z9SFlYIC{KG9` zjcKpjow7Va4rB~1l#y_|^ z>H0MwawfO#@Z(scNz}@@41@)0d$8=TuBYVnbAQLI{4}NmxJsyYuHs`pb$PO|J-m)X zcB~NJ2a({=5!p#6@=d!uCG8^nessI!JR_1c^HVv$m6rM01uw8d^aJqfMgk|KH?w7U z6%x zH~&J)UeDq}LH)?N>|n^BQ!j`#cw!TcsrwbId38|U*S?r|fmbAwmX%F7U64pLmO}o+Q_kjk^C{`9TfMNa`3T^U1CAz zd38g%LD}>ML8VYVyEXQ)VgH~@ed}BC-klsDxnA@8lUI$*x;+RzwM+k>K#;Hh@K&g9 z=vj8wAEECVc{(g(I{oi4{PX`XenR}ywEOV?0 zO0;H)X#H8o^-Q5yjQ?nCXe5bcULd+meY5 zGMoti&l>8H7JM#-N2Sw;Z!e(bhI3`F-N?*0=|Uqj@5*`Wf+H*@4bQ?&=X3GGS1(lQ zC=-d;@(6N)YPTx}3gW+<^kb{!JasmtiL)WMlaps}884A!xcXJmuD=r&4lQT0^v3|F zl!xAs-ad|IgpKNhePuGo3b6TRmf4qfEFPsjn{-opzCVQ}UbkH=m@-nb0#GnM{eMEm z>`P@w56mbIjI6E#wI(>Xh8~+#@R@@9h2&3*e5+bADaCsjf*+g8DSP8(|32Og; z(XLAY1&4baye|8eZYsI$mAc`LZlLw?;K-aOq@2Ow<&XIWZ=abHKUzP$jj@hV^&5;A zOsVS_v%#cWJRuX=05ZaUaq;)wjudb*8SQ*{Xl&_^idYDe*S|IH+TncHH(afpQlBSO z#3}XF-1vvj`@|N>Z5E#|c)>Svi_yDv!+m9SeEU*7Grf5G%vUcq>%Q!c6$jI?={uL_ zIPv@^^K&?a%!=UB!||6TXWww3a%fD(nZKrJK6y*E^oK1(5OAtJWGZ=PD-k`((O&cq4wLi}w6P zn8V5v?~O*Jt3R24S`KeNeM=h}Z+})ko$FtEB({=cmwm!rM{og*r&o@fk}Y zBeRMtrd>z=MI)jdp3Kk9F{$H^jsFi!Cyob~1kA^?gF{uX-VcSuCCC-wtDje&BCcP0 zTQ#o<5I7$P?$l|#v&Jrx@uF_{;3!nbuSC3kgSAyTwz=;(gR&(4i6qi5QF0#rHvST8 ze+XHN+{EwItKRCi=WMj0J9TGep(--=T0o8f zRhKNSDY*6c_$_cp0D;zxXn&6Sus3^n5ueb~BAeDk9LqwKK1stwZ>Nbv|YqtTlM(f4hnR5+ShrOOKo$0 z9KZns7?%ZJTC!-@GZH;4%#fo|!;7y4gSTT0a7q62F52gg#&($fL5p+dZ=d{Jz(KZwE1EcHC=T4qsO-2H7;3-|ef} z^;C?H?<_tVmeQ@5CiOZz?RB2y;`TS@c6=3v9;}BvF0$1Z>f{9Daykp?o|fbzXRQ?T zXk=T}c)XpP!_)4Yr1Q7t79Tm|-)13Cn1D?2MOj)y{1gW9(=IP6N&<&#R6*sdYc;qY zM?A*=mr8+8>R*oEC+nkW{r@hL3(XLl`svF+a2cz`Y7i0tZ{=*{ge2akAd{!a!$T!y$7my2w zg7^SlwVcnG|04dWieIhKGd3WAR_rXoG%f}bgX9_$oqqAl~Wa+i}xBs&=H6N60qm;s5W=A`zuanrI0M74<&i{jaQ}ie1vxYwA z{EeK>3U1=;XJDjU!VST>EXKT6&@sMt9Sr*ZAM;}8+wT|f-B2`~A0K+jSud$VEd43x 
zO;o0-gCd^#Nq&!$zl1_lFy;Bwb&3P0IB<#sr#Ntm1E)A}iUX%OaEb$`IB<#sr#Ntm z1OGpBAjjeT1J}mh(B|&28;iK1?(RryC>C~m!ab4x?QVOd-wnCF;caeTs6QI!p*7YO z>19`xTor%f(pc|}K7?bT&6@CgDNk#px4o+)-XHRmralQ7U$=B659f4tCS9a2+^f<}i5ZRcm9GrN zW0CcWxhqywx;u8b3fkvy?~Yk;2(*EUel{8CE6Q_hdtZ3PinZ(3HrA}KuUqR2s-#48 zhk7=*g;uNx*3_?C+g$4lZm6;8T#C~bt=PW3xh))x^~YOdk^ZvMw#zIy8Vj{d z+S|kZ(oNe!v5=HFsuY09;oY)RMg9|bp&RAPOk6>gt?E{y(72Iu&UU+^s5WV&!CXdJ2U`oIRGC2jttQI(JCa6R$v zSXUoCN+*hXQc0H!x&86(@X6`lwe*gYYQJ~moV&tROyQs6-!<2)-O%W3tg(Gj!WFbD z?P=rJZw#hIXT&$x`(;H!mfp8|bxlK~2O)$0+I5~dA&?QT^KRa_p{Chi8*H@Ej-Xav=DPDP zaZBe{M%@)FMQl1nBVIBO1tRe(Hq}S{seY$1bW5f1{NeTxlH<~?%ikppX^}zIlVcEU z4MoFc+qa8I^+r~#C~cFzDwkWfuA#B6zV6x@zq@W@<6KD>iZaZ{Qs@_W(4A=CK>12h zv@2Fb`?l*__o9nj1t(#I2#S`{Cn3StifBWC7TmBdcr{s}!#pwT{Zqwk*syxt+S*{< z>c)&D8`e(|u|6Y$GER|wD(0l@Q!$y@Pex3Y%VhSt-5O~F)sx*;3=Pp3o5Nv{ibEo+Q`@`x(WjWB+Ne=^B6eH5 zB5}=6L%A`sg&|bw^=M}#-VIa2Zn%pgijps#?P}{aV%gE{DicTQ>#-NC;_iu)DFi|c z-HLnNwgOY|N9<+H&5m9#vDw#^MmNpYDH~EYiQ5zE-=dYgVUA|E9z))1ed~>NJ9`sV zOUqTrS2#5TePN7NtdWQt4NFaCFPm+cDEyYTlOfEERLYtYNmU#){13TXm37KL20al* zn@X^>#H7l>7^<5{s0(GCO1P|=#V|01U%t|lV40w(f!Oj{%X!K^MO(!?I=xd7B`uRf zV`C%&V|%xIM^VOI=@d$UCnYTu^OlEvMAWEU4?MyS?~BB-S+G)Lp)Mtv%y}ZcJ2PsRSxsr_q=o02EZ!Yo^ynm}=7^zRTB;&dtwY_SrnXZB z%wln*Dwn}i2x44WBsyCMwlXTs(}Sp?kptbgym2DfLd6J7X=yvcvKgZ^HOErXOfIZ8 zpfih-(B-I)C}R{|9AU~enO0?_yN$xedQ}NZOSg4(x3z})+uj>}7$KQ8(|XaUYxRll zA&_U5Le^bOQes{0UE%&^t}d37p3Y7er>@F|7<03ZG>v|;9-SNkHiwaPZH0#sb2Vh> zX^G+EYOT?|BgB7;-dc~wO*MLNM0HBB@J0VdQB}f)HeC&GZ?LLS#yaVTwj0`#cl3vt zIrmq%nnSb^a|(tljCd5X(pRb~mL}s;456&h`N}Y8V^rZHt*!C?lm*`3)zKL%@02-A zl%|qAjWzBz%0R1fW9dN5FjYXaFrr7nwg_Xg=ob3y7KECyy}x_AVKe%fHHGf&TKdd{ zds4C>Zq_A~ae!ydOgQw>BTeO&~yx&sd0?%#JYi6`jPhY zM^qj43c+la1JnS{V$f`9Os2C)xZrK*jBFFFs^TU|phgF=kPDxig(bgd12m5YuS7dQwJGdHD^@a~-=*Btu9jHr=6# z7%y&&Ru3%HQ!mod4}6fAIgz}ZFhlB@Kthmu} zOBZ5)mR|y29arok$Gs%`;*WR9M$dL+;bs!I+$=ApprO{*a9>Ozg!}uM=c{?t=5T8W zH9!@a=0us!INLTm6|}drLF5v%nH4KmH0FX1BNj(pbt4b1YlbOV)FN&nd=|M_uGdRe zIu*<2ZSLw_HZLCSmm8-vI*<4UUt_=#!Kvt`H0po}UZ@8v@)w*B? 
zF;=94tDu)giX|zdsaCkx(LOSxUd$5XJQ}ccQQKwCrb*Izso|vAxT&m(UA=DOTD4q} zF_l-#7_wlYL?WYTCN0YyC&yAkwWQe`X=fUtN@;U6%L6*gjQmWT&Txu2m7O<^ymWua z%uKD^=_LZqCy|o$1qjtGraLn7T_$o~K7Vo4>6c!+o;lx&6Snnys5$sg5xgx1lv+oWCWm#D2c1fd7u@#8nzaeQP}3fBl$GLO8>v+n>C(y!cC5W> zs47F$tAbKf2b;aiz$g<7rR-{?WMbiHo0llX6BXyR;jR5H`o+O`k|kW2EKQ(n4s#!r zLD8k!QF-SwzoOEal=rXBQYUX7OHB?n3#%md41vxUi ziwa-Sc{H9eFdVNj<5H&nE~0fxW#C}Z-5hqy)XK?RRIOUE7fU)tg0Zc`xycw1VtUxp zYYPJt(%GO4S4{cz6@JVKj&CHBeOD!u`}tM!JCNo1(O@!p@7iQ?2fJC{AUv4m`JaHv z@0jd(ZAd1|_{noI;RLWF+`rtEOwQy)(q;UD{JQzwZGMjuzQON#aJu2MKr(sgtI1@HUy$Ex_a~E&^1GMcW&Gawd+77)=C_F7z4z(g z4#GqH&gJ(SbnfRD{M%%5LpbJZGxI@Z?D0=E;CigW{@7}aQeGA47!BH>n-`EoipAO} zRx0*gXn@yT&0@#uB)qJGgo;%JLcA2}ZEMDq(e&*9a8Fmv)0?3{x92i1yRgd`*SS`! zcZ=N!WOF#g(Q0GbDRP0J7*`8;_qO54)Sr1<1}5uzHZ{0E~Tx3-$S^Zu;j92 zvPqr^y9pmA+)3C}PP-8XDv&9{J%lr-L3ds<*+_V#5_u*ZpPx*gAgo%FOje)fIL8S0 z5*Ab?lP?hl2u~31AT3d3Bu|k z;H{L4aDs3@;dq$#CmiUYzQvUDM%sz6vzz)67WBdoyib%6x`YA3<%Cr?q08i%a3A3S z;g<=I5I#&eL3os~gaZi22|GE7;y)8O;ZDLm9R4^&IB*;F`v7!4ht47FB-}%I>>l(8 z;lN)a&$CGPSHu$@`6BH_Sn@Z>8UBn8j3$$vgyV!e2n)VXc?kCq9wM9|JVsdc81x7` z2@5_*JmI;7B~MTu!UKeN6P7&*-w3M+Um|QGJR$HS^b4HB?;-4z@G0s`Sapi9K<^eqmHwPU(5OAAzL^Ki)#x@t}L3dtzf6~(GOj5$-+6afjWYGJ-@Px zlSv%UsEeXFef%1M-7FA)p_|(>y|BPPL*efQ^e}NZNgS8Z*w1fHSuzRR)Fm_?SqK={ya2XbngcZtr6kDUMc(Te1Bot;IuV`mA6kXEb|q*K3nIS8M*Nc5FF+G zF~2#q^9ktr(9ybVHNw-$I!=~tP2usJyst{(PG;*Wp(k~X2|aZD-Q0WVsWwtPQ}q0{ zjT*92LuRCGqtNnKCX<(@_%>O#48E-?9Lvqi$zC>9SCO}3#;-Z^lgTAgwvQJU3v3Rs z24Kq3Pm6|C0ows=gWz!qUJ%$3U>6F&Usx>trv=!{z_tktIU3AW67QQ)S6CqNJBjx% zm^ywx@n3$I_)+5X7iLc{WjaoL8}Yc&QJ3`HH-PN}wpt*_>_{#=+nYykH1dI9b)<`j zBmM3&+W!FQJ|gK;_K%9^+THso!U=FHNgW=P#%g4V$0$>g`W zmrGzLfX%@^s8A5ISAODeik=?s;1BE3cqMP^T*Aa-8nVF zi6}q)%p6HW2R=mlQn{zCc1zz>zyN-0&AR zZ7DtkX884eO<_T;qz~}Bo3vwBGCxn%vmw2nljE(wuqChL-RgN9TK@WE@-NftIXTwW zdF_haP1N(`4u^zRQZAO)oO_sKT%W2Fwvf@2YYO)jSFguUrO>}B%Xo=B zl}kIO8B}X#Yz6Q*40#e8>nKCq)1ri=ePhsj0D9j^)nDnLNgFlY{&_j8>;<&VJSuf* zoE+b)q5nLv24Lfc{#|1K8J&PCH97&&$sL=nlq+gKr5ccFk>8S;@P~QeorcCqHiFdo 
z(TqA%CaIr%x}*UIAnT?Y!@cs`koQFP<_#!Yc0aTluf$VhXh*it(qoB=W8AM@Wgu1_WrC3UIs57-G{O1~n0*BV<$ z^mQL``OKI9)%5L?+6LYXo|bnOeL5|Fh8(4HA0YjMtd+@-BbVsmL%<#YrgZaWZJ!SljIfV_Di#E$@g5~;%IC+i| zm?82tKpy9AW*vZgr!HlaC<7$7Eu|l8XROv;SQ&_*R`Yw1w0)$#gZuRUeod;s`)Aaq z?rUaD>VY-z@p_U?K;uzpoS;wYu|(BzQZG)*74wGT-0v11D_&oCy!cweRRHqN$*y%p zi3cIs#Ck?>OEUQk^KuXUY&^WOaOeB-Zp@pim+N*q0KKE1OmC-0fgJnmCHKy( z?X+e_?p0Z{Uai{6MMz%WWo(a+iG5&oYwnG@l~Aa)O1Z99ik1K~c)C#+1QGGg$o+!n0n|2rFO#$zL_^Yd&@FI9+#S7I6WIEtR6gl_i1~ljpW~z_i16jw0V#W78ttB&g%q!IdM(I zd38qvur^?KzBift^i(p*lq$;-!Wv~$JV+k-itPQpRdpb1+y{kTCMJtOwtewGy1YrNG7k7f^dnBycbx> z*OSTD1%N!6bsTi$?tI4a!D;@&ojJKkf12TOm9lS-k@h9h{*U+44h?zj*=620qnfPe zFgdAy(9Dl1$4O(Bnq#QKFi6U5%{y}Hd3`4|F8dbi?C*((d3D)`@iodOco16qA4(?w z-q0G!rJtGg3&WSn+;?CL79iS+5^L4KK$QVPhN)k^H1@;EWL)Sm7TuNGB=!;I*yVtS$iIUjm=gyiD8V z`j8Hs6DIy#^K9dP|4uR@paNNo;Id=iPS#E3VmVk{l_uWK=tL|lI*}uzu4zA=@CHh zE_BuI!ewqcqIuF(c}4nr4Y+=xhK&7IKil{XzthhRCjLZ5{8|5@;~PzUiJI(8>W6Q> zrQ@4Sd{svL;Rzi-ZsJ=q;?Fkp115f7M*I)n(CLFF{^5-H3rzj|CjMAP{0dXP788Gt z$@=}RxQJhZ3Plj-o&9IH>$;io%R@1(;E9ILO=;qP;-9!!T%PwC5aINI6u zLiBx}Gn1XX+2HSY_GjrYc`Sn3@y44f&tZY>xEG@D^O#LN{KstYGuX#*a?9p9XQsw4 zZ>l`!1F3N<9X`v+rXTYdeU7P@)Lv{p!Q!wv$c#VUm^?HG3 z%4z$P&)^qds_}oW181FqtCx`6%le62*YcC{R9~hZ9kow~+g5%;|A7qpod(}6*Z68b z4!3;Pf)fuE3yaNVr~=PwDRJo^@DfZDght<+oKql-0M z?LXo6tNe11!-wD2aF(srCF`Jq?-<078u$x@g1^(?OMA#AYD@9K;BQb+>>(Oh;QI~y zLIa-#TNO>AegIoG{8T`Hs{LfhW2LB3E{`(bvk#oTGYs=>+44r+3jx1%%6+mx_{2b4q z{{w@6ECc?WfsbXt@5TTXI!7|#KQ;KH8SqyWyvQjrQ;1$4ye&rgSpW5@l?Fxl|wzJRZ8Eb#b_bi3ZJsEU9rr^1Mkd`pRX8rrGZ<$@jU}?FmOAcIi}!eJENwY zErt=lQ1B9`C4+CT8@RO-ZTseMBkj_Zfj`T@tzBsGT?22(z^^cHYsXo9*?%td0~z>h z4BXmv7XRZ4UgQ*;`r7$oAMkAB-7UaHKC5LQ^IW$YcwYuw_O}VXoe$e2UjZ)VEHmY_ za`i0*KiN)@y=_9rG2^MFGp_JYwl}6K|LIgP$2r5=pxm;0v0vdOyhRZUwiwO@^PJ z0)GD#@b4S?V@oxS`wXdP4SavK2H5`cl7YMZ8ZLH#T!|^@zXRM=80r7M&V1d?V!wTe z@k01e?brCaRwexKG zJ~{ODTUZyET0F*v!l>%hswKoa_uVGU4w zU${NXz(==exSfZeXW+#~u2vg5Wd^>#RRiwK(cuCEcmGL0j~e(&13$4*!_}TKZm%-% 
zorb;<7pKXNfX*hLmsmQcKVEF|T|5e!Qa`b@zs7RZa+H({#OjXd!xqx3nOpeH1K`TXt;W3%B_6)G+TY2n*#m|OMg_; zvE`TjII17jX!vD1aQ@xGO?_>Dk#kqs^8J8<(;rRye$~*KZQxF~2B>{l+{$?^$+y9@ zOR2$MX5hzHXu!7(e650;2T#yr@K1bC181AKZ3gZ(Xn@+o%I)n_(D{tPZ!vs$*6{xe zQ{evtaOuC9^ZiE+ofEw}U%H{X4oyKPX9{>=3VD|G8sVF3Cb()Z5;?pw1)aBmySfNz z{|iuz+1l}3;AiBX?bvlpn`rJ7_{#+!d)qm#F)@eKb(O+r-fHyl90Ol(;QNCbVCCvs z3paYtmaW6UN2@iyX&z^rf!m3`mFGbNuO8L-cjoGFpMmc*_DooVo&5%W+{mF3Bj;-h zZjd~|V+MchF%8^n;x3?}i?Jt9Wa#s!4SwZcXhgL)i`#FRb}TN_@Og&L3kH95lLq{a zf&bjV9n&AJ9{V=~-&d~@|Iy&TVc=s1U+pa^i^(18=xQ!)-fmQgD;l6Z8WYK4*@bw;4Ki zy;1EAA>E*Xx0rGHBZk(e4g7&74N&{Fx&6F>7a!8_r;Q%?s>L_;HRPOc8~E`jH2!ru zaE=)GiKjHc+EFhlxIyv+Ck+0+&uQRao4B_PyybBXu=c?8DfGj63eJ3|S>vlc&)hCB z@D|g*<{SDe47~Ds4N!Z;xm_~_{Y?gc%yisK4V@beyx8=w9s}QD@r|I_^51FTJC|uh zb^eIkKR58>M>SmSIpX$<3a(+^|KFSf{=^jU7l4a?%aosAD17*48gQ%0cC>O{1$?%l=?%g%{^rv7t{2K)IYVdvN)4|t z(Jcy|jw?^GDc~c(&+zmkvQyWW41K3YKije$Fz_)`A+_gHP;jCb!xfUpE0T82fHx~g z+>A8$gmKN`_>@PfX55QA?fv1f9JsGoG{0)8qrBM&Vk6Enl$(HPv-p^Da9g3g3(3t^ zhj(KNs`6>($bC%E^;1;2k7{>RP8Gavi}C)ZM_dhLxDRM?TrQqUC#5m~Cc`b6Z!vp;^e~FVd>5_$u@l4bo4Yh|k-4D-HIp<%_ zidx9HBl5zloPh*vywn+37&1-^l%ESvEJk1D4H3$vDM(Sk)w z9OZe$IQQ^4`AB*A$ewcX zU>VOSJS<$Kg`&MX)FF>lCnB}!jYJIyUiSCd=*4(!(f&LH`Ero8zYih1z`-$p7%i{_ z{)&4L?G#3ymu4^UlEs%ht^KiREY64iT5+M%k0V-x>$mURY za~nR*qs^iCfGS0I7u+ubS$cR1q0MRbU$fR%U$;6vhDxKNnwxp++*}hd6axPBPV<`J zx>del^SavF4Kb@KvVU@ul` zHXVW|Jo(r`I?y;A(MlVh3Y)}9NmoZoUBQ?(TByGlot~b!xvsGuFDtek8tYd}D`@X5 z^!2sA{*I{8R?1PI)1W%L&c*9{#rjzBxKF`jN;pQ#Nm1}5+KK0)=rc$-mE4QE%I_g>QWPBM99){+v!ZL=^%xz`m~H-R9{+nqiSl&ECbuq z2;x#m$zjUFkLH4KQQ(?^);*?ZFV)3q392bBin5hMI=ppbB!$hWD<#xz(yAmWodTK< zQIe)c^H7OeY%yW|Nj+Qp0He;z4-tL^yFC&iEN^b^z~z^wgvx_PEOHV`TQt(FO$3ay zp4PtYlckx@s8qL(^le?e&C0tN#aT<$D8JYUR@__h*BtXopr}?Gsqc-Eq)O^hk}6Vl z)1DBndc_Hx_Qp9`2&sr}+;&<+HHq$X_!6^rg|Y5bWDrv&n$nAgIaFQgr*`~sZ%L=^ z5e4awS~08bI$=Pk0bYmE@VD7*ruxy_PJAxy?askq6_pc)?N%IxxL z5|W;Gb2Fov8PCOtiDQV9i>P-s>SZ1C@Tu1ul`p|)Rj)g{^jB};m0UJAw+@749LJ6N z%^6zUGsBR6#b0L8TE%T(^JW|+dW2&k$mqWeWV#chuyIrBh;w0Dosj;j!cKc9j$`?* 
zOa(5D!xg?&b>*>8hk5GgjaR5)tGuhtsgNfnL=|n@d!gs35C?23#@LE!56#5&hr2_9 zXzu7;PKDw`g__A#z=|&7B5J6s2zMHLtFw)?_DQGEc9o-2*n0?vdb(Q4H3D7!Rj3R! zG5U*BK^uy%Vm+(<@BjZWBxGqQ*O0RN0Bv|~MiyonDFwZdJ6?UH(%b!lHgr>%`o^xS zTCl9&c+;)c$jeXTvR| zm%ZHf+3t_Dp?Phf_Nzuy#4()$ie*1iY8*h5M zzto1}bJ2Tc^6#OH^mZSr4P{G(_p^0V34{Tk!vNvS5=TRnK5EuEvliul>| z_B_Uk_n7_|X+?Hydb|I1!lXa0i;s!ZPB#mrZZOSBUhc zPwl65b@~P!q)6CwHvCOSdix%(G$Xy6qNHzrPaw2x`R#hJd!bG^mVxhPJpX}b z!K}=H+w^B-z-@87VM=yAxK$>9_Z$tBejMj#>uvl0`+(W9YfSyWX}QkfYZ)EL=4)}K SeA&{UcBRJ5%Se#%==>j%rG9z< literal 0 HcmV?d00001 diff --git a/lemon-src/lemon-js.c b/lemon-src/lemon-js.c new file mode 100644 index 0000000..4cdfeb7 --- /dev/null +++ b/lemon-src/lemon-js.c @@ -0,0 +1,5442 @@ +/* +** This file contains all sources (including headers) to the LEMON +** LALR(1) parser generator. The sources have been combined into a +** single file to make it easy to include LEMON in the source tree +** and Makefile of another program. +** +** The author of this program disclaims copyright. 
+** +** Based on SQLite distribution v3.17.0 +** Adopted for JavaScript by Artem Butusov +*/ +#include +#include +#include +#include +#include +#include + +#define ISSPACE(X) isspace((unsigned char)(X)) +#define ISDIGIT(X) isdigit((unsigned char)(X)) +#define ISALNUM(X) isalnum((unsigned char)(X)) +#define ISALPHA(X) isalpha((unsigned char)(X)) +#define ISUPPER(X) isupper((unsigned char)(X)) +#define ISLOWER(X) islower((unsigned char)(X)) + + +#ifndef __WIN32__ +# if defined(_WIN32) || defined(WIN32) +# define __WIN32__ +# endif +#endif + +#ifdef __WIN32__ +#ifdef __cplusplus +extern "C" { +#endif +extern int access(const char *path, int mode); +#ifdef __cplusplus +} +#endif +#else +#include +#endif + +/* #define PRIVATE static */ +#define PRIVATE + +#ifdef TEST +#define MAXRHS 5 /* Set low to exercise exception code */ +#else +#define MAXRHS 1000 +#endif + +static int showPrecedenceConflict = 0; +static char *msort(char*,char**,int(*)(const char*,const char*)); + +/* +** Compilers are getting increasingly pedantic about type conversions +** as C evolves ever closer to Ada.... To work around the latest problems +** we have to define the following variant of strlen(). +*/ +#define lemonStrlen(X) ((int)strlen(X)) + +/* +** Compilers are starting to complain about the use of sprintf() and strcpy(), +** saying they are unsafe. So we define our own versions of those routines too. +** +** There are three routines here: lemon_sprintf(), lemon_vsprintf(), and +** lemon_addtext(). The first two are replacements for sprintf() and vsprintf(). +** The third is a helper routine for vsnprintf() that adds texts to the end of a +** buffer, making sure the buffer is always zero-terminated. 
+** +** The string formatter is a minimal subset of stdlib sprintf() supporting only +** a few simply conversions: +** +** %d +** %s +** %.*s +** +*/ +static void lemon_addtext( + char *zBuf, /* The buffer to which text is added */ + int *pnUsed, /* Slots of the buffer used so far */ + const char *zIn, /* Text to add */ + int nIn, /* Bytes of text to add. -1 to use strlen() */ + int iWidth /* Field width. Negative to left justify */ +){ + if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){} + while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; } + if( nIn==0 ) return; + memcpy(&zBuf[*pnUsed], zIn, nIn); + *pnUsed += nIn; + while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; } + zBuf[*pnUsed] = 0; +} +static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){ + int i, j, k, c; + int nUsed = 0; + const char *z; + char zTemp[50]; + str[0] = 0; + for(i=j=0; (c = zFormat[i])!=0; i++){ + if( c=='%' ){ + int iWidth = 0; + lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); + c = zFormat[++i]; + if( ISDIGIT(c) || (c=='-' && ISDIGIT(zFormat[i+1])) ){ + if( c=='-' ) i++; + while( ISDIGIT(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0'; + if( c=='-' ) iWidth = -iWidth; + c = zFormat[i]; + } + if( c=='d' ){ + int v = va_arg(ap, int); + if( v<0 ){ + lemon_addtext(str, &nUsed, "-", 1, iWidth); + v = -v; + }else if( v==0 ){ + lemon_addtext(str, &nUsed, "0", 1, iWidth); + } + k = 0; + while( v>0 ){ + k++; + zTemp[sizeof(zTemp)-k] = (v%10) + '0'; + v /= 10; + } + lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth); + }else if( c=='s' ){ + z = va_arg(ap, const char*); + lemon_addtext(str, &nUsed, z, -1, iWidth); + }else if( c=='.' 
&& memcmp(&zFormat[i], ".*s", 3)==0 ){ + i += 2; + k = va_arg(ap, int); + z = va_arg(ap, const char*); + lemon_addtext(str, &nUsed, z, k, iWidth); + }else if( c=='%' ){ + lemon_addtext(str, &nUsed, "%", 1, 0); + }else{ + fprintf(stderr, "illegal format\n"); + exit(1); + } + j = i+1; + } + } + lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0); + return nUsed; +} +static int lemon_sprintf(char *str, const char *format, ...){ + va_list ap; + int rc; + va_start(ap, format); + rc = lemon_vsprintf(str, format, ap); + va_end(ap); + return rc; +} +static void lemon_strcpy(char *dest, const char *src){ + while( (*(dest++) = *(src++))!=0 ){} +} +static void lemon_strcat(char *dest, const char *src){ + while( *dest ) dest++; + lemon_strcpy(dest, src); +} + + +/* a few forward declarations... */ +struct rule; +struct lemon; +struct action; + +static struct action *Action_new(void); +static struct action *Action_sort(struct action *); + +/********** From the file "build.h" ************************************/ +void FindRulePrecedences(); +void FindFirstSets(); +void FindStates(); +void FindLinks(); +void FindFollowSets(); +void FindActions(); + +/********* From the file "configlist.h" *********************************/ +void Configlist_init(void); +struct config *Configlist_add(struct rule *, int); +struct config *Configlist_addbasis(struct rule *, int); +void Configlist_closure(struct lemon *); +void Configlist_sort(void); +void Configlist_sortbasis(void); +struct config *Configlist_return(void); +struct config *Configlist_basis(void); +void Configlist_eat(struct config *); +void Configlist_reset(void); + +/********* From the file "error.h" ***************************************/ +void ErrorMsg(const char *, int,const char *, ...); + +/****** From the file "option.h" ******************************************/ +enum option_type { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, + OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR}; +struct s_options { + enum option_type type; + const char *label; 
+ char *arg; + const char *message; +}; +int OptInit(char**,struct s_options*,FILE*); +int OptNArgs(void); +char *OptArg(int); +void OptErr(int); +void OptPrint(void); + +/******** From the file "parse.h" *****************************************/ +void Parse(struct lemon *lemp); + +/********* From the file "plink.h" ***************************************/ +struct plink *Plink_new(void); +void Plink_add(struct plink **, struct config *); +void Plink_copy(struct plink **, struct plink *); +void Plink_delete(struct plink *); + +/********** From the file "report.h" *************************************/ +void Reprint(struct lemon *); +void ReportOutput(struct lemon *); +void ReportTable(struct lemon *, int); +void ReportHeader(struct lemon *); +void CompressTables(struct lemon *); +void ResortStates(struct lemon *); + +/********** From the file "set.h" ****************************************/ +void SetSize(int); /* All sets will be of size N */ +char *SetNew(void); /* A new set for element 0..N */ +void SetFree(char*); /* Deallocate a set */ +int SetAdd(char*,int); /* Add element to a set */ +int SetUnion(char *,char *); /* A <- A U B, thru element N */ +#define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ + +/********** From the file "struct.h" *************************************/ +/* +** Principal data structures for the LEMON parser generator. 
+*/ + +typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; + +/* Symbols (terminals and nonterminals) of the grammar are stored +** in the following: */ +enum symbol_type { + TERMINAL, + NONTERMINAL, + MULTITERMINAL +}; +enum e_assoc { + LEFT, + RIGHT, + NONE, + UNK +}; +struct symbol { + const char *name; /* Name of the symbol */ + int index; /* Index number for this symbol */ + enum symbol_type type; /* Symbols are all either TERMINALS or NTs */ + struct rule *rule; /* Linked list of rules of this (if an NT) */ + struct symbol *fallback; /* fallback token in case this token doesn't parse */ + int prec; /* Precedence if defined (-1 otherwise) */ + enum e_assoc assoc; /* Associativity if precedence is defined */ + char *firstset; /* First-set for all rules of this symbol */ + Boolean lambda; /* True if NT and can generate an empty string */ + int useCnt; /* Number of times used */ + char *destructor; /* Code which executes whenever this symbol is + ** popped from the stack during error processing */ + int destLineno; /* Line number for start of destructor. Set to + ** -1 for duplicate destructors. */ + char *datatype; /* The data type of information held by this + ** object. Only used if type==NONTERMINAL */ + int dtnum; /* The data type number. In the parser, the value + ** stack is a union. The .yy%d element of this + ** union is the correct data type for this object */ + /* The following fields are used by MULTITERMINALs only */ + int nsubsym; /* Number of constituent symbols in the MULTI */ + struct symbol **subsym; /* Array of constituent symbols */ +}; + +/* Each production rule in the grammar is stored in the following +** structure. 
*/ +struct rule { + struct symbol *lhs; /* Left-hand side of the rule */ + const char *lhsalias; /* Alias for the LHS (NULL if none) */ + int lhsStart; /* True if left-hand side is the start symbol */ + int ruleline; /* Line number for the rule */ + int nrhs; /* Number of RHS symbols */ + struct symbol **rhs; /* The RHS symbols */ + const char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ + int line; /* Line number at which code begins */ + const char *code; /* The code executed when this rule is reduced */ + const char *codePrefix; /* Setup code before code[] above */ + const char *codeSuffix; /* Breakdown code after code[] above */ + int noCode; /* True if this rule has no associated C code */ + int codeEmitted; /* True if the code has been emitted already */ + struct symbol *precsym; /* Precedence symbol for this rule */ + int index; /* An index number for this rule */ + int iRule; /* Rule number as used in the generated tables */ + Boolean canReduce; /* True if this rule is ever reduced */ + Boolean doesReduce; /* Reduce actions occur after optimization */ + struct rule *nextlhs; /* Next rule with the same LHS */ + struct rule *next; /* Next rule in the global list */ +}; + +/* A configuration is a production rule of the grammar together with +** a mark (dot) showing how much of that rule has been processed so far. +** Configurations also contain a follow-set which is a list of terminal +** symbols which are allowed to immediately follow the end of the rule. 
+** Every configuration is recorded as an instance of the following: */ +enum cfgstatus { + COMPLETE, + INCOMPLETE +}; +struct config { + struct rule *rp; /* The rule upon which the configuration is based */ + int dot; /* The parse point */ + char *fws; /* Follow-set for this configuration only */ + struct plink *fplp; /* Follow-set forward propagation links */ + struct plink *bplp; /* Follow-set backwards propagation links */ + struct state *stp; /* Pointer to state which contains this */ + enum cfgstatus status; /* used during followset and shift computations */ + struct config *next; /* Next configuration in the state */ + struct config *bp; /* The next basis configuration */ +}; + +enum e_action { + SHIFT, + ACCEPT, + REDUCE, + ERROR, + SSCONFLICT, /* A shift/shift conflict */ + SRCONFLICT, /* Was a reduce, but part of a conflict */ + RRCONFLICT, /* Was a reduce, but part of a conflict */ + SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ + RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ + NOT_USED, /* Deleted by compression */ + SHIFTREDUCE /* Shift first, then reduce */ +}; + +/* Every shift or reduce operation is stored as one of the following */ +struct action { + struct symbol *sp; /* The look-ahead symbol */ + enum e_action type; + union { + struct state *stp; /* The new state, if a shift */ + struct rule *rp; /* The rule, if a reduce */ + } x; + struct symbol *spOpt; /* SHIFTREDUCE optimization to this symbol */ + struct action *next; /* Next action for this state */ + struct action *collide; /* Next action with the same hash */ +}; + +/* Each state of the generated parser's finite state machine +** is encoded as an instance of the following structure. 
*/ +struct state { + struct config *bp; /* The basis configurations for this state */ + struct config *cfp; /* All configurations in this set */ + int statenum; /* Sequential number for this state */ + struct action *ap; /* List of actions for this state */ + int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ + int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ + int iDfltReduce; /* Default action is to REDUCE by this rule */ + struct rule *pDfltReduce;/* The default REDUCE rule. */ + int autoReduce; /* True if this is an auto-reduce state */ +}; +#define NO_OFFSET (-2147483647) + +/* A followset propagation link indicates that the contents of one +** configuration followset should be propagated to another whenever +** the first changes. */ +struct plink { + struct config *cfp; /* The configuration to which linked */ + struct plink *next; /* The next propagate link */ +}; + +/* The state vector for the entire parser generator is recorded as +** follows. (LEMON uses no global variables and makes little use of +** static variables. Fields in the following structure can be thought +** of as begin global variables in the program.) 
*/ +struct lemon { + struct state **sorted; /* Table of states sorted by state number */ + struct rule *rule; /* List of all rules */ + struct rule *startRule; /* First rule */ + int nstate; /* Number of states */ + int nxstate; /* nstate with tail degenerate states removed */ + int nrule; /* Number of rules */ + int nsymbol; /* Number of terminal and nonterminal symbols */ + int nterminal; /* Number of terminal symbols */ + struct symbol **symbols; /* Sorted array of pointers to symbols */ + int errorcnt; /* Number of errors */ + struct symbol *errsym; /* The error symbol */ + struct symbol *wildcard; /* Token that matches anything */ + char *name; /* Name of the generated parser */ + char *arg; /* Declaration of the 3th argument to parser */ + char *tokentype; /* Type of terminal symbols in the parser stack */ + char *vartype; /* The default type of non-terminal symbols */ + char *start; /* Name of the start symbol for the grammar */ + char *stacksize; /* Size of the parser stack */ + char *include; /* Code to put at the start of the C file */ + char *error; /* Code to execute when an error is seen */ + char *overflow; /* Code to execute on a stack overflow */ + char *failure; /* Code to execute on parser failure */ + char *accept; /* Code to execute when the parser excepts */ + char *extracode; /* Code appended to the generated file */ + char *tokendest; /* Code to execute to destroy token data */ + char *vardest; /* Code for the default non-terminal destructor */ + char *filename; /* Name of the input file */ + char *outname; /* Name of the current output file */ + char *tokenprefix; /* A prefix added to token names in the .h file */ + int nconflict; /* Number of parsing conflicts */ + int nactiontab; /* Number of entries in the yy_action[] table */ + int tablesize; /* Total table size of all tables in bytes */ + int basisflag; /* Print only basis configurations */ + int has_fallback; /* True if any %fallback is seen in the grammar */ + int nolinenosflag; /* 
True if #line statements should not be printed */ + char *argv0; /* Name of the program */ +}; + +#define MemoryCheck(X) if((X)==0){ \ + extern void memory_error(); \ + memory_error(); \ +} + +/**************** From the file "table.h" *********************************/ +/* +** All code in this file has been automatically generated +** from a specification in the file +** "table.q" +** by the associative array code building program "aagen". +** Do not edit this file! Instead, edit the specification +** file, then rerun aagen. +*/ +/* +** Code for processing tables in the LEMON parser generator. +*/ +/* Routines for handling a strings */ + +const char *Strsafe(const char *); + +void Strsafe_init(void); +int Strsafe_insert(const char *); +const char *Strsafe_find(const char *); + +/* Routines for handling symbols of the grammar */ + +struct symbol *Symbol_new(const char *); +int Symbolcmpp(const void *, const void *); +void Symbol_init(void); +int Symbol_insert(struct symbol *, const char *); +struct symbol *Symbol_find(const char *); +struct symbol *Symbol_Nth(int); +int Symbol_count(void); +struct symbol **Symbol_arrayof(void); + +/* Routines to manage the state table */ + +int Configcmp(const char *, const char *); +struct state *State_new(void); +void State_init(void); +int State_insert(struct state *, struct config *); +struct state *State_find(struct config *); +struct state **State_arrayof(/* */); + +/* Routines used for efficiency in Configlist_add */ + +void Configtable_init(void); +int Configtable_insert(struct config *); +struct config *Configtable_find(struct config *); +void Configtable_clear(int(*)(struct config *)); + +/****************** From the file "action.c" *******************************/ +/* +** Routines processing parser actions in the LEMON parser generator. 
+*/ + +/* Allocate a new parser action */ +static struct action *Action_new(void){ + static struct action *freelist = 0; + struct action *newaction; + + if( freelist==0 ){ + int i; + int amt = 100; + freelist = (struct action *)calloc(amt, sizeof(struct action)); + if( freelist==0 ){ + fprintf(stderr,"Unable to allocate memory for a new parser action."); + exit(1); + } + for(i=0; inext; + return newaction; +} + +/* Compare two actions for sorting purposes. Return negative, zero, or +** positive if the first action is less than, equal to, or greater than +** the first +*/ +static int actioncmp( + struct action *ap1, + struct action *ap2 +){ + int rc; + rc = ap1->sp->index - ap2->sp->index; + if( rc==0 ){ + rc = (int)ap1->type - (int)ap2->type; + } + if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){ + rc = ap1->x.rp->index - ap2->x.rp->index; + } + if( rc==0 ){ + rc = (int) (ap2 - ap1); + } + return rc; +} + +/* Sort parser actions */ +static struct action *Action_sort( + struct action *ap +){ + ap = (struct action *)msort((char *)ap,(char **)&ap->next, + (int(*)(const char*,const char*))actioncmp); + return ap; +} + +void Action_add( + struct action **app, + enum e_action type, + struct symbol *sp, + char *arg +){ + struct action *newaction; + newaction = Action_new(); + newaction->next = *app; + *app = newaction; + newaction->type = type; + newaction->sp = sp; + newaction->spOpt = 0; + if( type==SHIFT ){ + newaction->x.stp = (struct state *)arg; + }else{ + newaction->x.rp = (struct rule *)arg; + } +} +/********************** New code to implement the "acttab" module ***********/ +/* +** This module implements routines use to construct the yy_action[] table. +*/ + +/* +** The state of the yy_action table under construction is an instance of +** the following structure. +** +** The yy_action table maps the pair (state_number, lookahead) into an +** action_number. The table is an array of integers pairs. 
The state_number +** determines an initial offset into the yy_action array. The lookahead +** value is then added to this initial offset to get an index X into the +** yy_action array. If the aAction[X].lookahead equals the value of the +** of the lookahead input, then the value of the action_number output is +** aAction[X].action. If the lookaheads do not match then the +** default action for the state_number is returned. +** +** All actions associated with a single state_number are first entered +** into aLookahead[] using multiple calls to acttab_action(). Then the +** actions for that single state_number are placed into the aAction[] +** array with a single call to acttab_insert(). The acttab_insert() call +** also resets the aLookahead[] array in preparation for the next +** state number. +*/ +struct lookahead_action { + int lookahead; /* Value of the lookahead token */ + int action; /* Action to take on the given lookahead */ +}; +typedef struct acttab acttab; +struct acttab { + int nAction; /* Number of used slots in aAction[] */ + int nActionAlloc; /* Slots allocated for aAction[] */ + struct lookahead_action + *aAction, /* The yy_action[] table under construction */ + *aLookahead; /* A single new transaction set */ + int mnLookahead; /* Minimum aLookahead[].lookahead */ + int mnAction; /* Action associated with mnLookahead */ + int mxLookahead; /* Maximum aLookahead[].lookahead */ + int nLookahead; /* Used slots in aLookahead[] */ + int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ +}; + +/* Return the number of entries in the yy_action table */ +#define acttab_size(X) ((X)->nAction) + +/* The value for the N-th entry in yy_action */ +#define acttab_yyaction(X,N) ((X)->aAction[N].action) + +/* The value for the N-th entry in yy_lookahead */ +#define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) + +/* Free all memory associated with the given acttab */ +void acttab_free(acttab *p){ + free( p->aAction ); + free( p->aLookahead ); + free( p ); 
+} + +/* Allocate a new acttab structure */ +acttab *acttab_alloc(void){ + acttab *p = (acttab *) calloc( 1, sizeof(*p) ); + if( p==0 ){ + fprintf(stderr,"Unable to allocate memory for a new acttab."); + exit(1); + } + memset(p, 0, sizeof(*p)); + return p; +} + +/* Add a new action to the current transaction set. +** +** This routine is called once for each lookahead for a particular +** state. +*/ +void acttab_action(acttab *p, int lookahead, int action){ + if( p->nLookahead>=p->nLookaheadAlloc ){ + p->nLookaheadAlloc += 25; + p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead, + sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); + if( p->aLookahead==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + } + if( p->nLookahead==0 ){ + p->mxLookahead = lookahead; + p->mnLookahead = lookahead; + p->mnAction = action; + }else{ + if( p->mxLookaheadmxLookahead = lookahead; + if( p->mnLookahead>lookahead ){ + p->mnLookahead = lookahead; + p->mnAction = action; + } + } + p->aLookahead[p->nLookahead].lookahead = lookahead; + p->aLookahead[p->nLookahead].action = action; + p->nLookahead++; +} + +/* +** Add the transaction set built up with prior calls to acttab_action() +** into the current action table. Then reset the transaction set back +** to an empty set in preparation for a new round of acttab_action() calls. +** +** Return the offset into the action table of the new transaction. +*/ +int acttab_insert(acttab *p){ + int i, j, k, n; + assert( p->nLookahead>0 ); + + /* Make sure we have enough space to hold the expanded action table + ** in the worst case. 
The worst case occurs if the transaction set + ** must be appended to the current action table + */ + n = p->mxLookahead + 1; + if( p->nAction + n >= p->nActionAlloc ){ + int oldAlloc = p->nActionAlloc; + p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; + p->aAction = (struct lookahead_action *) realloc( p->aAction, + sizeof(p->aAction[0])*p->nActionAlloc); + if( p->aAction==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=oldAlloc; inActionAlloc; i++){ + p->aAction[i].lookahead = -1; + p->aAction[i].action = -1; + } + } + + /* Scan the existing action table looking for an offset that is a + ** duplicate of the current transaction set. Fall out of the loop + ** if and when the duplicate is found. + ** + ** i is the index in p->aAction[] where p->mnLookahead is inserted. + */ + for(i=p->nAction-1; i>=0; i--){ + if( p->aAction[i].lookahead==p->mnLookahead ){ + /* All lookaheads and actions in the aLookahead[] transaction + ** must match against the candidate aAction[i] entry. */ + if( p->aAction[i].action!=p->mnAction ) continue; + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 || k>=p->nAction ) break; + if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; + if( p->aLookahead[j].action!=p->aAction[k].action ) break; + } + if( jnLookahead ) continue; + + /* No possible lookahead value that is not in the aLookahead[] + ** transaction is allowed to match aAction[i] */ + n = 0; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead<0 ) continue; + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; + } + if( n==p->nLookahead ){ + break; /* An exact match is found at offset i */ + } + } + } + + /* If no existing offsets exactly match the current transaction, find an + ** an empty offset in the aAction[] table in which we can add the + ** aLookahead[] transaction. + */ + if( i<0 ){ + /* Look for holes in the aAction[] table that fit the current + ** aLookahead[] transaction. 
Leave i set to the offset of the hole. + ** If no holes are found, i is left at p->nAction, which means the + ** transaction will be appended. */ + for(i=0; inActionAlloc - p->mxLookahead; i++){ + if( p->aAction[i].lookahead<0 ){ + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 ) break; + if( p->aAction[k].lookahead>=0 ) break; + } + if( jnLookahead ) continue; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; + } + if( j==p->nAction ){ + break; /* Fits in empty slots */ + } + } + } + } + /* Insert transaction set at index i. */ + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + p->aAction[k] = p->aLookahead[j]; + if( k>=p->nAction ) p->nAction = k+1; + } + p->nLookahead = 0; + + /* Return the offset that is added to the lookahead in order to get the + ** index into yy_action of the action */ + return i - p->mnLookahead; +} + +/********************** From the file "build.c" *****************************/ +/* +** Routines to construction the finite state machine for the LEMON +** parser generator. +*/ + +/* Find a precedence symbol of every rule in the grammar. +** +** Those rules which have a precedence symbol coded in the input +** grammar using the "[symbol]" construct will already have the +** rp->precsym field filled. Other rules take as their precedence +** symbol the first RHS symbol with a defined precedence. If there +** are not RHS symbols with a defined precedence, the precedence +** symbol field is left blank. 
+*/ +void FindRulePrecedences(struct lemon *xp) +{ + struct rule *rp; + for(rp=xp->rule; rp; rp=rp->next){ + if( rp->precsym==0 ){ + int i, j; + for(i=0; inrhs && rp->precsym==0; i++){ + struct symbol *sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + if( sp->subsym[j]->prec>=0 ){ + rp->precsym = sp->subsym[j]; + break; + } + } + }else if( sp->prec>=0 ){ + rp->precsym = rp->rhs[i]; + } + } + } + } + return; +} + +/* Find all nonterminals which will generate the empty string. +** Then go back and compute the first sets of every nonterminal. +** The first set is the set of all terminal symbols which can begin +** a string generated by that nonterminal. +*/ +void FindFirstSets(struct lemon *lemp) +{ + int i, j; + struct rule *rp; + int progress; + + for(i=0; insymbol; i++){ + lemp->symbols[i]->lambda = LEMON_FALSE; + } + for(i=lemp->nterminal; insymbol; i++){ + lemp->symbols[i]->firstset = SetNew(); + } + + /* First compute all lambdas */ + do{ + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->lhs->lambda ) continue; + for(i=0; inrhs; i++){ + struct symbol *sp = rp->rhs[i]; + assert( sp->type==NONTERMINAL || sp->lambda==LEMON_FALSE ); + if( sp->lambda==LEMON_FALSE ) break; + } + if( i==rp->nrhs ){ + rp->lhs->lambda = LEMON_TRUE; + progress = 1; + } + } + }while( progress ); + + /* Now compute all first sets */ + do{ + struct symbol *s1, *s2; + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + s1 = rp->lhs; + for(i=0; inrhs; i++){ + s2 = rp->rhs[i]; + if( s2->type==TERMINAL ){ + progress += SetAdd(s1->firstset,s2->index); + break; + }else if( s2->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + progress += SetAdd(s1->firstset,s2->subsym[j]->index); + } + break; + }else if( s1==s2 ){ + if( s1->lambda==LEMON_FALSE ) break; + }else{ + progress += SetUnion(s1->firstset,s2->firstset); + if( s2->lambda==LEMON_FALSE ) break; + } + } + } + }while( progress ); + return; +} + +/* Compute all LR(0) states for the grammar. 
Links
+** are added between some states so that the LR(1) follow sets
+** can be computed later.
+**
+** The start symbol is lemp->start when that names a known symbol, and
+** otherwise the LHS of lemp->startRule.  Problems are reported through
+** ErrorMsg() and counted in lemp->errorcnt.
+*/
+PRIVATE struct state *getstate(struct lemon *);  /* forward reference */
+void FindStates(struct lemon *lemp)
+{
+  struct symbol *sp;
+  struct rule *rp;
+
+  Configlist_init();
+
+  /* Find the start symbol */
+  if( lemp->start ){
+    sp = Symbol_find(lemp->start);
+    if( sp==0 ){
+      ErrorMsg(lemp->filename,0,
+"The specified start symbol \"%s\" is not \
+in a nonterminal of the grammar. \"%s\" will be used as the start \
+symbol instead.",lemp->start,lemp->startRule->lhs->name);
+      lemp->errorcnt++;
+      sp = lemp->startRule->lhs;
+    }
+  }else{
+    sp = lemp->startRule->lhs;
+  }
+
+  /* Make sure the start symbol doesn't occur on the right-hand side of
+  ** any rule.  Report an error if it does.  (YACC would generate a new
+  ** start symbol in this case.) */
+  for(rp=lemp->rule; rp; rp=rp->next){
+    int i;
+    for(i=0; i<rp->nrhs; i++){
+      if( rp->rhs[i]==sp ){   /* FIX ME:  Deal with multiterminals */
+        ErrorMsg(lemp->filename,0,
+"The start symbol \"%s\" occurs on the \
+right-hand side of a rule. This will result in a parser which \
+does not work properly.",sp->name);
+        lemp->errorcnt++;
+      }
+    }
+  }
+
+  /* The basis configuration set for the first state
+  ** is all rules which have the start symbol as their
+  ** left-hand side */
+  for(rp=sp->rule; rp; rp=rp->nextlhs){
+    struct config *newcfp;
+    rp->lhsStart = 1;
+    newcfp = Configlist_addbasis(rp,0);
+    /* Seed the follow set of each start configuration with symbol 0 */
+    SetAdd(newcfp->fws,0);
+  }
+
+  /* Compute the first state.  All other states will be
+  ** computed automatically during the computation of the first one.
+  ** The returned pointer to the first state is not used. */
+  (void)getstate(lemp);
+  return;
+}
+
+/* Return a pointer to a state which is described by the configuration
+** list which has been built from calls to Configlist_add. 
+*/ +PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ +PRIVATE struct state *getstate(struct lemon *lemp) +{ + struct config *cfp, *bp; + struct state *stp; + + /* Extract the sorted basis of the new state. The basis was constructed + ** by prior calls to "Configlist_addbasis()". */ + Configlist_sortbasis(); + bp = Configlist_basis(); + + /* Get a state with the same basis */ + stp = State_find(bp); + if( stp ){ + /* A state with the same basis already exists! Copy all the follow-set + ** propagation links from the state under construction into the + ** preexisting state, then return a pointer to the preexisting state */ + struct config *x, *y; + for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ + Plink_copy(&y->bplp,x->bplp); + Plink_delete(x->fplp); + x->fplp = x->bplp = 0; + } + cfp = Configlist_return(); + Configlist_eat(cfp); + }else{ + /* This really is a new state. Construct all the details */ + Configlist_closure(lemp); /* Compute the configuration closure */ + Configlist_sort(); /* Sort the configuration closure */ + cfp = Configlist_return(); /* Get a pointer to the config list */ + stp = State_new(); /* A new state structure */ + MemoryCheck(stp); + stp->bp = bp; /* Remember the configuration basis */ + stp->cfp = cfp; /* Remember the configuration closure */ + stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ + stp->ap = 0; /* No actions, yet. */ + State_insert(stp,stp->bp); /* Add to the state table */ + buildshifts(lemp,stp); /* Recursively compute successor states */ + } + return stp; +} + +/* +** Return true if two symbols are the same. +*/ +int same_symbol(struct symbol *a, struct symbol *b) +{ + int i; + if( a==b ) return 1; + if( a->type!=MULTITERMINAL ) return 0; + if( b->type!=MULTITERMINAL ) return 0; + if( a->nsubsym!=b->nsubsym ) return 0; + for(i=0; insubsym; i++){ + if( a->subsym[i]!=b->subsym[i] ) return 0; + } + return 1; +} + +/* Construct all successor states to the given state. 
A "successor"
+** state is any state which can be reached by a shift action.
+**
+** getstate() is called for every successor and calls back into this
+** routine for each state that is new, so the two recurse together.
+*/
+PRIVATE void buildshifts(struct lemon *lemp, struct state *stp)
+{
+  struct config *cfp;  /* For looping thru the config closure of "stp" */
+  struct config *bcfp; /* For the inner loop on config closure of "stp" */
+  struct config *newcfg;  /* Basis config added to the successor state */
+  struct symbol *sp;   /* Symbol following the dot in configuration "cfp" */
+  struct symbol *bsp;  /* Symbol following the dot in configuration "bcfp" */
+  struct state *newstp; /* A pointer to a successor state */
+
+  /* Each configuration becomes complete after it contributes to a successor
+  ** state.  Initially, all configurations are incomplete */
+  for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE;
+
+  /* Loop through all configurations of the state "stp" */
+  for(cfp=stp->cfp; cfp; cfp=cfp->next){
+    if( cfp->status==COMPLETE ) continue;    /* Already used by inner loop */
+    if( cfp->dot>=cfp->rp->nrhs ) continue;  /* Can't shift this config */
+    Configlist_reset();                      /* Reset the new config set */
+    sp = cfp->rp->rhs[cfp->dot];             /* Symbol after the dot */
+
+    /* For every configuration in the state "stp" which has the symbol "sp"
+    ** following its dot, add the same configuration to the basis set under
+    ** construction but with the dot shifted one symbol to the right. */
+    for(bcfp=cfp; bcfp; bcfp=bcfp->next){
+      if( bcfp->status==COMPLETE ) continue;    /* Already used */
+      if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */
+      bsp = bcfp->rp->rhs[bcfp->dot];           /* Get symbol after dot */
+      if( !same_symbol(bsp,sp) ) continue;      /* Must be same as for "cfp" */
+      bcfp->status = COMPLETE;                  /* Mark this config as used */
+      newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1);
+      Plink_add(&newcfg->bplp,bcfp);
+    }
+
+    /* Get a pointer to the state described by the basis configuration set
+    ** constructed in the preceding loop */
+    newstp = getstate(lemp);
+
+    /* The state "newstp" is reached from the state "stp" by a shift action
+    ** on the symbol "sp" */
+    if( sp->type==MULTITERMINAL ){
+      /* One SHIFT action per member of the compound symbol */
+      int i;
+      for(i=0; i<sp->nsubsym; i++){
+        Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp);
+      }
+    }else{
+      Action_add(&stp->ap,SHIFT,sp,(char *)newstp);
+    }
+  }
+}
+
+/*
+** Construct the propagation links
+*/
+void FindLinks(struct lemon *lemp)
+{
+  int i;
+  struct config *cfp, *other;
+  struct state *stp;
+  struct plink *plp;
+
+  /* Housekeeping detail:
+  ** Add to every configuration a pointer back to the state to
+  ** which it belongs. */
+  for(i=0; i<lemp->nstate; i++){
+    stp = lemp->sorted[i];
+    for(cfp=stp->cfp; cfp; cfp=cfp->next){
+      cfp->stp = stp;
+    }
+  }
+
+  /* Convert all backlinks into forward links.  Only the forward
+  ** links are used in the follow-set computation. */
+  for(i=0; i<lemp->nstate; i++){
+    stp = lemp->sorted[i];
+    for(cfp=stp->cfp; cfp; cfp=cfp->next){
+      for(plp=cfp->bplp; plp; plp=plp->next){
+        other = plp->cfp;
+        Plink_add(&other->fplp,cfp);
+      }
+    }
+  }
+}
+
+/* Compute all followsets.
+**
+** A followset is the set of all symbols which can come immediately
+** after a configuration. 
+*/ +void FindFollowSets(struct lemon *lemp) +{ + int i; + struct config *cfp; + struct plink *plp; + int progress; + int change; + + for(i=0; instate; i++){ + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + cfp->status = INCOMPLETE; + } + } + + do{ + progress = 0; + for(i=0; instate; i++){ + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + if( cfp->status==COMPLETE ) continue; + for(plp=cfp->fplp; plp; plp=plp->next){ + change = SetUnion(plp->cfp->fws,cfp->fws); + if( change ){ + plp->cfp->status = INCOMPLETE; + progress = 1; + } + } + cfp->status = COMPLETE; + } + } + }while( progress ); +} + +static int resolve_conflict(struct action *,struct action *); + +/* Compute the reduce actions, and resolve conflicts. +*/ +void FindActions(struct lemon *lemp) +{ + int i,j; + struct config *cfp; + struct state *stp; + struct symbol *sp; + struct rule *rp; + + /* Add all of the reduce actions + ** A reduce action is added for each element of the followset of + ** a configuration which has its dot at the extreme right. + */ + for(i=0; instate; i++){ /* Loop over all states */ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ + if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ + for(j=0; jnterminal; j++){ + if( SetFind(cfp->fws,j) ){ + /* Add a reduce action to the state "stp" which will reduce by the + ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ + Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); + } + } + } + } + } + + /* Add the accepting token */ + if( lemp->start ){ + sp = Symbol_find(lemp->start); + if( sp==0 ) sp = lemp->startRule->lhs; + }else{ + sp = lemp->startRule->lhs; + } + /* Add to the first state (which is always the starting state of the + ** finite state machine) an action to ACCEPT if the lookahead is the + ** start nonterminal. 
*/ + Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); + + /* Resolve conflicts */ + for(i=0; instate; i++){ + struct action *ap, *nap; + stp = lemp->sorted[i]; + /* assert( stp->ap ); */ + stp->ap = Action_sort(stp->ap); + for(ap=stp->ap; ap && ap->next; ap=ap->next){ + for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ + /* The two actions "ap" and "nap" have the same lookahead. + ** Figure out which one should be used */ + lemp->nconflict += resolve_conflict(ap,nap); + } + } + } + + /* Report an error for each rule that can never be reduced. */ + for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; + for(i=0; instate; i++){ + struct action *ap; + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; + } + } + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->canReduce ) continue; + ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); + lemp->errorcnt++; + } +} + +/* Resolve a conflict between the two given actions. If the +** conflict can't be resolved, return non-zero. +** +** NO LONGER TRUE: +** To resolve a conflict, first look to see if either action +** is on an error rule. In that case, take the action which +** is not associated with the error rule. If neither or both +** actions are associated with an error rule, then try to +** use precedence to resolve the conflict. +** +** If either action is a SHIFT, then it must be apx. This +** function won't work if apx->type==REDUCE and apy->type==SHIFT. +*/ +static int resolve_conflict( + struct action *apx, + struct action *apy +){ + struct symbol *spx, *spy; + int errcnt = 0; + assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ + if( apx->type==SHIFT && apy->type==SHIFT ){ + apy->type = SSCONFLICT; + errcnt++; + } + if( apx->type==SHIFT && apy->type==REDUCE ){ + spx = apx->sp; + spy = apy->x.rp->precsym; + if( spy==0 || spx->prec<0 || spy->prec<0 ){ + /* Not enough precedence information. 
*/ + apy->type = SRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ /* higher precedence wins */ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = SH_RESOLVED; + }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ + apy->type = RD_RESOLVED; /* associativity */ + }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ + apx->type = SH_RESOLVED; + }else{ + assert( spx->prec==spy->prec && spx->assoc==NONE ); + apx->type = ERROR; + } + }else if( apx->type==REDUCE && apy->type==REDUCE ){ + spx = apx->x.rp->precsym; + spy = apy->x.rp->precsym; + if( spx==0 || spy==0 || spx->prec<0 || + spy->prec<0 || spx->prec==spy->prec ){ + apy->type = RRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = RD_RESOLVED; + } + }else{ + assert( + apx->type==SH_RESOLVED || + apx->type==RD_RESOLVED || + apx->type==SSCONFLICT || + apx->type==SRCONFLICT || + apx->type==RRCONFLICT || + apy->type==SH_RESOLVED || + apy->type==RD_RESOLVED || + apy->type==SSCONFLICT || + apy->type==SRCONFLICT || + apy->type==RRCONFLICT + ); + /* The REDUCE/SHIFT case cannot happen because SHIFTs come before + ** REDUCEs on the list. If we reach this point it must be because + ** the parser conflict had already been resolved. */ + } + return errcnt; +} +/********************* From the file "configlist.c" *************************/ +/* +** Routines to processing a configuration list and building a state +** in the LEMON parser generator. 
+*/ + +static struct config *freelist = 0; /* List of free configurations */ +static struct config *current = 0; /* Top of list of configurations */ +static struct config **currentend = 0; /* Last on list of configs */ +static struct config *basis = 0; /* Top of list of basis configs */ +static struct config **basisend = 0; /* End of list of basis configs */ + +/* Return a pointer to a new configuration */ +PRIVATE struct config *newconfig(){ + struct config *newcfg; + if( freelist==0 ){ + int i; + int amt = 3; + freelist = (struct config *)calloc( amt, sizeof(struct config) ); + if( freelist==0 ){ + fprintf(stderr,"Unable to allocate memory for a new configuration."); + exit(1); + } + for(i=0; inext; + return newcfg; +} + +/* The configuration "old" is no longer used */ +PRIVATE void deleteconfig(struct config *old) +{ + old->next = freelist; + freelist = old; +} + +/* Initialized the configuration list builder */ +void Configlist_init(){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_init(); + return; +} + +/* Initialized the configuration list builder */ +void Configlist_reset(){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_clear(0); + return; +} + +/* Add another configuration to the configuration list */ +struct config *Configlist_add( + struct rule *rp, /* The rule */ + int dot /* Index into the RHS of the rule where the dot goes */ +){ + struct config *cfp, model; + + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = &cfp->next; + Configtable_insert(cfp); + } + return cfp; +} + +/* Add a basis configuration to the configuration list */ +struct config *Configlist_addbasis(struct rule *rp, int dot) +{ + struct config *cfp, model; + + assert( 
basisend!=0 ); + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = &cfp->next; + *basisend = cfp; + basisend = &cfp->bp; + Configtable_insert(cfp); + } + return cfp; +} + +/* Compute the closure of the configuration list */ +void Configlist_closure(struct lemon *lemp) +{ + struct config *cfp, *newcfp; + struct rule *rp, *newrp; + struct symbol *sp, *xsp; + int i, dot; + + assert( currentend!=0 ); + for(cfp=current; cfp; cfp=cfp->next){ + rp = cfp->rp; + dot = cfp->dot; + if( dot>=rp->nrhs ) continue; + sp = rp->rhs[dot]; + if( sp->type==NONTERMINAL ){ + if( sp->rule==0 && sp!=lemp->errsym ){ + ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", + sp->name); + lemp->errorcnt++; + } + for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ + newcfp = Configlist_add(newrp,0); + for(i=dot+1; inrhs; i++){ + xsp = rp->rhs[i]; + if( xsp->type==TERMINAL ){ + SetAdd(newcfp->fws,xsp->index); + break; + }else if( xsp->type==MULTITERMINAL ){ + int k; + for(k=0; knsubsym; k++){ + SetAdd(newcfp->fws, xsp->subsym[k]->index); + } + break; + }else{ + SetUnion(newcfp->fws,xsp->firstset); + if( xsp->lambda==LEMON_FALSE ) break; + } + } + if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); + } + } + } + return; +} + +/* Sort the configuration list */ +void Configlist_sort(){ + current = (struct config*)msort((char*)current,(char**)&(current->next), + Configcmp); + currentend = 0; + return; +} + +/* Sort the basis configuration list */ +void Configlist_sortbasis(){ + basis = (struct config*)msort((char*)current,(char**)&(current->bp), + Configcmp); + basisend = 0; + return; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_return(){ + struct config *old; + old = 
current; + current = 0; + currentend = 0; + return old; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_basis(){ + struct config *old; + old = basis; + basis = 0; + basisend = 0; + return old; +} + +/* Free all elements of the given configuration list */ +void Configlist_eat(struct config *cfp) +{ + struct config *nextcfp; + for(; cfp; cfp=nextcfp){ + nextcfp = cfp->next; + assert( cfp->fplp==0 ); + assert( cfp->bplp==0 ); + if( cfp->fws ) SetFree(cfp->fws); + deleteconfig(cfp); + } + return; +} +/***************** From the file "error.c" *********************************/ +/* +** Code for printing error message. +*/ + +void ErrorMsg(const char *filename, int lineno, const char *format, ...){ + va_list ap; + fprintf(stderr, "%s:%d: ", filename, lineno); + va_start(ap, format); + vfprintf(stderr,format,ap); + va_end(ap); + fprintf(stderr, "\n"); +} +/**************** From the file "main.c" ************************************/ +/* +** Main program file for the LEMON parser generator. +*/ + +/* Report an out-of-memory condition and abort. This function +** is used mostly by the "MemoryCheck" macro in struct.h +*/ +void memory_error(){ + fprintf(stderr,"Out of memory. Aborting...\n"); + exit(1); +} + +static int nDefine = 0; /* Number of -D options on the command line */ +static char **azDefine = 0; /* Name of the -D macros */ + +/* This routine is called with the argument to each -D command-line option. +** Add the macro defined to the azDefine array. 
+*/ +static void handle_D_option(char *z){ + char **paz; + nDefine++; + azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); + if( azDefine==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + paz = &azDefine[nDefine-1]; + *paz = (char *) malloc( lemonStrlen(z)+1 ); + if( *paz==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + lemon_strcpy(*paz, z); + for(z=*paz; *z && *z!='='; z++){} + *z = 0; +} + +static char *user_templatename = NULL; +static void handle_T_option(char *z){ + user_templatename = (char *) malloc( lemonStrlen(z)+1 ); + if( user_templatename==0 ){ + memory_error(); + } + lemon_strcpy(user_templatename, z); +} + +/* Merge together to lists of rules ordered by rule.iRule */ +static struct rule *Rule_merge(struct rule *pA, struct rule *pB){ + struct rule *pFirst = 0; + struct rule **ppPrev = &pFirst; + while( pA && pB ){ + if( pA->iRuleiRule ){ + *ppPrev = pA; + ppPrev = &pA->next; + pA = pA->next; + }else{ + *ppPrev = pB; + ppPrev = &pB->next; + pB = pB->next; + } + } + if( pA ){ + *ppPrev = pA; + }else{ + *ppPrev = pB; + } + return pFirst; +} + +/* +** Sort a list of rules in order of increasing iRule value +*/ +static struct rule *Rule_sort(struct rule *rp){ + int i; + struct rule *pNext; + struct rule *x[32]; + memset(x, 0, sizeof(x)); + while( rp ){ + pNext = rp->next; + rp->next = 0; + for(i=0; iuseCnt = 0; + + /* Parse the input file */ + Parse(&lem); + if( lem.errorcnt ) exit(lem.errorcnt); + if( lem.nrule==0 ){ + fprintf(stderr,"Empty grammar.\n"); + exit(1); + } + + /* Count and index the symbols of the grammar */ + Symbol_new("{default}"); + lem.nsymbol = Symbol_count(); + lem.symbols = Symbol_arrayof(); + for(i=0; iindex = i; + qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); + for(i=0; iindex = i; + while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } + assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); + lem.nsymbol = i - 1; + for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); + 
lem.nterminal = i; + + /* Assign sequential rule numbers. Start with 0. Put rules that have no + ** reduce action C-code associated with them last, so that the switch() + ** statement that selects reduction actions will have a smaller jump table. + */ + for(i=0, rp=lem.rule; rp; rp=rp->next){ + rp->iRule = rp->code ? i++ : -1; + } + for(rp=lem.rule; rp; rp=rp->next){ + if( rp->iRule<0 ) rp->iRule = i++; + } + lem.startRule = lem.rule; + lem.rule = Rule_sort(lem.rule); + + /* Generate a reprint of the grammar, if requested on the command line */ + if( rpflag ){ + Reprint(&lem); + }else{ + /* Initialize the size for all follow and first sets */ + SetSize(lem.nterminal+1); + + /* Find the precedence for every production rule (that has one) */ + FindRulePrecedences(&lem); + + /* Compute the lambda-nonterminals and the first-sets for every + ** nonterminal */ + FindFirstSets(&lem); + + /* Compute all LR(0) states. Also record follow-set propagation + ** links so that the follow-set can be computed later */ + lem.nstate = 0; + FindStates(&lem); + lem.sorted = State_arrayof(); + + /* Tie up loose ends on the propagation links */ + FindLinks(&lem); + + /* Compute the follow set of every reducible configuration */ + FindFollowSets(&lem); + + /* Compute the action tables */ + FindActions(&lem); + + /* Compress the action tables */ + if( compress==0 ) CompressTables(&lem); + + /* Reorder and renumber the states so that states with fewer choices + ** occur at the end. This is an optimization that helps make the + ** generated parser tables smaller. */ + if( noResort==0 ) ResortStates(&lem); + + /* Generate a report of the parser generated. (the "y.output" file) */ + if( !quiet ) ReportOutput(&lem); + + /* Generate the source code for the parser */ + ReportTable(&lem, mhflag); + + /* Produce a header file for use by the scanner. (This step is + ** omitted if the "-m" option is used because makeheaders will + ** generate the file for us.) 
*/ + //if( !mhflag ) ReportHeader(&lem); + } + if( statistics ){ + printf("Parser statistics:\n"); + stats_line("terminal symbols", lem.nterminal); + stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); + stats_line("total symbols", lem.nsymbol); + stats_line("rules", lem.nrule); + stats_line("states", lem.nxstate); + stats_line("conflicts", lem.nconflict); + stats_line("action table entries", lem.nactiontab); + stats_line("total table size (bytes)", lem.tablesize); + } + if( lem.nconflict > 0 ){ + fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); + } + + /* return 0 on success, 1 on failure. */ + exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; + exit(exitcode); + return (exitcode); +} +/******************** From the file "msort.c" *******************************/ +/* +** A generic merge-sort program. +** +** USAGE: +** Let "ptr" be a pointer to some structure which is at the head of +** a null-terminated list. Then to sort the list call: +** +** ptr = msort(ptr,&(ptr->next),cmpfnc); +** +** In the above, "cmpfnc" is a pointer to a function which compares +** two instances of the structure and returns an integer, as in +** strcmp. The second argument is a pointer to the pointer to the +** second element of the linked list. This address is used to compute +** the offset to the "next" field within the structure. The offset to +** the "next" field must be constant for all structures in the list. +** +** The function returns a new pointer which is the head of the list +** after sorting. +** +** ALGORITHM: +** Merge-sort. +*/ + +/* +** Return a pointer to the next structure in the linked list. +*/ +#define NEXT(A) (*(char**)(((char*)A)+offset)) + +/* +** Inputs: +** a: A sorted, null-terminated linked list. (May be null). +** b: A sorted, null-terminated linked list. (May be null). +** cmp: A pointer to the comparison function. +** offset: Offset in the structure to the "next" field. 
+**
+** Return Value:
+**   A pointer to the head of a sorted list containing the elements
+**   of both a and b.
+**
+** Side effects:
+**   The "next" pointers for elements in the lists a and b are
+**   changed.
+*/
+static char *merge(
+  char *a,
+  char *b,
+  int (*cmp)(const char*,const char*),
+  int offset
+){
+  char *head = 0;          /* First element of the merged list */
+  char **ppTail = &head;   /* Link that receives the next element */
+
+  /* Move the smaller front element to the end of the result until one
+  ** input runs out.  Ties go to "a". */
+  while( a!=0 && b!=0 ){
+    if( (*cmp)(a,b)>0 ){
+      *ppTail = b;
+      ppTail = &NEXT(b);
+      b = *ppTail;
+    }else{
+      *ppTail = a;
+      ppTail = &NEXT(a);
+      a = *ppTail;
+    }
+  }
+
+  /* At most one input still has elements; append whatever is left */
+  *ppTail = a!=0 ? a : b;
+  return head;
+}
+
+/*
+** Inputs:
+**   list:      Pointer to a singly-linked list of structures.
+**   next:      Pointer to pointer to the second element of the list.
+**   cmp:       A comparison function.
+**
+** Return Value:
+**   A pointer to the head of a sorted list containing the elements
+**   originally in list.
+**
+** Side effects:
+**   The "next" pointers for elements in list are changed. 
+*/ +#define LISTSIZE 30 +static char *msort( + char *list, + char **next, + int (*cmp)(const char*,const char*) +){ + unsigned long offset; + char *ep; + char *set[LISTSIZE]; + int i; + offset = (unsigned long)((char*)next - (char*)list); + for(i=0; istate = WAITING_FOR_DECL_KEYWORD; + }else if( ISLOWER(x[0]) ){ + psp->lhs = Symbol_new(x); + psp->nrhs = 0; + psp->lhsalias = 0; + psp->state = WAITING_FOR_ARROW; + }else if( x[0]=='{' ){ + if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"There is no prior rule upon which to attach the code \ +fragment which begins on this line."); + psp->errorcnt++; + }else if( psp->prevrule->code!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"Code fragment beginning on this line is not the first \ +to follow the previous rule."); + psp->errorcnt++; + }else{ + psp->prevrule->line = psp->tokenlineno; + psp->prevrule->code = &x[1]; + psp->prevrule->noCode = 0; + } + }else if( x[0]=='[' ){ + psp->state = PRECEDENCE_MARK_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Token \"%s\" should be either \"%%\" or a nonterminal name.", + x); + psp->errorcnt++; + } + break; + case PRECEDENCE_MARK_1: + if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "The precedence symbol must be a terminal."); + psp->errorcnt++; + }else if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "There is no prior rule to assign precedence \"[%s]\".",x); + psp->errorcnt++; + }else if( psp->prevrule->precsym!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"Precedence mark on this line is not the first \ +to follow the previous rule."); + psp->errorcnt++; + }else{ + psp->prevrule->precsym = Symbol_new(x); + } + psp->state = PRECEDENCE_MARK_2; + break; + case PRECEDENCE_MARK_2: + if( x[0]!=']' ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"]\" on precedence mark."); + psp->errorcnt++; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + break; + case WAITING_FOR_ARROW: + if( x[0]==':' && 
x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + }else if( x[0]=='(' ){ + psp->state = LHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Expected to see a \":\" following the LHS symbol \"%s\".", + psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_1: + if( ISALPHA(x[0]) ){ + psp->lhsalias = x; + psp->state = LHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the LHS \"%s\"\n", + x,psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = LHS_ALIAS_3; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_3: + if( x[0]==':' && x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"->\" following: \"%s(%s)\".", + psp->lhs->name,psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case IN_RHS: + if( x[0]=='.' 
){ + struct rule *rp; + rp = (struct rule *)calloc( sizeof(struct rule) + + sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); + if( rp==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't allocate enough memory for this rule."); + psp->errorcnt++; + psp->prevrule = 0; + }else{ + int i; + rp->ruleline = psp->tokenlineno; + rp->rhs = (struct symbol**)&rp[1]; + rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); + for(i=0; inrhs; i++){ + rp->rhs[i] = psp->rhs[i]; + rp->rhsalias[i] = psp->alias[i]; + } + rp->lhs = psp->lhs; + rp->lhsalias = psp->lhsalias; + rp->nrhs = psp->nrhs; + rp->code = 0; + rp->noCode = 1; + rp->precsym = 0; + rp->index = psp->gp->nrule++; + rp->nextlhs = rp->lhs->rule; + rp->lhs->rule = rp; + rp->next = 0; + if( psp->firstrule==0 ){ + psp->firstrule = psp->lastrule = rp; + }else{ + psp->lastrule->next = rp; + psp->lastrule = rp; + } + psp->prevrule = rp; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISALPHA(x[0]) ){ + if( psp->nrhs>=MAXRHS ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Too many symbols on RHS of rule beginning at \"%s\".", + x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + }else{ + psp->rhs[psp->nrhs] = Symbol_new(x); + psp->alias[psp->nrhs] = 0; + psp->nrhs++; + } + }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){ + struct symbol *msp = psp->rhs[psp->nrhs-1]; + if( msp->type!=MULTITERMINAL ){ + struct symbol *origsp = msp; + msp = (struct symbol *) calloc(1,sizeof(*msp)); + memset(msp, 0, sizeof(*msp)); + msp->type = MULTITERMINAL; + msp->nsubsym = 1; + msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); + msp->subsym[0] = origsp; + msp->name = origsp->name; + psp->rhs[psp->nrhs-1] = msp; + } + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); + if( ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + 
"Cannot form a compound containing a non-terminal"); + psp->errorcnt++; + } + }else if( x[0]=='(' && psp->nrhs>0 ){ + psp->state = RHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal character on RHS of rule: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_1: + if( ISALPHA(x[0]) ){ + psp->alias[psp->nrhs-1] = x; + psp->state = RHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", + x,psp->rhs[psp->nrhs-1]->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case WAITING_FOR_DECL_KEYWORD: + if( ISALPHA(x[0]) ){ + psp->declkeyword = x; + psp->declargslot = 0; + psp->decllinenoslot = 0; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + if( strcmp(x,"name")==0 ){ + psp->declargslot = &(psp->gp->name); + psp->insertLineMacro = 0; + }else if( strcmp(x,"include")==0 ){ + psp->declargslot = &(psp->gp->include); + }else if( strcmp(x,"code")==0 ){ + psp->declargslot = &(psp->gp->extracode); + }else if( strcmp(x,"token_destructor")==0 ){ + psp->declargslot = &psp->gp->tokendest; + }else if( strcmp(x,"default_destructor")==0 ){ + psp->declargslot = &psp->gp->vardest; + }else if( strcmp(x,"token_prefix")==0 ){ + psp->declargslot = &psp->gp->tokenprefix; + psp->insertLineMacro = 0; + }else if( strcmp(x,"syntax_error")==0 ){ + psp->declargslot = &(psp->gp->error); + }else if( strcmp(x,"parse_accept")==0 ){ + psp->declargslot = &(psp->gp->accept); + }else if( strcmp(x,"parse_failure")==0 ){ + psp->declargslot = &(psp->gp->failure); + }else if( strcmp(x,"stack_overflow")==0 ){ + psp->declargslot = &(psp->gp->overflow); + }else if( 
strcmp(x,"extra_argument")==0 ){ + psp->declargslot = &(psp->gp->arg); + psp->insertLineMacro = 0; + }else if( strcmp(x,"token_type")==0 ){ + psp->declargslot = &(psp->gp->tokentype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"default_type")==0 ){ + psp->declargslot = &(psp->gp->vartype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"stack_size")==0 ){ + psp->declargslot = &(psp->gp->stacksize); + psp->insertLineMacro = 0; + }else if( strcmp(x,"start_symbol")==0 ){ + psp->declargslot = &(psp->gp->start); + psp->insertLineMacro = 0; + }else if( strcmp(x,"left")==0 ){ + psp->preccounter++; + psp->declassoc = LEFT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"right")==0 ){ + psp->preccounter++; + psp->declassoc = RIGHT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"nonassoc")==0 ){ + psp->preccounter++; + psp->declassoc = NONE; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"destructor")==0 ){ + psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; + }else if( strcmp(x,"type")==0 ){ + psp->state = WAITING_FOR_DATATYPE_SYMBOL; + }else if( strcmp(x,"fallback")==0 ){ + psp->fallback = 0; + psp->state = WAITING_FOR_FALLBACK_ID; + }else if( strcmp(x,"wildcard")==0 ){ + psp->state = WAITING_FOR_WILDCARD_ID; + }else if( strcmp(x,"token_class")==0 ){ + psp->state = WAITING_FOR_CLASS_ID; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Unknown declaration keyword: \"%%%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal declaration keyword: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_DESTRUCTOR_SYMBOL: + if( !ISALPHA(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%destructor keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_new(x); + psp->declargslot = &sp->destructor; + 
psp->decllinenoslot = &sp->destLineno; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + } + break; + case WAITING_FOR_DATATYPE_SYMBOL: + if( !ISALPHA(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%type keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_find(x); + if((sp) && (sp->datatype)){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol %%type \"%s\" already defined", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + if (!sp){ + sp = Symbol_new(x); + } + psp->declargslot = &sp->datatype; + psp->insertLineMacro = 0; + psp->state = WAITING_FOR_DECL_ARG; + } + } + break; + case WAITING_FOR_PRECEDENCE_SYMBOL: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISUPPER(x[0]) ){ + struct symbol *sp; + sp = Symbol_new(x); + if( sp->prec>=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol \"%s\" has already be given a precedence.",x); + psp->errorcnt++; + }else{ + sp->prec = psp->preccounter; + sp->assoc = psp->declassoc; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't assign a precedence to \"%s\".",x); + psp->errorcnt++; + } + break; + case WAITING_FOR_DECL_ARG: + if( x[0]=='{' || x[0]=='\"' || ISALNUM(x[0]) ){ + const char *zOld, *zNew; + char *zBuf, *z; + int nOld, n, nLine = 0, nNew, nBack; + int addLineMacro; + char zLine[50]; + zNew = x; + if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; + nNew = lemonStrlen(zNew); + if( *psp->declargslot ){ + zOld = *psp->declargslot; + }else{ + zOld = ""; + } + nOld = lemonStrlen(zOld); + n = nOld + nNew + 20; + addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && + (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); + if( addLineMacro ){ + for(z=psp->filename, nBack=0; *z; z++){ + if( *z=='\\' ) nBack++; + } + lemon_sprintf(zLine, "// line %d ", psp->tokenlineno); + nLine = lemonStrlen(zLine); + n += nLine + lemonStrlen(psp->filename) + 
nBack; + } + *psp->declargslot = (char *) realloc(*psp->declargslot, n); + zBuf = *psp->declargslot + nOld; + if( addLineMacro ){ + if( nOld && zBuf[-1]!='\n' ){ + *(zBuf++) = '\n'; + } + memcpy(zBuf, zLine, nLine); + zBuf += nLine; + *(zBuf++) = '"'; + for(z=psp->filename; *z; z++){ + if( *z=='\\' ){ + *(zBuf++) = '\\'; + } + *(zBuf++) = *z; + } + *(zBuf++) = '"'; + *(zBuf++) = '\n'; + } + if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ + psp->decllinenoslot[0] = psp->tokenlineno; + } + memcpy(zBuf, zNew, nNew); + zBuf += nNew; + *zBuf = 0; + psp->state = WAITING_FOR_DECL_OR_RULE; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal argument to %%%s: %s",psp->declkeyword,x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_FALLBACK_ID: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%fallback argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->fallback==0 ){ + psp->fallback = sp; + }else if( sp->fallback ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "More than one fallback assigned to token %s", x); + psp->errorcnt++; + }else{ + sp->fallback = psp->fallback; + psp->gp->has_fallback = 1; + } + } + break; + case WAITING_FOR_WILDCARD_ID: + if( x[0]=='.' 
){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%wildcard argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->gp->wildcard==0 ){ + psp->gp->wildcard = sp; + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "Extra wildcard to token: %s", x); + psp->errorcnt++; + } + } + break; + case WAITING_FOR_CLASS_ID: + if( !ISLOWER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class must be followed by an identifier: ", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else if( Symbol_find(x) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "Symbol \"%s\" already used", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + psp->tkclass = Symbol_new(x); + psp->tkclass->type = MULTITERMINAL; + psp->state = WAITING_FOR_CLASS_TOKEN; + } + break; + case WAITING_FOR_CLASS_TOKEN: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISUPPER(x[0]) || ((x[0]=='|' || x[0]=='/') && ISUPPER(x[1])) ){ + struct symbol *msp = psp->tkclass; + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + if( !ISUPPER(x[0]) ) x++; + msp->subsym[msp->nsubsym-1] = Symbol_new(x); + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class argument \"%s\" should be a token", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case RESYNC_AFTER_RULE_ERROR: +/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; +** break; */ + case RESYNC_AFTER_DECL_ERROR: + if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; + if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; + break; + } +} + +/* Run the preprocessor over the input file text. The global variables +** azDefine[0] through azDefine[nDefine-1] contains the names of all defined +** macros. 
This routine looks for "%ifdef" and "%ifndef" and "%endif" and +** comments them out. Text in between is also commented out as appropriate. +*/ +static void preprocess_input(char *z){ + int i, j, k, n; + int exclude = 0; + int start = 0; + int lineno = 1; + int start_lineno = 1; + for(i=0; z[i]; i++){ + if( z[i]=='\n' ) lineno++; + if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; + if( strncmp(&z[i],"%endif",6)==0 && ISSPACE(z[i+6]) ){ + if( exclude ){ + exclude--; + if( exclude==0 ){ + for(j=start; jfilename; + ps.errorcnt = 0; + ps.state = INITIALIZE; + + /* Begin by reading the input file */ + fp = fopen(ps.filename,"rb"); + if( fp==0 ){ + ErrorMsg(ps.filename,0,"Can't open this file for reading."); + gp->errorcnt++; + return; + } + fseek(fp,0,2); + filesize = ftell(fp); + rewind(fp); + filebuf = (char *)malloc( filesize+1 ); + if( filesize>100000000 || filebuf==0 ){ + ErrorMsg(ps.filename,0,"Input file too large."); + gp->errorcnt++; + fclose(fp); + return; + } + if( fread(filebuf,1,filesize,fp)!=filesize ){ + ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", + filesize); + free(filebuf); + gp->errorcnt++; + fclose(fp); + return; + } + fclose(fp); + filebuf[filesize] = 0; + + /* Make an initial pass through the file to handle %ifdef and %ifndef */ + preprocess_input(filebuf); + + /* Now scan the text of the input file */ + lineno = 1; + for(cp=filebuf; (c= *cp)!=0; ){ + if( c=='\n' ) lineno++; /* Keep track of the line number */ + if( ISSPACE(c) ){ cp++; continue; } /* Skip all white space */ + if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ + cp+=2; + while( (c= *cp)!=0 && c!='\n' ) cp++; + continue; + } + if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ + cp+=2; + while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ + if( c=='\n' ) lineno++; + cp++; + } + if( c ) cp++; + continue; + } + ps.tokenstart = cp; /* Mark the beginning of the token */ + ps.tokenlineno = lineno; /* Linenumber on which token begins */ + if( c=='\"' ){ /* 
String literals */ + cp++; + while( (c= *cp)!=0 && c!='\"' ){ + if( c=='\n' ) lineno++; + cp++; + } + if( c==0 ){ + ErrorMsg(ps.filename,startline, +"String starting on this line is not terminated before the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( c=='{' ){ /* A block of C code */ + int level; + cp++; + for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ + if( c=='\n' ) lineno++; + else if( c=='{' ) level++; + else if( c=='}' ) level--; + else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ + int prevc; + cp = &cp[2]; + prevc = 0; + while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ + if( c=='\n' ) lineno++; + prevc = c; + cp++; + } + }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ + cp = &cp[2]; + while( (c= *cp)!=0 && c!='\n' ) cp++; + if( c ) lineno++; + }else if( c=='\'' || c=='\"' ){ /* String a character literals */ + int startchar, prevc; + startchar = c; + prevc = 0; + for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ + if( c=='\n' ) lineno++; + if( prevc=='\\' ) prevc = 0; + else prevc = c; + } + } + } + if( c==0 ){ + ErrorMsg(ps.filename,ps.tokenlineno, +"C code starting on this line is not terminated before the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( ISALNUM(c) ){ /* Identifiers */ + while( (c= *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; + nextcp = cp; + }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ + cp += 3; + nextcp = cp; + }else if( (c=='/' || c=='|') && ISALPHA(cp[1]) ){ + cp += 2; + while( (c = *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; + nextcp = cp; + }else{ /* All other (one character) operators */ + cp++; + nextcp = cp; + } + c = *cp; + *cp = 0; /* Null terminate the token */ + parseonetoken(&ps); /* Parse the token */ + *cp = (char)c; /* Restore the buffer */ + cp = nextcp; + } + free(filebuf); /* Release the buffer after parsing */ + gp->rule = ps.firstrule; + gp->errorcnt = 
ps.errorcnt;
+}
+/*************************** From the file "plink.c" *********************/
+/*
+** Routines processing configuration follow-set propagation links
+** in the LEMON parser generator.
+*/
+static struct plink *plink_freelist = 0;
+
+/* Allocate a new plink.
+**
+** Plinks are handed out from a private free list.  When the list is empty
+** a batch of "amt" zeroed plinks is obtained with calloc() and chained
+** together through their "next" fields.  The process exits if that
+** allocation fails.  The returned plink is not reset: its "next" field
+** still holds whatever it held on the free list, so the caller must set
+** the fields it uses.
+*/
+struct plink *Plink_new(){
+  struct plink *newlink;
+
+  if( plink_freelist==0 ){
+    int i;
+    int amt = 100;
+    plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) );
+    if( plink_freelist==0 ){
+      fprintf(stderr,
+      "Unable to allocate memory for a new follow-set propagation link.\n");
+      exit(1);
+    }
+    /* Thread the fresh batch into a singly linked free list */
+    for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1];
+    plink_freelist[amt-1].next = 0;
+  }
+  newlink = plink_freelist;
+  plink_freelist = plink_freelist->next;
+  return newlink;
+}
+
+/* Add a plink to a plink list.  The new link refers to configuration
+** "cfp" and is pushed onto the head of the list *plpp. */
+void Plink_add(struct plink **plpp, struct config *cfp)
+{
+  struct plink *newlink;
+  newlink = Plink_new();
+  newlink->next = *plpp;
+  *plpp = newlink;
+  newlink->cfp = cfp;
+}
+
+/* Transfer every plink on the list "from" to the list "to".  Each link is
+** pushed onto the head of *to in turn, so the transferred links end up in
+** reverse order. */
+void Plink_copy(struct plink **to, struct plink *from)
+{
+  struct plink *nextpl;
+  while( from ){
+    nextpl = from->next;
+    from->next = *to;
+    *to = from;
+    from = nextpl;
+  }
+}
+
+/* Delete every plink on the list.  The links are not handed back to
+** free(); they are returned to the free list for reuse by Plink_new(). */
+void Plink_delete(struct plink *plp)
+{
+  struct plink *nextpl;
+
+  while( plp ){
+    nextpl = plp->next;
+    plp->next = plink_freelist;
+    plink_freelist = plp;
+    plp = nextpl;
+  }
+}
+/*********************** From the file "report.c" **************************/
+/*
+** Procedures for generating reports and tables in the LEMON parser generator.
+*/
+
+/* Generate a filename with the given suffix.  Space to hold the
+** name comes from malloc() and must be freed by the calling
+** function.
+*/ +PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) +{ + char *name; + char *cp; + + name = (char*)malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5 ); + if( name==0 ){ + fprintf(stderr,"Can't allocate space for a filename.\n"); + exit(1); + } + lemon_strcpy(name,lemp->filename); + cp = strrchr(name,'.'); + if( cp ) *cp = 0; + lemon_strcat(name,suffix); + return name; +} + +/* Open a file with a name based on the name of the input file, +** but with a different (specified) suffix, and return a pointer +** to the stream */ +PRIVATE FILE *file_open( + struct lemon *lemp, + const char *suffix, + const char *mode +){ + FILE *fp; + + if( lemp->outname ) free(lemp->outname); + lemp->outname = file_makename(lemp, suffix); + fp = fopen(lemp->outname,mode); + if( fp==0 && *mode=='w' ){ + fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); + lemp->errorcnt++; + return 0; + } + return fp; +} + +/* Duplicate the input file without comments and without actions +** on rules */ +void Reprint(struct lemon *lemp) +{ + struct rule *rp; + struct symbol *sp; + int i, j, maxlen, len, ncolumns, skip; + printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); + maxlen = 10; + for(i=0; insymbol; i++){ + sp = lemp->symbols[i]; + len = lemonStrlen(sp->name); + if( len>maxlen ) maxlen = len; + } + ncolumns = 76/(maxlen+5); + if( ncolumns<1 ) ncolumns = 1; + skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; + for(i=0; insymbol; j+=skip){ + sp = lemp->symbols[j]; + assert( sp->index==j ); + printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); + } + printf("\n"); + } + for(rp=lemp->rule; rp; rp=rp->next){ + printf("%s",rp->lhs->name); + /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ + printf(" ::="); + for(i=0; inrhs; i++){ + sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + printf(" %s", sp->subsym[0]->name); + for(j=1; jnsubsym; j++){ + printf("|%s", sp->subsym[j]->name); + } + }else{ + printf(" %s", sp->name); + } + /* if( 
rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ + } + printf("."); + if( rp->precsym ) printf(" [%s]",rp->precsym->name); + /* if( rp->code ) printf("\n %s",rp->code); */ + printf("\n"); + } +} + +/* Print a single rule. +*/ +void RulePrint(FILE *fp, struct rule *rp, int iCursor){ + struct symbol *sp; + int i, j; + fprintf(fp,"%s ::=",rp->lhs->name); + for(i=0; i<=rp->nrhs; i++){ + if( i==iCursor ) fprintf(fp," *"); + if( i==rp->nrhs ) break; + sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + fprintf(fp," %s", sp->subsym[0]->name); + for(j=1; jnsubsym; j++){ + fprintf(fp,"|%s",sp->subsym[j]->name); + } + }else{ + fprintf(fp," %s", sp->name); + } + } +} + +/* Print the rule for a configuration. +*/ +void ConfigPrint(FILE *fp, struct config *cfp){ + RulePrint(fp, cfp->rp, cfp->dot); +} + +/* #define TEST */ +#if 0 +/* Print a set */ +PRIVATE void SetPrint(out,set,lemp) +FILE *out; +char *set; +struct lemon *lemp; +{ + int i; + char *spacer; + spacer = ""; + fprintf(out,"%12s[",""); + for(i=0; interminal; i++){ + if( SetFind(set,i) ){ + fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); + spacer = " "; + } + } + fprintf(out,"]\n"); +} + +/* Print a plink chain */ +PRIVATE void PlinkPrint(out,plp,tag) +FILE *out; +struct plink *plp; +char *tag; +{ + while( plp ){ + fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); + ConfigPrint(out,plp->cfp); + fprintf(out,"\n"); + plp = plp->next; + } +} +#endif + +/* Print an action to the given file descriptor. Return FALSE if +** nothing was actually printed. 
+*/ +int PrintAction( + struct action *ap, /* The action to print */ + FILE *fp, /* Print the action here */ + int indent /* Indent by this amount */ +){ + int result = 1; + switch( ap->type ){ + case SHIFT: { + struct state *stp = ap->x.stp; + fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); + break; + } + case REDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); + break; + } + case SHIFTREDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); + break; + } + case ACCEPT: + fprintf(fp,"%*s accept",indent,ap->sp->name); + break; + case ERROR: + fprintf(fp,"%*s error",indent,ap->sp->name); + break; + case SRCONFLICT: + case RRCONFLICT: + fprintf(fp,"%*s reduce %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.rp->iRule); + break; + case SSCONFLICT: + fprintf(fp,"%*s shift %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.stp->statenum); + break; + case SH_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s shift %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.stp->statenum); + }else{ + result = 0; + } + break; + case RD_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s reduce %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.rp->iRule); + }else{ + result = 0; + } + break; + case NOT_USED: + result = 0; + break; + } + if( result && ap->spOpt ){ + fprintf(fp," /* because %s==%s */", ap->sp->name, ap->spOpt->name); + } + return result; +} + +/* Generate the "*.out" log file */ +void ReportOutput(struct lemon *lemp) +{ + int i; + struct state *stp; + struct config *cfp; + struct action *ap; + FILE *fp; + + fp = file_open(lemp,".out","wb"); + if( fp==0 ) return; + for(i=0; inxstate; i++){ + stp = lemp->sorted[i]; + fprintf(fp,"State %d:\n",stp->statenum); + if( lemp->basisflag ) cfp=stp->bp; + else cfp=stp->cfp; + while( cfp ){ + char buf[20]; + if( 
cfp->dot==cfp->rp->nrhs ){ + lemon_sprintf(buf,"(%d)",cfp->rp->iRule); + fprintf(fp," %5s ",buf); + }else{ + fprintf(fp," "); + } + ConfigPrint(fp,cfp); + fprintf(fp,"\n"); +#if 0 + SetPrint(fp,cfp->fws,lemp); + PlinkPrint(fp,cfp->fplp,"To "); + PlinkPrint(fp,cfp->bplp,"From"); +#endif + if( lemp->basisflag ) cfp=cfp->bp; + else cfp=cfp->next; + } + fprintf(fp,"\n"); + for(ap=stp->ap; ap; ap=ap->next){ + if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); + } + fprintf(fp,"\n"); + } + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Symbols:\n"); + for(i=0; insymbol; i++){ + int j; + struct symbol *sp; + + sp = lemp->symbols[i]; + fprintf(fp, " %3d: %s", i, sp->name); + if( sp->type==NONTERMINAL ){ + fprintf(fp, ":"); + if( sp->lambda ){ + fprintf(fp, " "); + } + for(j=0; jnterminal; j++){ + if( sp->firstset && SetFind(sp->firstset, j) ){ + fprintf(fp, " %s", lemp->symbols[j]->name); + } + } + } + fprintf(fp, "\n"); + } + fclose(fp); + return; +} + +/* Search for the file "name" which is in the same directory as +** the exacutable */ +PRIVATE char *pathsearch(char *argv0, char *name, int modemask) +{ + const char *pathlist; + char *pathbufptr; + char *pathbuf; + char *path,*cp; + char c; + +#ifdef __WIN32__ + cp = strrchr(argv0,'\\'); +#else + cp = strrchr(argv0,'/'); +#endif + if( cp ){ + c = *cp; + *cp = 0; + path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); + if( path ) lemon_sprintf(path,"%s/%s",argv0,name); + *cp = c; + }else{ + pathlist = getenv("PATH"); + if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; + pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); + path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); + if( (pathbuf != 0) && (path!=0) ){ + pathbufptr = pathbuf; + lemon_strcpy(pathbuf, pathlist); + while( *pathbuf ){ + cp = strchr(pathbuf,':'); + if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; + c = *cp; + *cp = 0; + lemon_sprintf(path,"%s/%s",pathbuf,name); + *cp = c; + if( c==0 ) 
pathbuf[0] = 0; + else pathbuf = &cp[1]; + if( access(path,modemask)==0 ) break; + } + free(pathbufptr); + } + } + return path; +} + +/* Given an action, compute the integer value for that action +** which is to be put in the action table of the generated machine. +** Return negative if no action should be generated. +*/ +PRIVATE int compute_action(struct lemon *lemp, struct action *ap) +{ + int act; + switch( ap->type ){ + case SHIFT: act = ap->x.stp->statenum; break; + case SHIFTREDUCE: act = ap->x.rp->iRule + lemp->nstate; break; + case REDUCE: act = ap->x.rp->iRule + lemp->nstate+lemp->nrule; break; + case ERROR: act = lemp->nstate + lemp->nrule*2; break; + case ACCEPT: act = lemp->nstate + lemp->nrule*2 + 1; break; + default: act = -1; break; + } + return act; +} + +#define LINESIZE 1000 +/* The next cluster of routines are for reading the template file +** and writing the results to the generated parser */ +/* The first function transfers data from "in" to "out" until +** a line is seen which begins with "%%". The line number is +** tracked. +** +** if name!=0, then any word that begin with "Parse" is changed to +** begin with *name instead. +*/ +PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) +{ + int i, iStart; + char line[LINESIZE]; + while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ + (*lineno)++; + iStart = 0; + if( name ){ + for(i=0; line[i]; i++){ + if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 + && (i==0 || !ISALPHA(line[i-1])) + ){ + if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); + fprintf(out,"%s",name); + i += 4; + iStart = i+1; + } + } + } + fprintf(out,"%s",&line[iStart]); + } +} + +/* The next function finds the template file and opens it, returning +** a pointer to the opened file. 
*/
+/* The template is looked for in this order: the file named by
+** user_templatename (if set); "<grammar basename>.lt"; "lempar.js" in the
+** current directory; and finally whatever pathsearch() yields for
+** "lempar.js".  On any failure a message is printed, lemp->errorcnt is
+** incremented and 0 is returned.
+*/
+PRIVATE FILE *tplt_open(struct lemon *lemp)
+{
+  static char templatename[] = "lempar.js";
+  char buf[1000];
+  FILE *in;
+  char *tpltname;
+  char *toFree = 0;   /* Path allocated by pathsearch(), released on exit */
+  char *cp;
+
+  /* first, see if user specified a template filename on the command line. */
+  if (user_templatename != 0) {
+    if( access(user_templatename,004)==-1 ){
+      fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
+        user_templatename);
+      lemp->errorcnt++;
+      return 0;
+    }
+    in = fopen(user_templatename,"rb");
+    if( in==0 ){
+      fprintf(stderr,"Can't open the template file \"%s\".\n",
+              user_templatename);
+      lemp->errorcnt++;
+      return 0;
+    }
+    return in;
+  }
+
+  /* Build "<grammar file without its extension>.lt" */
+  cp = strrchr(lemp->filename,'.');
+  if( cp ){
+    lemon_sprintf(buf,"%.*s.lt",(int)(cp-lemp->filename),lemp->filename);
+  }else{
+    lemon_sprintf(buf,"%s.lt",lemp->filename);
+  }
+  if( access(buf,004)==0 ){
+    tpltname = buf;
+  }else if( access(templatename,004)==0 ){
+    tpltname = templatename;
+  }else{
+    /* Only this candidate is heap-allocated, so only it is freed below */
+    tpltname = toFree = pathsearch(lemp->argv0,templatename,0);
+  }
+  if( tpltname==0 ){
+    fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
+    templatename);
+    lemp->errorcnt++;
+    return 0;
+  }
+  in = fopen(tpltname,"rb");
+  if( in==0 ){
+    fprintf(stderr,"Can't open the template file \"%s\".\n",templatename);
+    lemp->errorcnt++;
+  }
+  free(toFree);
+  return in;
+}
+
+/* Print a #line directive line to the output file.
*/ +PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) +{ + fprintf(out,"// line %d \"",lineno); + while( *filename ){ + if( *filename == '\\' ) putc('\\',out); + putc(*filename,out); + filename++; + } + fprintf(out,"\"\n"); +} + +/* Print a string to the file and keep the linenumber up to date */ +PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) +{ + if( str==0 ) return; + while( *str ){ + putc(*str,out); + if( *str=='\n' ) (*lineno)++; + str++; + } + if( str[-1]!='\n' ){ + putc('\n',out); + (*lineno)++; + } + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } + return; +} + +/* +** The following routine emits code for the destructor for the +** symbol sp +*/ +void emit_destructor_code( + FILE *out, + struct symbol *sp, + struct lemon *lemp, + int *lineno +){ + char *cp = 0; + + if( sp->type==TERMINAL ){ + cp = lemp->tokendest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else if( sp->destructor ){ + cp = sp->destructor; + fprintf(out,"{\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,sp->destLineno,lemp->filename); + } + }else if( lemp->vardest ){ + cp = lemp->vardest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else{ + assert( 0 ); /* Cannot happen */ + } + for(; *cp; cp++){ + if( *cp=='$' && cp[1]=='$' ){ + fprintf(out,"(yypminor->yy%d)",sp->dtnum); + cp++; + continue; + } + if( *cp=='\n' ) (*lineno)++; + fputc(*cp,out); + } + fprintf(out,"\n"); (*lineno)++; + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } + fprintf(out,"}\n"); (*lineno)++; + return; +} + +/* +** Return TRUE (non-zero) if the given symbol has a destructor. 
+*/
+int has_destructor(struct symbol *sp, struct lemon *lemp)
+{
+  /* Terminals only ever use the shared token destructor */
+  if( sp->type==TERMINAL ) return lemp->tokendest!=0;
+  /* Nonterminals use their own destructor or the default one */
+  return lemp->vardest!=0 || sp->destructor!=0;
+}
+
+/*
+** Accumulate text in a single, function-private, growable buffer.
+**
+** Calling with zText==0 resets the buffer to empty and returns it (the
+** text built so far stays readable through the returned pointer until
+** the next append).  Any other call appends and returns the whole buffer.
+**
+** If n>0 exactly n bytes of zText are appended.  If n==0 all of zText up
+** to its terminator is appended.  If n<0 the buffer is first shortened
+** by -n characters (so n==-1 overwrites the previous character) and then
+** all of zText is appended.
+**
+** While appending, a "%d" in zText is replaced by the decimal value of
+** p1 for the first occurrence and of p2 for any later one.
+**
+** If the buffer cannot be allocated an empty string is returned.
+*/
+PRIVATE char *append_str(const char *zText, int n, int p1, int p2){
+  static char empty[1] = { 0 };
+  static char *zBuf = 0;     /* The accumulated text */
+  static int nAlloc = 0;     /* Bytes allocated for zBuf */
+  static int nUsed = 0;      /* Bytes of zBuf currently in use */
+  char zNum[40];             /* Rendering of one %d argument */
+  int ch;
+
+  if( zText==0 ){
+    if( nUsed==0 && zBuf!=0 ) zBuf[0] = 0;
+    nUsed = 0;
+    return zBuf;
+  }
+
+  if( n<0 ){
+    nUsed += n;
+    assert( nUsed>=0 );
+  }
+  if( n<=0 ) n = lemonStrlen(zText);
+
+  /* Always keep room for the text plus two rendered integers */
+  if( (int) (n+sizeof(zNum)*2+nUsed) >= nAlloc ){
+    nAlloc = n + sizeof(zNum)*2 + nUsed + 200;
+    zBuf = (char *) realloc(zBuf, nAlloc);
+  }
+  if( zBuf==0 ) return empty;
+
+  for(; n>0; n--){
+    ch = *(zText++);
+    if( ch=='%' && n>1 && zText[0]=='d' ){
+      lemon_sprintf(zNum, "%d", p1);
+      p1 = p2;
+      lemon_strcpy(&zBuf[nUsed], zNum);
+      nUsed += lemonStrlen(&zBuf[nUsed]);
+      zText++;   /* Step over the 'd' as well */
+      n--;
+    }else{
+      zBuf[nUsed++] = (char)ch;
+    }
+  }
+  zBuf[nUsed] = 0;
+  return zBuf;
+}
+
+/*
+** Write and transform the rp->code string so that symbols are expanded.
+** Populate the rp->codePrefix and rp->codeSuffix strings, as appropriate.
+**
+** Return 1 if the expanded code requires that "yylhsminor" local variable
+** to be defined.
+*/ +PRIVATE int translate_code(struct lemon *lemp, struct rule *rp){ + char *cp, *xp; + int i; + int rc = 0; /* True if yylhsminor is used */ + int dontUseRhs0 = 0; /* If true, use of left-most RHS label is illegal */ + const char *zSkip = 0; /* The zOvwrt comment within rp->code, or NULL */ + char lhsused = 0; /* True if the LHS element has been used */ + char lhsdirect; /* True if LHS writes directly into stack */ + char used[MAXRHS]; /* True for each RHS element which is used */ + char zLhs[50]; /* Convert the LHS symbol into this string */ + char zOvwrt[900]; /* Comment that to allow LHS to overwrite RHS */ + + for(i=0; inrhs; i++) used[i] = 0; + lhsused = 0; + + if( rp->code==0 ){ + static char newlinestr[2] = { '\n', '\0' }; + rp->code = newlinestr; + rp->line = rp->ruleline; + rp->noCode = 1; + }else{ + rp->noCode = 0; + } + + + if( rp->nrhs==0 ){ + /* If there are no RHS symbols, then writing directly to the LHS is ok */ + lhsdirect = 1; + }else if( rp->rhsalias[0]==0 ){ + /* The left-most RHS symbol has no value. LHS direct is ok. But + ** we have to call the distructor on the RHS symbol first. */ + lhsdirect = 1; + if( has_destructor(rp->rhs[0],lemp) ){ + append_str(0,0,0,0); + append_str(" this.yy_destructor(%d, this.yystack[this.yyidx + %d].minor);\n", 0, + rp->rhs[0]->index,1-rp->nrhs); + rp->codePrefix = Strsafe(append_str(0,0,0,0)); + rp->noCode = 0; + } + }else if( rp->lhsalias==0 ){ + /* There is no LHS value symbol. 
*/ + lhsdirect = 1; + }else if( strcmp(rp->lhsalias,rp->rhsalias[0])==0 ){ + /* The LHS symbol and the left-most RHS symbol are the same, so + ** direct writing is allowed */ + lhsdirect = 1; + lhsused = 1; + used[0] = 1; + if( rp->lhs->dtnum!=rp->rhs[0]->dtnum ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) and %s(%s) share the same label but have " + "different datatypes.", + rp->lhs->name, rp->lhsalias, rp->rhs[0]->name, rp->rhsalias[0]); + lemp->errorcnt++; + } + }else{ + lemon_sprintf(zOvwrt, "/*%s-overwrites-%s*/", + rp->lhsalias, rp->rhsalias[0]); + zSkip = strstr(rp->code, zOvwrt); + if( zSkip!=0 ){ + /* The code contains a special comment that indicates that it is safe + ** for the LHS label to overwrite left-most RHS label. */ + lhsdirect = 1; + }else{ + lhsdirect = 0; + } + } + if( lhsdirect ){ + sprintf(zLhs, "this.yystack[this.yyidx + %d].minor",1-rp->nrhs/*,rp->lhs->dtnum*/); + }else{ + rc = 1; + sprintf(zLhs, "yylhsminor"/*,rp->lhs->dtnum*/); + } + + append_str(0,0,0,0); + + /* This const cast is wrong but harmless, if we're careful. 
*/ + for(cp=(char *)rp->code; *cp; cp++){ + if( cp==zSkip ){ + append_str(zOvwrt,0,0,0); + cp += lemonStrlen(zOvwrt)-1; + dontUseRhs0 = 1; + continue; + } + if( ISALPHA(*cp) && (cp==rp->code || (!ISALNUM(cp[-1]) && cp[-1]!='_')) ){ + char saved; + for(xp= &cp[1]; ISALNUM(*xp) || *xp=='_'; xp++); + saved = *xp; + *xp = 0; + if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ + append_str(zLhs,0,0,0); + cp = xp; + lhsused = 1; + }else{ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ + if( i==0 && dontUseRhs0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used after '%s'.", + rp->rhsalias[0], zOvwrt); + lemp->errorcnt++; + }else if( cp!=rp->code && cp[-1]=='@' ){ + /* If the argument is of the form @X then substituted + ** the token number of X, not the value of X */ + append_str("this.yystack[this.yyidx + %d].major",-1,i-rp->nrhs+1,0); + }else{ + struct symbol *sp = rp->rhs[i]; + int dtnum; + if( sp->type==MULTITERMINAL ){ + dtnum = sp->subsym[0]->dtnum; + }else{ + dtnum = sp->dtnum; + } + append_str("this.yystack[this.yyidx + %d].minor",0,i-rp->nrhs+1, dtnum); + } + cp = xp; + used[i] = 1; + break; + } + } + } + *xp = saved; + } + append_str(cp, 1, 0, 0); + } /* End loop */ + + /* Main code generation completed */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ) rp->code = Strsafe(cp); + append_str(0,0,0,0); + + /* Check to make sure the LHS has been used */ + if( rp->lhsalias && !lhsused ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label \"%s\" for \"%s(%s)\" is never used.", + rp->lhsalias,rp->lhs->name,rp->lhsalias); + lemp->errorcnt++; + } + + /* Generate destructor code for RHS minor values which are not referenced. + ** Generate error messages for unused labels and duplicate labels. 
+ */ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] ){ + if( i>0 ){ + int j; + if( rp->lhsalias && strcmp(rp->lhsalias,rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) has the same label as the LHS but is not the left-most " + "symbol on the RHS.", + rp->rhs[i]->name, rp->rhsalias); + lemp->errorcnt++; + } + for(j=0; jrhsalias[j] && strcmp(rp->rhsalias[j],rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used for multiple symbols on the RHS of a rule.", + rp->rhsalias[i]); + lemp->errorcnt++; + break; + } + } + } + if( !used[i] ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s for \"%s(%s)\" is never used.", + rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); + lemp->errorcnt++; + } + }else if( i>0 && has_destructor(rp->rhs[i],lemp) ){ + append_str(" this.yy_destructor(%d, this.yystack[this.yyidx + %d].minor);\n", 0, + rp->rhs[i]->index,i-rp->nrhs+1); + } + } + + /* If unable to write LHS values directly into the stack, write the + ** saved LHS value now. */ + if( lhsdirect==0 ){ + append_str(" this.yystack[this.yyidx + %d].minor = ", 0, 1-rp->nrhs, rp->lhs->dtnum); + append_str(zLhs, 0, 0, 0); + append_str(";\n", 0, 0, 0); + } + + /* Suffix code generation complete */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ){ + rp->codeSuffix = Strsafe(cp); + rp->noCode = 0; + } + + return rc; +} + +/* +** Generate code which executes when the rule "rp" is reduced. Write +** the code to "out". Make sure lineno stays up-to-date. 
+*/ +PRIVATE void emit_code( + FILE *out, + struct rule *rp, + struct lemon *lemp, + int *lineno +){ + const char *cp; + + /* Setup code prior to the #line directive */ + if( rp->codePrefix && rp->codePrefix[0] ){ + fprintf(out, "{%s", rp->codePrefix); + for(cp=rp->codePrefix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + } + + /* Generate code to do the reduce action */ + if( rp->code ){ + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,rp->line,lemp->filename); + } + fprintf(out,"{%s",rp->code); + for(cp=rp->code; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + fprintf(out,"}\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,*lineno,lemp->outname); + } + } + + /* Generate breakdown code that occurs after the #line directive */ + if( rp->codeSuffix && rp->codeSuffix[0] ){ + fprintf(out, "%s", rp->codeSuffix); + for(cp=rp->codeSuffix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + } + + if( rp->codePrefix ){ + fprintf(out, "}\n"); (*lineno)++; + } + + return; +} + +/* +** Print the definition of the union used for the parser's data stack. +** This union contains fields for every possible data type for tokens +** and nonterminals. In the process of computing and printing this +** union, also set the ".dtnum" field of every terminal and nonterminal +** symbol. +*/ +void print_stack_union( + FILE *out, /* The output stream */ + struct lemon *lemp, /* The main info structure for this parser */ + int *plineno, /* Pointer to the line number */ + int mhflag /* True if generating makeheaders output */ +){ + int lineno = *plineno; /* The line number of the output */ + char **types; /* A hash table of datatypes */ + int arraysize; /* Size of the "types" array */ + int maxdtlength; /* Maximum length of any ".datatype" field. 
*/ + char *stddt; /* Standardized name for a datatype */ + int i,j; /* Loop counters */ + unsigned hash; /* For hashing the name of a type */ + const char *name; /* Name of the parser */ + + /* Allocate and initialize types[] and allocate stddt[] */ + arraysize = lemp->nsymbol * 2; + types = (char**)calloc( arraysize, sizeof(char*) ); + if( types==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + for(i=0; ivartype ){ + maxdtlength = lemonStrlen(lemp->vartype); + } + for(i=0; insymbol; i++){ + int len; + struct symbol *sp = lemp->symbols[i]; + if( sp->datatype==0 ) continue; + len = lemonStrlen(sp->datatype); + if( len>maxdtlength ) maxdtlength = len; + } + stddt = (char*)malloc( maxdtlength*2 + 1 ); + if( stddt==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + + /* Build a hash table of datatypes. The ".dtnum" field of each symbol + ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is + ** used for terminal symbols. If there is no %default_type defined then + ** 0 is also used as the .dtnum value for nonterminals which do not specify + ** a datatype using the %type directive. 
+ */ + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + char *cp; + if( sp==lemp->errsym ){ + sp->dtnum = arraysize+1; + continue; + } + if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ + sp->dtnum = 0; + continue; + } + cp = sp->datatype; + if( cp==0 ) cp = lemp->vartype; + j = 0; + while( ISSPACE(*cp) ) cp++; + while( *cp ) stddt[j++] = *cp++; + while( j>0 && ISSPACE(stddt[j-1]) ) j--; + stddt[j] = 0; + if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ + sp->dtnum = 0; + continue; + } + hash = 0; + for(j=0; stddt[j]; j++){ + hash = hash*53 + stddt[j]; + } + hash = (hash & 0x7fffffff)%arraysize; + while( types[hash] ){ + if( strcmp(types[hash],stddt)==0 ){ + sp->dtnum = hash + 1; + break; + } + hash++; + if( hash>=(unsigned)arraysize ) hash = 0; + } + if( types[hash]==0 ){ + sp->dtnum = hash + 1; + types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); + if( types[hash]==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + lemon_strcpy(types[hash],stddt); + } + } + + /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ + /* + name = lemp->name ? lemp->name : "Parse"; + lineno = *plineno; + if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } + fprintf(out,"#define %sTOKENTYPE %s\n",name, + lemp->tokentype?lemp->tokentype:"void*"); lineno++; + if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } + fprintf(out,"typedef union {\n"); lineno++; + fprintf(out," int yyinit;\n"); lineno++; + fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; + for(i=0; ierrsym->useCnt ){ + fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; + } + free(stddt); + free(types); + fprintf(out,"} YYMINORTYPE;\n"); lineno++; + *plineno = lineno; + */ +} + +/* +** Return the name of a C datatype able to represent values between +** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof +** for that type (1, 2, or 4) into *pnByte. 
+*/ +static const char *minimum_size_type(int lwr, int upr, int *pnByte){ + const char *zType = "int"; + int nByte = 4; + if( lwr>=0 ){ + if( upr<=255 ){ + zType = "unsigned char"; + nByte = 1; + }else if( upr<65535 ){ + zType = "unsigned short int"; + nByte = 2; + }else{ + zType = "unsigned int"; + nByte = 4; + } + }else if( lwr>=-127 && upr<=127 ){ + zType = "signed char"; + nByte = 1; + }else if( lwr>=-32767 && upr<32767 ){ + zType = "short"; + nByte = 2; + } + if( pnByte ) *pnByte = nByte; + return zType; +} + +/* +** Each state contains a set of token transaction and a set of +** nonterminal transactions. Each of these sets makes an instance +** of the following structure. An array of these structures is used +** to order the creation of entries in the yy_action[] table. +*/ +struct axset { + struct state *stp; /* A pointer to a state */ + int isTkn; /* True to use tokens. False for non-terminals */ + int nAction; /* Number of actions */ + int iOrder; /* Original order of action sets */ +}; + +/* +** Compare to axset structures for sorting purposes +*/ +static int axset_compare(const void *a, const void *b){ + struct axset *p1 = (struct axset*)a; + struct axset *p2 = (struct axset*)b; + int c; + c = p2->nAction - p1->nAction; + if( c==0 ){ + c = p1->iOrder - p2->iOrder; + } + assert( c!=0 || p1==p2 ); + return c; +} + +/* +** Write text on "out" that describes the rule "rp". 
+*/ +static void writeRuleText(FILE *out, struct rule *rp){ + int j; + fprintf(out,"%s ::=", rp->lhs->name); + for(j=0; jnrhs; j++){ + struct symbol *sp = rp->rhs[j]; + if( sp->type!=MULTITERMINAL ){ + fprintf(out," %s", sp->name); + }else{ + int k; + fprintf(out," %s", sp->subsym[0]->name); + for(k=1; knsubsym; k++){ + fprintf(out,"|%s",sp->subsym[k]->name); + } + } + } +} + + +/* Generate C source code for the parser */ +void ReportTable( + struct lemon *lemp, + int mhflag /* Output in makeheaders format if true */ +){ + FILE *out, *in; + char line[LINESIZE]; + int lineno; + struct state *stp; + struct action *ap; + struct rule *rp; + struct acttab *pActtab; + int i, j, n, sz; + int szActionType; /* sizeof(YYACTIONTYPE) */ + int szCodeType; /* sizeof(YYCODETYPE) */ + const char *name; + int mnTknOfst, mxTknOfst; + int mnNtOfst, mxNtOfst; + struct axset *ax; + + in = tplt_open(lemp); + if( in==0 ) return; + out = file_open(lemp,".js","wb"); + if( out==0 ){ + fclose(in); + return; + } + lineno = 1; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the include code, if any */ + tplt_print(out,lemp,lemp->include,&lineno); + /* + if( mhflag ){ + char *incName = file_makename(lemp, ".h"); + fprintf(out,"#include \"%s\"\n", incName); lineno++; + free(incName); + } + */ + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate #defines for all tokens */ + //if( mhflag ){ + const char *prefix; + //fprintf(out,"#if INTERFACE\n"); lineno++; + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + for(i=1; interminal; i++){ + fprintf(out,"this.%s%-30s = %2d;\n",prefix,lemp->symbols[i]->name,i); + lineno++; + } + //fprintf(out,"#endif\n"); lineno++; + //} + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the defines */ + //fprintf(out,"#define YYCODETYPE %s\n", + // minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++; + fprintf(out,"this.YYNOCODE = %d;\n",lemp->nsymbol+1); lineno++; + //fprintf(out,"#define YYACTIONTYPE %s\n", + // 
minimum_size_type(0,lemp->nstate+lemp->nrule*2+5,&szActionType)); lineno++; + if( lemp->wildcard ){ + fprintf(out,"this.YYWILDCARD = %d;\n", + lemp->wildcard->index); lineno++; + } + print_stack_union(out,lemp,&lineno,mhflag); + //fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; + if( lemp->stacksize ){ + fprintf(out,"this.YYSTACKDEPTH = %s;\n",lemp->stacksize); lineno++; + }else{ + fprintf(out,"this.YYSTACKDEPTH = 100;\n"); lineno++; + } + //fprintf(out, "#endif\n"); lineno++; + /* + if( mhflag ){ + fprintf(out,"#if INTERFACE\n"); lineno++; + } + name = lemp->name ? lemp->name : "Parse"; + if( lemp->arg && lemp->arg[0] ){ + i = lemonStrlen(lemp->arg); + while( i>=1 && ISSPACE(lemp->arg[i-1]) ) i--; + while( i>=1 && (ISALNUM(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; + fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", + name,lemp->arg,&lemp->arg[i]); lineno++; + fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", + name,&lemp->arg[i],&lemp->arg[i]); lineno++; + }else{ + fprintf(out,"#define %sARG_SDECL\n",name); lineno++; + fprintf(out,"#define %sARG_PDECL\n",name); lineno++; + fprintf(out,"#define %sARG_FETCH\n",name); lineno++; + fprintf(out,"#define %sARG_STORE\n",name); lineno++; + } + if( mhflag ){ + fprintf(out,"#endif\n"); lineno++; + } + */ + if( lemp->errsym->useCnt ){ + fprintf(out,"this.YYERRORSYMBOL = %d;\n",lemp->errsym->index); lineno++; + // fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; + } + fprintf(out,"this.YYFALLBACK = %s;\n", lemp->has_fallback ? "true" : "false"); lineno++; + + /* Compute the action table, but do not output it yet. The action + ** table must be computed before generating the YYNSTATE macro because + ** we need to know how many states can be eliminated. 
+ */ + ax = (struct axset *) calloc(lemp->nxstate*2, sizeof(ax[0])); + if( ax==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=0; inxstate; i++){ + stp = lemp->sorted[i]; + ax[i*2].stp = stp; + ax[i*2].isTkn = 1; + ax[i*2].nAction = stp->nTknAct; + ax[i*2+1].stp = stp; + ax[i*2+1].isTkn = 0; + ax[i*2+1].nAction = stp->nNtAct; + } + mxTknOfst = mnTknOfst = 0; + mxNtOfst = mnNtOfst = 0; + /* In an effort to minimize the action table size, use the heuristic + ** of placing the largest action sets first */ + for(i=0; inxstate*2; i++) ax[i].iOrder = i; + qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); + pActtab = acttab_alloc(); + for(i=0; inxstate*2 && ax[i].nAction>0; i++){ + stp = ax[i].stp; + if( ax[i].isTkn ){ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->index>=lemp->nterminal ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iTknOfst = acttab_insert(pActtab); + if( stp->iTknOfstiTknOfst; + if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; + }else{ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->indexnterminal ) continue; + if( ap->sp->index==lemp->nsymbol ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iNtOfst = acttab_insert(pActtab); + if( stp->iNtOfstiNtOfst; + if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; + } +#if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ + { int jj, nn; + for(jj=nn=0; jjnAction; jj++){ + if( pActtab->aAction[jj].action<0 ) nn++; + } + printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n", + i, stp->statenum, ax[i].isTkn ? 
"Token" : "Var ", + ax[i].nAction, pActtab->nAction, nn); + } +#endif + } + free(ax); + + /* Mark rules that are actually used for reduce actions after all + ** optimizations have been applied + */ + for(rp=lemp->rule; rp; rp=rp->next) rp->doesReduce = LEMON_FALSE; + for(i=0; inxstate; i++){ + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE || ap->type==SHIFTREDUCE ){ + ap->x.rp->doesReduce = i; + } + } + } + + /* Finish rendering the constants now that the action table has + ** been computed */ + fprintf(out,"this.YYNSTATE = %d;\n",lemp->nxstate); lineno++; + fprintf(out,"this.YYNRULE = %d;\n",lemp->nrule); lineno++; + fprintf(out,"this.YY_MAX_SHIFT = %d;\n",lemp->nxstate-1); lineno++; + fprintf(out,"this.YY_MIN_SHIFTREDUCE = %d;\n",lemp->nstate); lineno++; + i = lemp->nstate + lemp->nrule; + fprintf(out,"this.YY_MAX_SHIFTREDUCE = %d;\n", i-1); lineno++; + fprintf(out,"this.YY_MIN_REDUCE = %d;\n", i); lineno++; + i = lemp->nstate + lemp->nrule*2; + fprintf(out,"this.YY_MAX_REDUCE = %d;\n", i-1); lineno++; + fprintf(out,"this.YY_ERROR_ACTION = %d;\n", i); lineno++; + fprintf(out,"this.YY_ACCEPT_ACTION = %d;\n", i+1); lineno++; + fprintf(out,"this.YY_NO_ACTION = %d;\n", i+2); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Now output the action table and its associates: + ** + ** yy_action[] A single table containing all actions. + ** yy_lookahead[] A table containing the lookahead for each entry in + ** yy_action. Used to detect hash collisions. + ** yy_shift_ofst[] For each state, the offset into yy_action for + ** shifting terminals. + ** yy_reduce_ofst[] For each state, the offset into yy_action for + ** shifting non-terminals after a reduce. + ** yy_default[] Default action for each state. 
+ */ + + /* Output the yy_action table */ + lemp->nactiontab = n = acttab_size(pActtab); + lemp->tablesize += n*szActionType; + fprintf(out,"this.yy_action = [\n"); lineno++; + for(i=j=0; instate + lemp->nrule + 2; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", action); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "];\n"); lineno++; + + /* Output the yy_lookahead table */ + lemp->tablesize += n*szCodeType; + fprintf(out,"this.yy_lookahead = [\n"); lineno++; + for(i=j=0; insymbol; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", la); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "];\n"); lineno++; + + /* Output the yy_shift_ofst[] table */ + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; + fprintf(out, "this.YY_SHIFT_USE_DFLT = %d;\n", lemp->nactiontab); lineno++; + fprintf(out, "this.YY_SHIFT_COUNT = %d;\n", n-1); lineno++; + fprintf(out, "this.YY_SHIFT_MIN = %d;\n", mnTknOfst); lineno++; + fprintf(out, "this.YY_SHIFT_MAX = %d;\n", mxTknOfst); lineno++; + fprintf(out, "this.yy_shift_ofst = [\n"/*, + minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz)*/); + lineno++; + lemp->tablesize += n*sz; + for(i=j=0; isorted[i]; + ofst = stp->iTknOfst; + if( ofst==NO_OFFSET ) ofst = lemp->nactiontab; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "];\n"); lineno++; + + /* Output the yy_reduce_ofst[] table */ + fprintf(out, "this.YY_REDUCE_USE_DFLT = %d;\n", mnNtOfst-1); lineno++; + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; + fprintf(out, "this.YY_REDUCE_COUNT = %d;\n", n-1); lineno++; + fprintf(out, "this.YY_REDUCE_MIN = %d;\n", mnNtOfst); lineno++; + fprintf(out, "this.YY_REDUCE_MAX = %d;\n", mxNtOfst); 
lineno++; + fprintf(out, "this.yy_reduce_ofst = [\n"/*, + minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)*/); lineno++; + lemp->tablesize += n*sz; + for(i=j=0; isorted[i]; + ofst = stp->iNtOfst; + if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "];\n"); lineno++; + + /* Output the default action table */ + fprintf(out, "this.yy_default = [\n"); lineno++; + n = lemp->nxstate; + lemp->tablesize += n*szActionType; + for(i=j=0; isorted[i]; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", stp->iDfltReduce+lemp->nstate+lemp->nrule); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "];\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the table of fallback tokens. + */ + if( lemp->has_fallback ){ + int mx = lemp->nterminal - 1; + while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } + lemp->tablesize += (mx+1)*szCodeType; + for(i=0; i<=mx; i++){ + struct symbol *p = lemp->symbols[i]; + if( p->fallback==0 ){ + fprintf(out, " 0, /* %10s => nothing */\n", p->name); + }else{ + fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, + p->name, p->fallback->name); + } + lineno++; + } + } + tplt_xfer(lemp->name, in, out, &lineno); + + /* Generate a table containing the symbolic name of every symbol + */ + for(i=0; insymbol; i++){ + lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); + fprintf(out," %-15s",line); + if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } + } + if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate a table containing a text string that describes every + ** rule in the rule set of the grammar. This information is used + ** when tracing REDUCE actions. 
+ */ + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + assert( rp->iRule==i ); + fprintf(out," /* %3d */ \"", i); + writeRuleText(out, rp); + fprintf(out,"\",\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes every time a symbol is popped from + ** the stack while processing errors or while destroying the parser. + ** (In other words, generate the %destructor actions) + */ + if( lemp->tokendest ){ + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type!=TERMINAL ) continue; + if( once ){ + fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + } + for(i=0; insymbol && lemp->symbols[i]->type!=TERMINAL; i++); + if( insymbol ){ + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + } + if( lemp->vardest ){ + struct symbol *dflt_sp = 0; + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || + sp->index<=0 || sp->destructor!=0 ) continue; + if( once ){ + fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + dflt_sp = sp; + } + if( dflt_sp!=0 ){ + emit_destructor_code(out,dflt_sp,lemp,&lineno); + } + fprintf(out," break;\n"); lineno++; + } + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; + if( sp->destLineno<0 ) continue; /* Already emitted */ + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + + /* Combine duplicate destructors into a single case */ + for(j=i+1; jnsymbol; j++){ + struct symbol *sp2 = lemp->symbols[j]; + if( sp2 && sp2->type!=TERMINAL && sp2->destructor + && sp2->dtnum==sp->dtnum + && strcmp(sp->destructor,sp2->destructor)==0 ){ + fprintf(out," case %d: /* 
%s */\n", + sp2->index, sp2->name); lineno++; + sp2->destLineno = -1; /* Avoid emitting this destructor again */ + } + } + + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes whenever the parser stack overflows */ + tplt_print(out,lemp,lemp->overflow,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the table of rule information + ** + ** Note: This code depends on the fact that rules are number + ** sequentually beginning with 0. + */ + for(rp=lemp->rule; rp; rp=rp->next){ + fprintf(out," { lhs: %d, nrhs: %d },\n",rp->lhs->index,rp->nrhs); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which execution during each REDUCE action */ + i = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + i += translate_code(lemp, rp); + } + //if( i ){ + // fprintf(out," var yylhsminor;\n"); lineno++; + //} + /* First output rules other than the default: rule */ + for(rp=lemp->rule; rp; rp=rp->next){ + struct rule *rp2; /* Other rules with the same action */ + if( rp->codeEmitted ) continue; + if( rp->noCode ){ + /* No C code actions, so this will be part of the "default:" rule */ + continue; + } + fprintf(out," case %d: /* ", rp->iRule); + writeRuleText(out, rp); + fprintf(out, " */\n"); lineno++; + for(rp2=rp->next; rp2; rp2=rp2->next){ + if( rp2->code==rp->code && rp2->codePrefix==rp->codePrefix + && rp2->codeSuffix==rp->codeSuffix ){ + fprintf(out," case %d: /* ", rp2->iRule); + writeRuleText(out, rp2); + fprintf(out," */ this.yytestcase(yyruleno==%d);\n", rp2->iRule); lineno++; + rp2->codeEmitted = 1; + } + } + emit_code(out,rp,lemp,&lineno); + fprintf(out," break;\n"); lineno++; + rp->codeEmitted = 1; + } + /* Finally, output the default: rule. We choose as the default: all + ** empty actions. 
*/ + fprintf(out," default:\n"); lineno++; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->codeEmitted ) continue; + assert( rp->noCode ); + fprintf(out," /* (%d) ", rp->iRule); + writeRuleText(out, rp); + if( rp->doesReduce ){ + fprintf(out, " */ this.yytestcase(yyruleno==%d);\n", rp->iRule); lineno++; + }else{ + fprintf(out, " (OPTIMIZED OUT) */ assert(yyruleno!=%d);\n", + rp->iRule); lineno++; + } + } + fprintf(out," break;\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes if a parse fails */ + tplt_print(out,lemp,lemp->failure,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when a syntax error occurs */ + tplt_print(out,lemp,lemp->error,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when the parser accepts its input */ + tplt_print(out,lemp,lemp->accept,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Append any addition code the user desires */ + tplt_print(out,lemp,lemp->extracode,&lineno); + + fclose(in); + fclose(out); + return; +} + +/* Generate a header file for the parser */ +void ReportHeader(struct lemon *lemp) +{ + FILE *out, *in; + const char *prefix; + char line[LINESIZE]; + char pattern[LINESIZE]; + int i; + + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + in = file_open(lemp,".h","rb"); + if( in ){ + int nextChar; + for(i=1; interminal && fgets(line,LINESIZE,in); i++){ + lemon_sprintf(pattern,"#define %s%-30s %3d\n", + prefix,lemp->symbols[i]->name,i); + if( strcmp(line,pattern) ) break; + } + nextChar = fgetc(in); + fclose(in); + if( i==lemp->nterminal && nextChar==EOF ){ + /* No change in the file. Don't rewrite it. 
*/ + return; + } + } + out = file_open(lemp,".h","wb"); + if( out ){ + for(i=1; interminal; i++){ + fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i); + } + fclose(out); + } + return; +} + +/* Reduce the size of the action tables, if possible, by making use +** of defaults. +** +** In this version, we take the most frequent REDUCE action and make +** it the default. Except, there is no default if the wildcard token +** is a possible look-ahead. +*/ +void CompressTables(struct lemon *lemp) +{ + struct state *stp; + struct action *ap, *ap2, *nextap; + struct rule *rp, *rp2, *rbest; + int nbest, n; + int i; + int usesWildcard; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + nbest = 0; + rbest = 0; + usesWildcard = 0; + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ + usesWildcard = 1; + } + if( ap->type!=REDUCE ) continue; + rp = ap->x.rp; + if( rp->lhsStart ) continue; + if( rp==rbest ) continue; + n = 1; + for(ap2=ap->next; ap2; ap2=ap2->next){ + if( ap2->type!=REDUCE ) continue; + rp2 = ap2->x.rp; + if( rp2==rbest ) continue; + if( rp2==rp ) n++; + } + if( n>nbest ){ + nbest = n; + rbest = rp; + } + } + + /* Do not make a default if the number of rules to default + ** is not at least 1 or if the wildcard token is a possible + ** lookahead. + */ + if( nbest<1 || usesWildcard ) continue; + + + /* Combine matching REDUCE actions into a single default */ + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) break; + } + assert( ap ); + ap->sp = Symbol_new("{default}"); + for(ap=ap->next; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; + } + stp->ap = Action_sort(stp->ap); + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT ) break; + if( ap->type==REDUCE && ap->x.rp!=rbest ) break; + } + if( ap==0 ){ + stp->autoReduce = 1; + stp->pDfltReduce = rbest; + } + } + + /* Make a second pass over all states and actions. 
Convert + ** every action that is a SHIFT to an autoReduce state into + ** a SHIFTREDUCE action. + */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=ap->next){ + struct state *pNextState; + if( ap->type!=SHIFT ) continue; + pNextState = ap->x.stp; + if( pNextState->autoReduce && pNextState->pDfltReduce!=0 ){ + ap->type = SHIFTREDUCE; + ap->x.rp = pNextState->pDfltReduce; + } + } + } + + /* If a SHIFTREDUCE action specifies a rule that has a single RHS term + ** (meaning that the SHIFTREDUCE will land back in the state where it + ** started) and if there is no C-code associated with the reduce action, + ** then we can go ahead and convert the action to be the same as the + ** action for the RHS of the rule. + */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=nextap){ + nextap = ap->next; + if( ap->type!=SHIFTREDUCE ) continue; + rp = ap->x.rp; + if( rp->noCode==0 ) continue; + if( rp->nrhs!=1 ) continue; +#if 1 + /* Only apply this optimization to non-terminals. It would be OK to + ** apply it to terminal symbols too, but that makes the parser tables + ** larger. */ + if( ap->sp->indexnterminal ) continue; +#endif + /* If we reach this point, it means the optimization can be applied */ + nextap = ap; + for(ap2=stp->ap; ap2 && (ap2==ap || ap2->sp!=rp->lhs); ap2=ap2->next){} + assert( ap2!=0 ); + ap->spOpt = ap2->sp; + ap->type = ap2->type; + ap->x = ap2->x; + } + } +} + + +/* +** Compare two states for sorting purposes. The smaller state is the +** one with the most non-terminal actions. If they have the same number +** of non-terminal actions, then the smaller is the one with the most +** token actions. 
+*/ +static int stateResortCompare(const void *a, const void *b){ + const struct state *pA = *(const struct state**)a; + const struct state *pB = *(const struct state**)b; + int n; + + n = pB->nNtAct - pA->nNtAct; + if( n==0 ){ + n = pB->nTknAct - pA->nTknAct; + if( n==0 ){ + n = pB->statenum - pA->statenum; + } + } + assert( n!=0 ); + return n; +} + + +/* +** Renumber and resort states so that states with fewer choices +** occur at the end. Except, keep state 0 as the first state. +*/ +void ResortStates(struct lemon *lemp) +{ + int i; + struct state *stp; + struct action *ap; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + stp->nTknAct = stp->nNtAct = 0; + stp->iDfltReduce = lemp->nrule; /* Init dflt action to "syntax error" */ + stp->iTknOfst = NO_OFFSET; + stp->iNtOfst = NO_OFFSET; + for(ap=stp->ap; ap; ap=ap->next){ + int iAction = compute_action(lemp,ap); + if( iAction>=0 ){ + if( ap->sp->indexnterminal ){ + stp->nTknAct++; + }else if( ap->sp->indexnsymbol ){ + stp->nNtAct++; + }else{ + assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp ); + stp->iDfltReduce = iAction - lemp->nstate - lemp->nrule; + } + } + } + } + qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), + stateResortCompare); + for(i=0; instate; i++){ + lemp->sorted[i]->statenum = i; + } + lemp->nxstate = lemp->nstate; + while( lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce ){ + lemp->nxstate--; + } +} + + +/***************** From the file "set.c" ************************************/ +/* +** Set manipulation routines for the LEMON parser generator. +*/ + +static int size = 0; + +/* Set the set size */ +void SetSize(int n) +{ + size = n+1; +} + +/* Allocate a new set */ +char *SetNew(){ + char *s; + s = (char*)calloc( size, 1); + if( s==0 ){ + extern void memory_error(); + memory_error(); + } + return s; +} + +/* Deallocate a set */ +void SetFree(char *s) +{ + free(s); +} + +/* Add a new element to the set. 
Return TRUE if the element was added +** and FALSE if it was already there. */ +int SetAdd(char *s, int e) +{ + int rv; + assert( e>=0 && esize = 1024; + x1a->count = 0; + x1a->tbl = (x1node*)calloc(1024, sizeof(x1node) + sizeof(x1node*)); + if( x1a->tbl==0 ){ + free(x1a); + x1a = 0; + }else{ + int i; + x1a->ht = (x1node**)&(x1a->tbl[1024]); + for(i=0; i<1024; i++) x1a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Strsafe_insert(const char *data) +{ + x1node *np; + unsigned h; + unsigned ph; + + if( x1a==0 ) return 0; + ph = strhash(data); + h = ph & (x1a->size-1); + np = x1a->ht[h]; + while( np ){ + if( strcmp(np->data,data)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x1a->count>=x1a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x1 array; + array.size = arrSize = x1a->size*2; + array.count = x1a->count; + array.tbl = (x1node*)calloc(arrSize, sizeof(x1node) + sizeof(x1node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x1node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x1node *oldnp, *newnp; + oldnp = &(x1a->tbl[i]); + h = strhash(oldnp->data) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x1a->tbl); + *x1a = array; + } + /* Insert the new data */ + h = ph & (x1a->size-1); + np = &(x1a->tbl[x1a->count++]); + np->data = data; + if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); + np->next = x1a->ht[h]; + x1a->ht[h] = np; + np->from = &(x1a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. 
*/
+const char *Strsafe_find(const char *key)
+{
+  unsigned h;
+  x1node *np;
+
+  if( x1a==0 ) return 0;
+  h = strhash(key) & (x1a->size-1);
+  np = x1a->ht[h];
+  while( np ){
+    if( strcmp(np->data,key)==0 ) break;
+    np = np->next;
+  }
+  return np ? np->data : 0;
+}
+
+/* Return a pointer to the (terminal or nonterminal) symbol "x".
+** Create a new symbol if this is the first time "x" has been seen.
+**
+** A name whose first character is upper case becomes a TERMINAL; any
+** other name becomes a NONTERMINAL.  Every call, including the one that
+** creates the symbol, increments the symbol's useCnt.
+*/
+struct symbol *Symbol_new(const char *x)
+{
+  struct symbol *sp;
+
+  sp = Symbol_find(x);
+  if( sp==0 ){
+    sp = (struct symbol *)calloc(1, sizeof(struct symbol) );
+    MemoryCheck(sp);
+    sp->name = Strsafe(x);
+    sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL;
+    sp->rule = 0;
+    sp->fallback = 0;
+    sp->prec = -1;
+    sp->assoc = UNK;
+    sp->firstset = 0;
+    sp->lambda = LEMON_FALSE;
+    sp->destructor = 0;
+    sp->destLineno = 0;
+    sp->datatype = 0;
+    sp->useCnt = 0;
+    Symbol_insert(sp,sp->name);
+  }
+  sp->useCnt++;
+  return sp;
+}
+
+/* Compare two symbols for sorting purposes.  Return negative,
+** zero, or positive if a is less than, equal to, or greater
+** than b.
+**
+** Symbols that begin with upper case letters (terminals or tokens)
+** must sort before symbols that begin with lower case letters
+** (non-terminals).  And MULTITERMINAL symbols (created using the
+** %token_class directive) must sort at the very end. Other than
+** that, the order does not matter.
+**
+** We find experimentally that leaving the symbols in their original
+** order (the order they appeared in the grammar file) gives the
+** smallest parser tables in SQLite.
+*/
+int Symbolcmpp(const void *_a, const void *_b)
+{
+  const struct symbol *a = *(const struct symbol **) _a;
+  const struct symbol *b = *(const struct symbol **) _b;
+  /* Sort class: 1 = name starts at or before 'Z', 2 = after 'Z',
+  ** 3 = MULTITERMINAL.  Ties fall back to the symbol index. */
+  int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1;
+  int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1;
+  return i1==i2 ? a->index - b->index : i1 - i2;
+}
+
+/* There is one instance of the following structure for each
+** associative array of type "x2".
+*/
+struct s_x2 {
+  int size;               /* The number of available slots. */
+                          /*   Must be a power of 2 greater than or */
+                          /*   equal to 1 */
+  int count;              /* Number of slots currently filled */
+  struct s_x2node *tbl;   /* The data stored here */
+  struct s_x2node **ht;   /* Hash table for lookups */
+};
+
+/* There is one instance of this structure for every data element
+** in an associative array of type "x2".
+*/
+typedef struct s_x2node {
+  struct symbol *data;     /* The data */
+  const char *key;         /* The key */
+  struct s_x2node *next;   /* Next entry with the same hash */
+  struct s_x2node **from;  /* Previous link */
+} x2node;
+
+/* There is only one instance of the array, which is the following */
+static struct s_x2 *x2a;
+
+/* Allocate a new associative array.
+**
+** Does nothing if the array already exists.  tbl[] and ht[] share one
+** calloc() block: ht[] starts right after the last tbl[] slot.  If either
+** allocation fails, x2a is left NULL.
+*/
+void Symbol_init(){
+  if( x2a ) return;
+  x2a = (struct s_x2*)malloc( sizeof(struct s_x2) );
+  if( x2a ){
+    x2a->size = 128;
+    x2a->count = 0;
+    x2a->tbl = (x2node*)calloc(128, sizeof(x2node) + sizeof(x2node*));
+    if( x2a->tbl==0 ){
+      free(x2a);
+      x2a = 0;
+    }else{
+      int i;
+      x2a->ht = (x2node**)&(x2a->tbl[128]);
+      for(i=0; i<128; i++) x2a->ht[i] = 0;
+    }
+  }
+}
+/* Insert a new record into the array.  Return TRUE if successful.
+** Prior data with the same key is NOT overwritten.
+**
+** Returns FALSE if the array was never initialized, if the key is
+** already present, or if growing the table fails.
+*/
+int Symbol_insert(struct symbol *data, const char *key)
+{
+  x2node *np;
+  unsigned h;
+  unsigned ph;
+
+  if( x2a==0 ) return 0;
+  ph = strhash(key);
+  h = ph & (x2a->size-1);
+  np = x2a->ht[h];
+  while( np ){
+    if( strcmp(np->key,key)==0 ){
+      /* An existing entry with the same key is found. */
+      /* Fail because overwrite is not allowed. */
+      return 0;
+    }
+    np = np->next;
+  }
+  if( x2a->count>=x2a->size ){
+    /* Need to make the hash table bigger.  Every entry keeps its tbl[]
+    ** index and is re-chained under its hash in the doubled table. */
+    int i,arrSize;
+    struct s_x2 array;
+    array.size = arrSize = x2a->size*2;
+    array.count = x2a->count;
+    array.tbl = (x2node*)calloc(arrSize, sizeof(x2node) + sizeof(x2node*));
+    if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
+    array.ht = (x2node**)&(array.tbl[arrSize]);
+    for(i=0; i<x2a->count; i++){
+      x2node *oldnp, *newnp;
+      oldnp = &(x2a->tbl[i]);
+      h = strhash(oldnp->key) & (arrSize-1);
+      newnp = &(array.tbl[i]);
+      if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
+      newnp->next = array.ht[h];
+      newnp->key = oldnp->key;
+      newnp->data = oldnp->data;
+      newnp->from = &(array.ht[h]);
+      array.ht[h] = newnp;
+    }
+    free(x2a->tbl);
+    *x2a = array;
+  }
+  /* Insert the new data at the head of its hash chain */
+  h = ph & (x2a->size-1);
+  np = &(x2a->tbl[x2a->count++]);
+  np->key = key;
+  np->data = data;
+  if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next);
+  np->next = x2a->ht[h];
+  x2a->ht[h] = np;
+  np->from = &(x2a->ht[h]);
+  return 1;
+}
+
+/* Return a pointer to data assigned to the given key.  Return NULL
+** if no such key. */
+struct symbol *Symbol_find(const char *key)
+{
+  unsigned h;
+  x2node *np;
+
+  if( x2a==0 ) return 0;
+  h = strhash(key) & (x2a->size-1);
+  np = x2a->ht[h];
+  while( np ){
+    if( strcmp(np->key,key)==0 ) break;
+    np = np->next;
+  }
+  return np ? np->data : 0;
+}
+
+/* Return the n-th data (n counts from 1, in insertion order).
+** Return NULL if n is out of range. */
+struct symbol *Symbol_Nth(int n)
+{
+  struct symbol *data;
+  if( x2a && n>0 && n<=x2a->count ){
+    data = x2a->tbl[n-1].data;
+  }else{
+    data = 0;
+  }
+  return data;
+}
+
+/* Return the size of the array */
+int Symbol_count()
+{
+  return x2a ? x2a->count : 0;
+}
+
+/* Return an array of pointers to all data in the table.
+** The array is obtained from malloc.  Return NULL if memory allocation
+** problems, or if the array is empty.
*/
+struct symbol **Symbol_arrayof()
+{
+  struct symbol **array;
+  int i,arrSize;
+  if( x2a==0 ) return 0;
+  arrSize = x2a->count;
+  array = (struct symbol **)calloc(arrSize, sizeof(struct symbol *));
+  if( array ){
+    for(i=0; i<arrSize; i++) array[i] = x2a->tbl[i].data;
+  }
+  return array;
+}
+
+/* Compare two configurations: first by rule index, then by the
+** position of the dot. */
+int Configcmp(const char *_a,const char *_b)
+{
+  const struct config *a = (struct config *) _a;
+  const struct config *b = (struct config *) _b;
+  int x;
+  x = a->rp->index - b->rp->index;
+  if( x==0 ) x = a->dot - b->dot;
+  return x;
+}
+
+/* Compare two states.  Each state is given by the head of its basis
+** configuration list (linked through ->bp); the lists are compared
+** element by element, and when one list is a prefix of the other the
+** longer list sorts after the shorter one. */
+PRIVATE int statecmp(struct config *a, struct config *b)
+{
+  int rc;
+  for(rc=0; rc==0 && a && b;  a=a->bp, b=b->bp){
+    rc = a->rp->index - b->rp->index;
+    if( rc==0 ) rc = a->dot - b->dot;
+  }
+  if( rc==0 ){
+    if( a ) rc = 1;
+    if( b ) rc = -1;
+  }
+  return rc;
+}
+
+/* Hash a state: fold the rule index and dot of every configuration
+** on the ->bp list into one value. */
+PRIVATE unsigned statehash(struct config *a)
+{
+  unsigned h=0;
+  while( a ){
+    h = h*571 + a->rp->index*37 + a->dot;
+    a = a->bp;
+  }
+  return h;
+}
+
+/* Allocate a new, zero-filled state structure */
+struct state *State_new()
+{
+  struct state *newstate;
+  newstate = (struct state *)calloc(1, sizeof(struct state) );
+  MemoryCheck(newstate);
+  return newstate;
+}
+
+/* There is one instance of the following structure for each
+** associative array of type "x3".
+*/
+struct s_x3 {
+  int size;               /* The number of available slots. */
+                          /*   Must be a power of 2 greater than or */
+                          /*   equal to 1 */
+  int count;              /* Number of slots currently filled */
+  struct s_x3node *tbl;   /* The data stored here */
+  struct s_x3node **ht;   /* Hash table for lookups */
+};
+
+/* There is one instance of this structure for every data element
+** in an associative array of type "x3".
+*/
+typedef struct s_x3node {
+  struct state *data;                  /* The data */
+  struct config *key;                   /* The key */
+  struct s_x3node *next;   /* Next entry with the same hash */
+  struct s_x3node **from;  /* Previous link */
+} x3node;
+
+/* There is only one instance of the array, which is the following */
+static struct s_x3 *x3a;
+
+/* Allocate a new associative array.
+**
+** Does nothing if the array already exists.  tbl[] and ht[] share one
+** calloc() block: ht[] starts right after the last tbl[] slot.  If either
+** allocation fails, x3a is left NULL.
+*/
+void State_init(){
+  if( x3a ) return;
+  x3a = (struct s_x3*)malloc( sizeof(struct s_x3) );
+  if( x3a ){
+    x3a->size = 128;
+    x3a->count = 0;
+    x3a->tbl = (x3node*)calloc(128, sizeof(x3node) + sizeof(x3node*));
+    if( x3a->tbl==0 ){
+      free(x3a);
+      x3a = 0;
+    }else{
+      int i;
+      x3a->ht = (x3node**)&(x3a->tbl[128]);
+      for(i=0; i<128; i++) x3a->ht[i] = 0;
+    }
+  }
+}
+/* Insert a new record into the array.  Return TRUE if successful.
+** Prior data with the same key is NOT overwritten.
+**
+** Returns FALSE if the array was never initialized, if an equal key
+** (per statecmp) is already present, or if growing the table fails.
+*/
+int State_insert(struct state *data, struct config *key)
+{
+  x3node *np;
+  unsigned h;
+  unsigned ph;
+
+  if( x3a==0 ) return 0;
+  ph = statehash(key);
+  h = ph & (x3a->size-1);
+  np = x3a->ht[h];
+  while( np ){
+    if( statecmp(np->key,key)==0 ){
+      /* An existing entry with the same key is found. */
+      /* Fail because overwrite is not allowed. */
+      return 0;
+    }
+    np = np->next;
+  }
+  if( x3a->count>=x3a->size ){
+    /* Need to make the hash table bigger.  Every entry keeps its tbl[]
+    ** index and is re-chained under its hash in the doubled table. */
+    int i,arrSize;
+    struct s_x3 array;
+    array.size = arrSize = x3a->size*2;
+    array.count = x3a->count;
+    array.tbl = (x3node*)calloc(arrSize, sizeof(x3node) + sizeof(x3node*));
+    if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
+    array.ht = (x3node**)&(array.tbl[arrSize]);
+    for(i=0; i<x3a->count; i++){
+      x3node *oldnp, *newnp;
+      oldnp = &(x3a->tbl[i]);
+      h = statehash(oldnp->key) & (arrSize-1);
+      newnp = &(array.tbl[i]);
+      if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
+      newnp->next = array.ht[h];
+      newnp->key = oldnp->key;
+      newnp->data = oldnp->data;
+      newnp->from = &(array.ht[h]);
+      array.ht[h] = newnp;
+    }
+    free(x3a->tbl);
+    *x3a = array;
+  }
+  /* Insert the new data at the head of its hash chain */
+  h = ph & (x3a->size-1);
+  np = &(x3a->tbl[x3a->count++]);
+  np->key = key;
+  np->data = data;
+  if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next);
+  np->next = x3a->ht[h];
+  x3a->ht[h] = np;
+  np->from = &(x3a->ht[h]);
+  return 1;
+}
+
+/* Return a pointer to data assigned to the given key.  Return NULL
+** if no such key. */
+struct state *State_find(struct config *key)
+{
+  unsigned h;
+  x3node *np;
+
+  if( x3a==0 ) return 0;
+  h = statehash(key) & (x3a->size-1);
+  np = x3a->ht[h];
+  while( np ){
+    if( statecmp(np->key,key)==0 ) break;
+    np = np->next;
+  }
+  return np ? np->data : 0;
+}
+
+/* Return an array of pointers to all data in the table.
+** The array is obtained from malloc.  Return NULL if memory allocation
+** problems, or if the array is empty.
*/
+struct state **State_arrayof()
+{
+  struct state **array;
+  int i,arrSize;
+  if( x3a==0 ) return 0;
+  arrSize = x3a->count;
+  array = (struct state **)calloc(arrSize, sizeof(struct state *));
+  if( array ){
+    for(i=0; i<arrSize; i++) array[i] = x3a->tbl[i].data;
+  }
+  return array;
+}
+
+/* Hash a configuration from its rule index and dot position */
+PRIVATE unsigned confighash(struct config *a)
+{
+  unsigned h=0;
+  h = h*571 + a->rp->index*37 + a->dot;
+  return h;
+}
+
+/* There is one instance of the following structure for each
+** associative array of type "x4".
+*/
+struct s_x4 {
+  int size;               /* The number of available slots. */
+                          /*   Must be a power of 2 greater than or */
+                          /*   equal to 1 */
+  int count;              /* Number of slots currently filled */
+  struct s_x4node *tbl;   /* The data stored here */
+  struct s_x4node **ht;   /* Hash table for lookups */
+};
+
+/* There is one instance of this structure for every data element
+** in an associative array of type "x4".
+*/
+typedef struct s_x4node {
+  struct config *data;                  /* The data */
+  struct s_x4node *next;   /* Next entry with the same hash */
+  struct s_x4node **from;  /* Previous link */
+} x4node;
+
+/* There is only one instance of the array, which is the following */
+static struct s_x4 *x4a;
+
+/* Allocate a new associative array.
+**
+** Does nothing if the array already exists.  tbl[] and ht[] share one
+** calloc() block: ht[] starts right after the last tbl[] slot.  If either
+** allocation fails, x4a is left NULL.
+*/
+void Configtable_init(){
+  if( x4a ) return;
+  x4a = (struct s_x4*)malloc( sizeof(struct s_x4) );
+  if( x4a ){
+    x4a->size = 64;
+    x4a->count = 0;
+    x4a->tbl = (x4node*)calloc(64, sizeof(x4node) + sizeof(x4node*));
+    if( x4a->tbl==0 ){
+      free(x4a);
+      x4a = 0;
+    }else{
+      int i;
+      x4a->ht = (x4node**)&(x4a->tbl[64]);
+      for(i=0; i<64; i++) x4a->ht[i] = 0;
+    }
+  }
+}
+/* Insert a new record into the array.  Return TRUE if successful.
+** Prior data with the same key is NOT overwritten.
+**
+** The configuration is its own key (compared with Configcmp).  Returns
+** FALSE if the array was never initialized, if an equal configuration
+** is already present, or if growing the table fails.
+*/
+int Configtable_insert(struct config *data)
+{
+  x4node *np;
+  unsigned h;
+  unsigned ph;
+
+  if( x4a==0 ) return 0;
+  ph = confighash(data);
+  h = ph & (x4a->size-1);
+  np = x4a->ht[h];
+  while( np ){
+    if( Configcmp((const char *) np->data,(const char *) data)==0 ){
+      /* An existing entry with the same key is found. */
+      /* Fail because overwrite is not allowed. */
+      return 0;
+    }
+    np = np->next;
+  }
+  if( x4a->count>=x4a->size ){
+    /* Need to make the hash table bigger.  Every entry keeps its tbl[]
+    ** index and is re-chained under its hash in the doubled table. */
+    int i,arrSize;
+    struct s_x4 array;
+    array.size = arrSize = x4a->size*2;
+    array.count = x4a->count;
+    array.tbl = (x4node*)calloc(arrSize, sizeof(x4node) + sizeof(x4node*));
+    if( array.tbl==0 ) return 0;  /* Fail due to malloc failure */
+    array.ht = (x4node**)&(array.tbl[arrSize]);
+    for(i=0; i<x4a->count; i++){
+      x4node *oldnp, *newnp;
+      oldnp = &(x4a->tbl[i]);
+      h = confighash(oldnp->data) & (arrSize-1);
+      newnp = &(array.tbl[i]);
+      if( array.ht[h] ) array.ht[h]->from = &(newnp->next);
+      newnp->next = array.ht[h];
+      newnp->data = oldnp->data;
+      newnp->from = &(array.ht[h]);
+      array.ht[h] = newnp;
+    }
+    free(x4a->tbl);
+    *x4a = array;
+  }
+  /* Insert the new data at the head of its hash chain */
+  h = ph & (x4a->size-1);
+  np = &(x4a->tbl[x4a->count++]);
+  np->data = data;
+  if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next);
+  np->next = x4a->ht[h];
+  x4a->ht[h] = np;
+  np->from = &(x4a->ht[h]);
+  return 1;
+}
+
+/* Return a pointer to data assigned to the given key.  Return NULL
+** if no such key. */
+struct config *Configtable_find(struct config *key)
+{
+  unsigned h;
+  x4node *np;
+
+  if( x4a==0 ) return 0;
+  h = confighash(key) & (x4a->size-1);
+  np = x4a->ht[h];
+  while( np ){
+    if( Configcmp((const char *) np->data,(const char *) key)==0 ) break;
+    np = np->next;
+  }
+  return np ? np->data : 0;
+}
+
+/* Remove all data from the table.  Pass each data to the function "f"
+** as it is removed.  ("f" may be null to avoid this step.)
*/
+void Configtable_clear(int(*f)(struct config *))
+{
+  int i;
+  if( x4a==0 || x4a->count==0 ) return;
+  if( f ) for(i=0; i<x4a->count; i++) (*f)(x4a->tbl[i].data);
+  /* Empty every hash chain; the tbl[] storage itself is kept for reuse */
+  for(i=0; i<x4a->size; i++) x4a->ht[i] = 0;
+  x4a->count = 0;
+  return;
+}
diff --git a/lemon-src/lemon.c b/lemon-src/lemon.c
new file mode 100644
index 0000000..aa0f4e3
--- /dev/null
+++ b/lemon-src/lemon.c
@@ -0,0 +1,5436 @@
+/*
+** This file contains all sources (including headers) to the LEMON
+** LALR(1) parser generator.  The sources have been combined into a
+** single file to make it easy to include LEMON in the source tree
+** and Makefile of another program.
+**
+** The author of this program disclaims copyright.
+*/
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* The casts keep a plain char argument from being passed to the
+** <ctype.h> classifiers as a negative value. */
+#define ISSPACE(X) isspace((unsigned char)(X))
+#define ISDIGIT(X) isdigit((unsigned char)(X))
+#define ISALNUM(X) isalnum((unsigned char)(X))
+#define ISALPHA(X) isalpha((unsigned char)(X))
+#define ISUPPER(X) isupper((unsigned char)(X))
+#define ISLOWER(X) islower((unsigned char)(X))
+
+
+#ifndef __WIN32__
+#   if defined(_WIN32) || defined(WIN32)
+#       define __WIN32__
+#   endif
+#endif
+
+#ifdef __WIN32__
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern int access(const char *path, int mode);
+#ifdef __cplusplus
+}
+#endif
+#else
+#include <unistd.h>
+#endif
+
+/* #define PRIVATE static */
+#define PRIVATE
+
+#ifdef TEST
+#define MAXRHS 5       /* Set low to exercise exception code */
+#else
+#define MAXRHS 1000
+#endif
+
+static int showPrecedenceConflict = 0;
+static char *msort(char*,char**,int(*)(const char*,const char*));
+
+/*
+** Compilers are getting increasingly pedantic about type conversions
+** as C evolves ever closer to Ada....  To work around the latest problems
+** we have to define the following variant of strlen().
+*/
+#define lemonStrlen(X)   ((int)strlen(X))
+
+/*
+** Compilers are starting to complain about the use of sprintf() and strcpy(),
+** saying they are unsafe.  So we define our own versions of those routines too.
+**
+** There are three routines here:  lemon_sprintf(), lemon_vsprintf(), and
+** lemon_addtext(). The first two are replacements for sprintf() and vsprintf().
+** The third is a helper routine for vsnprintf() that adds texts to the end of a
+** buffer, making sure the buffer is always zero-terminated.
+**
+** The string formatter is a minimal subset of stdlib sprintf() supporting only
+** a few simple conversions:
+**
+**   %d
+**   %s
+**   %.*s
+**   %%
+**
+** %d and %s also accept an optional decimal field width, with a leading
+** '-' to left justify.  None of these routines check the size of the
+** destination buffer; the caller must supply one that is large enough.
+*/
+static void lemon_addtext(
+  char *zBuf,           /* The buffer to which text is added */
+  int *pnUsed,          /* Slots of the buffer used so far */
+  const char *zIn,      /* Text to add */
+  int nIn,              /* Bytes of text to add.  -1 to use strlen() */
+  int iWidth            /* Field width.  Negative to left justify */
+){
+  if( nIn<0 ) for(nIn=0; zIn[nIn]; nIn++){}
+  while( iWidth>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth--; }
+  if( nIn>0 ){
+    memcpy(&zBuf[*pnUsed], zIn, nIn);
+    *pnUsed += nIn;
+  }
+  while( (-iWidth)>nIn ){ zBuf[(*pnUsed)++] = ' '; iWidth++; }
+  /* Always terminate, even when only padding (or nothing) was added */
+  zBuf[*pnUsed] = 0;
+}
+/* Format zFormat and its arguments into str[].  Return the number of
+** bytes written, not counting the zero terminator.  An unsupported
+** conversion prints "illegal format" on stderr and exits.
+*/
+static int lemon_vsprintf(char *str, const char *zFormat, va_list ap){
+  int i, j, k, c;
+  int nUsed = 0;
+  const char *z;
+  char zTemp[50];
+  str[0] = 0;
+  /* j marks the start of the literal text not yet copied to str[] */
+  for(i=j=0; (c = zFormat[i])!=0; i++){
+    if( c=='%' ){
+      int iWidth = 0;
+      lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0);
+      c = zFormat[++i];
+      if( ISDIGIT(c) || (c=='-' && ISDIGIT(zFormat[i+1])) ){
+        if( c=='-' ) i++;
+        while( ISDIGIT(zFormat[i]) ) iWidth = iWidth*10 + zFormat[i++] - '0';
+        if( c=='-' ) iWidth = -iWidth;
+        c = zFormat[i];
+      }
+      if( c=='d' ){
+        int v = va_arg(ap, int);
+        /* Negate in unsigned arithmetic so that INT_MIN does not overflow */
+        unsigned int u = v<0 ? 0u-(unsigned int)v : (unsigned int)v;
+        /* Build sign and digits right-to-left in the tail of zTemp[] so
+        ** the field width is applied exactly once, to the whole number */
+        k = 0;
+        do{
+          k++;
+          zTemp[sizeof(zTemp)-k] = (char)((u%10) + '0');
+          u /= 10;
+        }while( u>0 );
+        if( v<0 ){
+          k++;
+          zTemp[sizeof(zTemp)-k] = '-';
+        }
+        lemon_addtext(str, &nUsed, &zTemp[sizeof(zTemp)-k], k, iWidth);
+      }else if( c=='s' ){
+        z = va_arg(ap, const char*);
+        lemon_addtext(str, &nUsed, z, -1, iWidth);
+      }else if( c=='.' && memcmp(&zFormat[i], ".*s", 3)==0 ){
+        i += 2;
+        k = va_arg(ap, int);
+        z = va_arg(ap, const char*);
+        lemon_addtext(str, &nUsed, z, k, iWidth);
+      }else if( c=='%' ){
+        lemon_addtext(str, &nUsed, "%", 1, 0);
+      }else{
+        fprintf(stderr, "illegal format\n");
+        exit(1);
+      }
+      j = i+1;
+    }
+  }
+  lemon_addtext(str, &nUsed, &zFormat[j], i-j, 0);
+  return nUsed;
+}
+/* Variadic front end to lemon_vsprintf(); same return value */
+static int lemon_sprintf(char *str, const char *format, ...){
+  va_list ap;
+  int rc;
+  va_start(ap, format);
+  rc = lemon_vsprintf(str, format, ap);
+  va_end(ap);
+  return rc;
+}
+/* Copy src, including its zero terminator, to dest */
+static void lemon_strcpy(char *dest, const char *src){
+  while( (*(dest++) = *(src++))!=0 ){}
+}
+/* Append src to the zero-terminated string already in dest */
+static void lemon_strcat(char *dest, const char *src){
+  while( *dest ) dest++;
+  lemon_strcpy(dest, src);
+}
+
+
+/* a few forward declarations... */
+struct rule;
+struct lemon;
+struct action;
+
+static struct action *Action_new(void);
+static struct action *Action_sort(struct action *);
+
+/********** From the file "build.h" ************************************/
+void FindRulePrecedences();
+void FindFirstSets();
+void FindStates();
+void FindLinks();
+void FindFollowSets();
+void FindActions();
+
+/********* From the file "configlist.h" *********************************/
+void Configlist_init(void);
+struct config *Configlist_add(struct rule *, int);
+struct config *Configlist_addbasis(struct rule *, int);
+void Configlist_closure(struct lemon *);
+void Configlist_sort(void);
+void Configlist_sortbasis(void);
+struct config *Configlist_return(void);
+struct config *Configlist_basis(void);
+void Configlist_eat(struct config *);
+void Configlist_reset(void);
+
+/********* From the file "error.h" ***************************************/
+void ErrorMsg(const char *, int,const char *, ...);
+
+/****** From the file "option.h" ******************************************/
+enum option_type { OPT_FLAG=1,  OPT_INT,  OPT_DBL,  OPT_STR,
+         OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR};
+struct s_options {
+  enum option_type type;
+  const char *label;
+  char *arg;
+  const char *message;
+};
+int    OptInit(char**,struct s_options*,FILE*);
+int    OptNArgs(void);
+char  *OptArg(int);
+void   OptErr(int);
+void   OptPrint(void);
+
+/******** From the file "parse.h" *****************************************/
+void Parse(struct lemon *lemp);
+
+/********* From the file "plink.h" ***************************************/
+struct plink *Plink_new(void);
+void Plink_add(struct plink **, struct config *);
+void Plink_copy(struct plink **, struct plink *);
+void Plink_delete(struct plink *);
+
+/********** From the file "report.h" *************************************/
+void Reprint(struct lemon *);
+void ReportOutput(struct lemon *);
+void ReportTable(struct lemon *, int);
+void ReportHeader(struct lemon *);
+void CompressTables(struct lemon *);
+void ResortStates(struct lemon *);
+
+/********** From the file "set.h" ****************************************/
+void  SetSize(int);             /* All sets will be of size N */
+char *SetNew(void);               /* A new set for element 0..N */
+void  SetFree(char*);             /* Deallocate a set */
+int SetAdd(char*,int);            /* Add element to a set */
+int SetUnion(char *,char *);    /* A <- A U B, thru element N */
+#define SetFind(X,Y) (X[Y])       /* True if Y is in set X */
+
+/********** From the file "struct.h" *************************************/
+/*
+** Principal data structures for the LEMON parser generator.
+*/
+
+typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean;
+
+/* Symbols (terminals and nonterminals) of the grammar are stored
+** in the following: */
+enum symbol_type {
+  TERMINAL,
+  NONTERMINAL,
+  MULTITERMINAL
+};
+enum e_assoc {
+    LEFT,
+    RIGHT,
+    NONE,
+    UNK
+};
+struct symbol {
+  const char *name;        /* Name of the symbol */
+  int index;               /* Index number for this symbol */
+  enum symbol_type type;   /* Either TERMINAL, NONTERMINAL or MULTITERMINAL */
+  struct rule *rule;       /* Linked list of rules of this (if an NT) */
+  struct symbol *fallback; /* Fallback token in case this token doesn't parse */
+  int prec;                /* Precedence if defined (-1 otherwise) */
+  enum e_assoc assoc;      /* Associativity if precedence is defined */
+  char *firstset;          /* First-set for all rules of this symbol */
+  Boolean lambda;          /* True if NT and can generate an empty string */
+  int useCnt;              /* Number of times used */
+  char *destructor;        /* Code which executes whenever this symbol is
+                           ** popped from the stack during error processing */
+  int destLineno;          /* Line number for start of destructor.  Set to
+                           ** -1 for duplicate destructors. */
+  char *datatype;          /* The data type of information held by this
+                           ** object. Only used if type==NONTERMINAL */
+  int dtnum;               /* The data type number.  In the parser, the value
+                           ** stack is a union.  The .yy%d element of this
+                           ** union is the correct data type for this object */
+  /* The following fields are used by MULTITERMINALs only */
+  int nsubsym;             /* Number of constituent symbols in the MULTI */
+  struct symbol **subsym;  /* Array of constituent symbols */
+};
+
+/* Each production rule in the grammar is stored in the following
+** structure.
*/
+struct rule {
+  struct symbol *lhs;      /* Left-hand side of the rule */
+  const char *lhsalias;    /* Alias for the LHS (NULL if none) */
+  int lhsStart;            /* True if left-hand side is the start symbol */
+  int ruleline;            /* Line number for the rule */
+  int nrhs;                /* Number of RHS symbols */
+  struct symbol **rhs;     /* The RHS symbols */
+  const char **rhsalias;   /* An alias for each RHS symbol (NULL if none) */
+  int line;                /* Line number at which code begins */
+  const char *code;        /* The code executed when this rule is reduced */
+  const char *codePrefix;  /* Setup code that goes before code[] above */
+  const char *codeSuffix;  /* Breakdown code that goes after code[] above */
+  int noCode;              /* True if this rule has no associated C code */
+  int codeEmitted;         /* True if the code has been emitted already */
+  struct symbol *precsym;  /* Precedence symbol for this rule */
+  int index;               /* An index number for this rule */
+  int iRule;               /* Rule number as used in the generated tables */
+  Boolean canReduce;       /* True if this rule is ever reduced */
+  Boolean doesReduce;      /* Reduce actions occur after optimization */
+  struct rule *nextlhs;    /* Next rule with the same LHS */
+  struct rule *next;       /* Next rule in the global list */
+};
+
+/* A configuration is a production rule of the grammar together with
+** a mark (dot) showing how much of that rule has been processed so far.
+** Configurations also contain a follow-set which is a list of terminal
+** symbols which are allowed to immediately follow the end of the rule.
+** Every configuration is recorded as an instance of the following: */
+enum cfgstatus {
+  COMPLETE,
+  INCOMPLETE
+};
+struct config {
+  struct rule *rp;         /* The rule upon which the configuration is based */
+  int dot;                 /* The parse point */
+  char *fws;               /* Follow-set for this configuration only */
+  struct plink *fplp;      /* Follow-set forward propagation links */
+  struct plink *bplp;      /* Follow-set backwards propagation links */
+  struct state *stp;       /* Pointer to state which contains this */
+  enum cfgstatus status;   /* Used during followset and shift computations */
+  struct config *next;     /* Next configuration in the state */
+  struct config *bp;       /* The next basis configuration */
+};
+
+enum e_action {
+  SHIFT,
+  ACCEPT,
+  REDUCE,
+  ERROR,
+  SSCONFLICT,              /* A shift/shift conflict */
+  SRCONFLICT,              /* Was a reduce, but part of a conflict */
+  RRCONFLICT,              /* Was a reduce, but part of a conflict */
+  SH_RESOLVED,             /* Was a shift.  Precedence resolved conflict */
+  RD_RESOLVED,             /* Was reduce.  Precedence resolved conflict */
+  NOT_USED,                /* Deleted by compression */
+  SHIFTREDUCE              /* Shift first, then reduce */
+};
+
+/* Every shift or reduce operation is stored as one of the following */
+struct action {
+  struct symbol *sp;       /* The look-ahead symbol */
+  enum e_action type;      /* Which kind of action this is */
+  union {
+    struct state *stp;     /* The new state, if a shift */
+    struct rule *rp;       /* The rule, if a reduce */
+  } x;
+  struct symbol *spOpt;    /* SHIFTREDUCE optimization to this symbol */
+  struct action *next;     /* Next action for this state */
+  struct action *collide;  /* Next action with the same hash */
+};
+
+/* Each state of the generated parser's finite state machine
+** is encoded as an instance of the following structure.
*/
+struct state {
+  struct config *bp;       /* The basis configurations for this state */
+  struct config *cfp;      /* All configurations in this set */
+  int statenum;            /* Sequential number for this state */
+  struct action *ap;       /* List of actions for this state */
+  int nTknAct, nNtAct;     /* Number of actions on terminals and nonterminals */
+  int iTknOfst, iNtOfst;   /* yy_action[] offset for terminals and nonterms */
+  int iDfltReduce;         /* Default action is to REDUCE by this rule */
+  struct rule *pDfltReduce;/* The default REDUCE rule. */
+  int autoReduce;          /* True if this is an auto-reduce state */
+};
+#define NO_OFFSET (-2147483647)
+
+/* A followset propagation link indicates that the contents of one
+** configuration followset should be propagated to another whenever
+** the first changes. */
+struct plink {
+  struct config *cfp;      /* The configuration to which linked */
+  struct plink *next;      /* The next propagate link */
+};
+
+/* The state vector for the entire parser generator is recorded as
+** follows.  (LEMON uses no global variables and makes little use of
+** static variables.  Fields in the following structure can be thought
+** of as being global variables in the program.)
*/
+struct lemon {
+  struct state **sorted;   /* Table of states sorted by state number */
+  struct rule *rule;       /* List of all rules */
+  struct rule *startRule;  /* First rule */
+  int nstate;              /* Number of states */
+  int nxstate;             /* nstate with tail degenerate states removed */
+  int nrule;               /* Number of rules */
+  int nsymbol;             /* Number of terminal and nonterminal symbols */
+  int nterminal;           /* Number of terminal symbols */
+  struct symbol **symbols; /* Sorted array of pointers to symbols */
+  int errorcnt;            /* Number of errors */
+  struct symbol *errsym;   /* The error symbol */
+  struct symbol *wildcard; /* Token that matches anything */
+  char *name;              /* Name of the generated parser */
+  char *arg;               /* Declaration of the 3rd argument to parser */
+  char *tokentype;         /* Type of terminal symbols in the parser stack */
+  char *vartype;           /* The default type of non-terminal symbols */
+  char *start;             /* Name of the start symbol for the grammar */
+  char *stacksize;         /* Size of the parser stack */
+  char *include;           /* Code to put at the start of the C file */
+  char *error;             /* Code to execute when an error is seen */
+  char *overflow;          /* Code to execute on a stack overflow */
+  char *failure;           /* Code to execute on parser failure */
+  char *accept;            /* Code to execute when the parser accepts */
+  char *extracode;         /* Code appended to the generated file */
+  char *tokendest;         /* Code to execute to destroy token data */
+  char *vardest;           /* Code for the default non-terminal destructor */
+  char *filename;          /* Name of the input file */
+  char *outname;           /* Name of the current output file */
+  char *tokenprefix;       /* A prefix added to token names in the .h file */
+  int nconflict;           /* Number of parsing conflicts */
+  int nactiontab;          /* Number of entries in the yy_action[] table */
+  int tablesize;           /* Total table size of all tables in bytes */
+  int basisflag;           /* Print only basis configurations */
+  int has_fallback;        /* True if any %fallback is seen in the grammar */
+  int nolinenosflag;       /* True if #line statements should not be printed */
+  char *argv0;             /* Name of the program */
+};
+
+/* Call memory_error() when the pointer X is NULL */
+#define MemoryCheck(X) if((X)==0){ \
+  extern void memory_error(); \
+  memory_error(); \
+}
+
+/**************** From the file "table.h" *********************************/
+/*
+** All code in this file has been automatically generated
+** from a specification in the file
+**              "table.q"
+** by the associative array code building program "aagen".
+** Do not edit this file!  Instead, edit the specification
+** file, then rerun aagen.
+*/
+/*
+** Code for processing tables in the LEMON parser generator.
+*/
+/* Routines for handling strings */
+
+const char *Strsafe(const char *);
+
+void Strsafe_init(void);
+int Strsafe_insert(const char *);
+const char *Strsafe_find(const char *);
+
+/* Routines for handling symbols of the grammar */
+
+struct symbol *Symbol_new(const char *);
+int Symbolcmpp(const void *, const void *);
+void Symbol_init(void);
+int Symbol_insert(struct symbol *, const char *);
+struct symbol *Symbol_find(const char *);
+struct symbol *Symbol_Nth(int);
+int Symbol_count(void);
+struct symbol **Symbol_arrayof(void);
+
+/* Routines to manage the state table */
+
+int Configcmp(const char *, const char *);
+struct state *State_new(void);
+void State_init(void);
+int State_insert(struct state *, struct config *);
+struct state *State_find(struct config *);
+struct state **State_arrayof(/* */);
+
+/* Routines used for efficiency in Configlist_add */
+
+void Configtable_init(void);
+int Configtable_insert(struct config *);
+struct config *Configtable_find(struct config *);
+void Configtable_clear(int(*)(struct config *));
+
+/****************** From the file "action.c" *******************************/
+/*
+** Routines processing parser actions in the LEMON parser generator.
+*/
+
+/* Allocate a new parser action.
+**
+** Actions are handed out from a free list that is refilled 100 at a
+** time with calloc().  Exits the program if that allocation fails.
+*/
+static struct action *Action_new(void){
+  static struct action *freelist = 0;
+  struct action *newaction;
+
+  if( freelist==0 ){
+    int i;
+    int amt = 100;
+    freelist = (struct action *)calloc(amt, sizeof(struct action));
+    if( freelist==0 ){
+      fprintf(stderr,"Unable to allocate memory for a new parser action.");
+      exit(1);
+    }
+    /* Chain the fresh batch together through ->next */
+    for(i=0; i<amt-1; i++) freelist[i].next = &freelist[i+1];
+    freelist[amt-1].next = 0;
+  }
+  newaction = freelist;
+  freelist = freelist->next;
+  return newaction;
+}
+
+/* Compare two actions for sorting purposes.  Return negative, zero, or
+** positive if the first action is less than, equal to, or greater than
+** the second.
+**
+** Actions are ordered by look-ahead symbol index, then by action type,
+** then (for REDUCE and SHIFTREDUCE) by rule index, and finally by
+** address so that two distinct actions never compare equal.
+*/
+static int actioncmp(
+  struct action *ap1,
+  struct action *ap2
+){
+  int rc;
+  rc = ap1->sp->index - ap2->sp->index;
+  if( rc==0 ){
+    rc = (int)ap1->type - (int)ap2->type;
+  }
+  if( rc==0 && (ap1->type==REDUCE || ap1->type==SHIFTREDUCE) ){
+    rc = ap1->x.rp->index - ap2->x.rp->index;
+  }
+  if( rc==0 ){
+    rc = (int) (ap2 - ap1);
+  }
+  return rc;
+}
+
+/* Sort parser actions.  Returns the head of the list as reordered
+** by msort() using actioncmp(). */
+static struct action *Action_sort(
+  struct action *ap
+){
+  ap = (struct action *)msort((char *)ap,(char **)&ap->next,
+                              (int(*)(const char*,const char*))actioncmp);
+  return ap;
+}
+
+/* Push a new action of the given type for look-ahead symbol sp onto the
+** front of the list *app.  arg is stored as the target state when type
+** is SHIFT and as the rule for every other type.
+*/
+void Action_add(
+  struct action **app,
+  enum e_action type,
+  struct symbol *sp,
+  char *arg
+){
+  struct action *newaction;
+  newaction = Action_new();
+  newaction->next = *app;
+  *app = newaction;
+  newaction->type = type;
+  newaction->sp = sp;
+  newaction->spOpt = 0;
+  if( type==SHIFT ){
+    newaction->x.stp = (struct state *)arg;
+  }else{
+    newaction->x.rp = (struct rule *)arg;
+  }
+}
+/********************** New code to implement the "acttab" module ***********/
+/*
+** This module implements routines use to construct the yy_action[] table.
+*/
+
+/*
+** The state of the yy_action table under construction is an instance of
+** the following structure.
+**
+** The yy_action table maps the pair (state_number, lookahead) into an
+** action_number.  The table is an array of integers pairs.
The state_number
+** determines an initial offset into the yy_action array.  The lookahead
+** value is then added to this initial offset to get an index X into the
+** yy_action array. If the aAction[X].lookahead equals the value
+** of the lookahead input, then the value of the action_number output is
+** aAction[X].action.  If the lookaheads do not match then the
+** default action for the state_number is returned.
+**
+** All actions associated with a single state_number are first entered
+** into aLookahead[] using multiple calls to acttab_action().  Then the
+** actions for that single state_number are placed into the aAction[]
+** array with a single call to acttab_insert().  The acttab_insert() call
+** also resets the aLookahead[] array in preparation for the next
+** state number.
+*/
+struct lookahead_action {
+  int lookahead;             /* Value of the lookahead token */
+  int action;                /* Action to take on the given lookahead */
+};
+typedef struct acttab acttab;
+struct acttab {
+  int nAction;                 /* Number of used slots in aAction[] */
+  int nActionAlloc;            /* Slots allocated for aAction[] */
+  struct lookahead_action
+    *aAction,                  /* The yy_action[] table under construction */
+    *aLookahead;               /* A single new transaction set */
+  int mnLookahead;             /* Minimum aLookahead[].lookahead */
+  int mnAction;                /* Action associated with mnLookahead */
+  int mxLookahead;             /* Maximum aLookahead[].lookahead */
+  int nLookahead;              /* Used slots in aLookahead[] */
+  int nLookaheadAlloc;         /* Slots allocated in aLookahead[] */
+};
+
+/* Return the number of entries in the yy_action table */
+#define acttab_size(X) ((X)->nAction)
+
+/* The value for the N-th entry in yy_action */
+#define acttab_yyaction(X,N)  ((X)->aAction[N].action)
+
+/* The value for the N-th entry in yy_lookahead */
+#define acttab_yylookahead(X,N)  ((X)->aAction[N].lookahead)
+
+/* Free all memory associated with the given acttab */
+void acttab_free(acttab *p){
+  free( p->aAction );
+  free( p->aLookahead );
+  free( p );
+}
+
+/* Allocate a new acttab structure.  The structure comes back zero-filled
+** from calloc(), so both arrays start out empty and unallocated.  Exits
+** the program if the allocation fails. */
+acttab *acttab_alloc(void){
+  acttab *p = (acttab *) calloc( 1, sizeof(*p) );
+  if( p==0 ){
+    fprintf(stderr,"Unable to allocate memory for a new acttab.");
+    exit(1);
+  }
+  return p;
+}
+
+/* Add a new action to the current transaction set.
+**
+** This routine is called once for each lookahead for a particular
+** state.
+**
+** aLookahead[] grows 25 slots at a time.  The smallest lookahead seen so
+** far (with its action) and the largest lookahead are tracked alongside.
+*/
+void acttab_action(acttab *p, int lookahead, int action){
+  if( p->nLookahead>=p->nLookaheadAlloc ){
+    p->nLookaheadAlloc += 25;
+    p->aLookahead = (struct lookahead_action *) realloc( p->aLookahead,
+                             sizeof(p->aLookahead[0])*p->nLookaheadAlloc );
+    if( p->aLookahead==0 ){
+      fprintf(stderr,"malloc failed\n");
+      exit(1);
+    }
+  }
+  if( p->nLookahead==0 ){
+    p->mxLookahead = lookahead;
+    p->mnLookahead = lookahead;
+    p->mnAction = action;
+  }else{
+    if( p->mxLookahead<lookahead ) p->mxLookahead = lookahead;
+    if( p->mnLookahead>lookahead ){
+      p->mnLookahead = lookahead;
+      p->mnAction = action;
+    }
+  }
+  p->aLookahead[p->nLookahead].lookahead = lookahead;
+  p->aLookahead[p->nLookahead].action = action;
+  p->nLookahead++;
+}
+
+/*
+** Add the transaction set built up with prior calls to acttab_action()
+** into the current action table.  Then reset the transaction set back
+** to an empty set in preparation for a new round of acttab_action() calls.
+**
+** Return the offset into the action table of the new transaction.
+*/
+int acttab_insert(acttab *p){
+  int i, j, k, n;
+  assert( p->nLookahead>0 );
+
+  /* Make sure we have enough space to hold the expanded action table
+  ** in the worst case.
The worst case occurs if the transaction set + ** must be appended to the current action table + */ + n = p->mxLookahead + 1; + if( p->nAction + n >= p->nActionAlloc ){ + int oldAlloc = p->nActionAlloc; + p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; + p->aAction = (struct lookahead_action *) realloc( p->aAction, + sizeof(p->aAction[0])*p->nActionAlloc); + if( p->aAction==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=oldAlloc; inActionAlloc; i++){ + p->aAction[i].lookahead = -1; + p->aAction[i].action = -1; + } + } + + /* Scan the existing action table looking for an offset that is a + ** duplicate of the current transaction set. Fall out of the loop + ** if and when the duplicate is found. + ** + ** i is the index in p->aAction[] where p->mnLookahead is inserted. + */ + for(i=p->nAction-1; i>=0; i--){ + if( p->aAction[i].lookahead==p->mnLookahead ){ + /* All lookaheads and actions in the aLookahead[] transaction + ** must match against the candidate aAction[i] entry. */ + if( p->aAction[i].action!=p->mnAction ) continue; + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 || k>=p->nAction ) break; + if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; + if( p->aLookahead[j].action!=p->aAction[k].action ) break; + } + if( jnLookahead ) continue; + + /* No possible lookahead value that is not in the aLookahead[] + ** transaction is allowed to match aAction[i] */ + n = 0; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead<0 ) continue; + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; + } + if( n==p->nLookahead ){ + break; /* An exact match is found at offset i */ + } + } + } + + /* If no existing offsets exactly match the current transaction, find an + ** an empty offset in the aAction[] table in which we can add the + ** aLookahead[] transaction. + */ + if( i<0 ){ + /* Look for holes in the aAction[] table that fit the current + ** aLookahead[] transaction. 
Leave i set to the offset of the hole. + ** If no holes are found, i is left at p->nAction, which means the + ** transaction will be appended. */ + for(i=0; inActionAlloc - p->mxLookahead; i++){ + if( p->aAction[i].lookahead<0 ){ + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 ) break; + if( p->aAction[k].lookahead>=0 ) break; + } + if( jnLookahead ) continue; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; + } + if( j==p->nAction ){ + break; /* Fits in empty slots */ + } + } + } + } + /* Insert transaction set at index i. */ + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + p->aAction[k] = p->aLookahead[j]; + if( k>=p->nAction ) p->nAction = k+1; + } + p->nLookahead = 0; + + /* Return the offset that is added to the lookahead in order to get the + ** index into yy_action of the action */ + return i - p->mnLookahead; +} + +/********************** From the file "build.c" *****************************/ +/* +** Routines to construction the finite state machine for the LEMON +** parser generator. +*/ + +/* Find a precedence symbol of every rule in the grammar. +** +** Those rules which have a precedence symbol coded in the input +** grammar using the "[symbol]" construct will already have the +** rp->precsym field filled. Other rules take as their precedence +** symbol the first RHS symbol with a defined precedence. If there +** are not RHS symbols with a defined precedence, the precedence +** symbol field is left blank. 
+*/ +void FindRulePrecedences(struct lemon *xp) +{ + struct rule *rp; + for(rp=xp->rule; rp; rp=rp->next){ + if( rp->precsym==0 ){ + int i, j; + for(i=0; inrhs && rp->precsym==0; i++){ + struct symbol *sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + if( sp->subsym[j]->prec>=0 ){ + rp->precsym = sp->subsym[j]; + break; + } + } + }else if( sp->prec>=0 ){ + rp->precsym = rp->rhs[i]; + } + } + } + } + return; +} + +/* Find all nonterminals which will generate the empty string. +** Then go back and compute the first sets of every nonterminal. +** The first set is the set of all terminal symbols which can begin +** a string generated by that nonterminal. +*/ +void FindFirstSets(struct lemon *lemp) +{ + int i, j; + struct rule *rp; + int progress; + + for(i=0; insymbol; i++){ + lemp->symbols[i]->lambda = LEMON_FALSE; + } + for(i=lemp->nterminal; insymbol; i++){ + lemp->symbols[i]->firstset = SetNew(); + } + + /* First compute all lambdas */ + do{ + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->lhs->lambda ) continue; + for(i=0; inrhs; i++){ + struct symbol *sp = rp->rhs[i]; + assert( sp->type==NONTERMINAL || sp->lambda==LEMON_FALSE ); + if( sp->lambda==LEMON_FALSE ) break; + } + if( i==rp->nrhs ){ + rp->lhs->lambda = LEMON_TRUE; + progress = 1; + } + } + }while( progress ); + + /* Now compute all first sets */ + do{ + struct symbol *s1, *s2; + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + s1 = rp->lhs; + for(i=0; inrhs; i++){ + s2 = rp->rhs[i]; + if( s2->type==TERMINAL ){ + progress += SetAdd(s1->firstset,s2->index); + break; + }else if( s2->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + progress += SetAdd(s1->firstset,s2->subsym[j]->index); + } + break; + }else if( s1==s2 ){ + if( s1->lambda==LEMON_FALSE ) break; + }else{ + progress += SetUnion(s1->firstset,s2->firstset); + if( s2->lambda==LEMON_FALSE ) break; + } + } + } + }while( progress ); + return; +} + +/* Compute all LR(0) states for the grammar. 
Links +** are added to between some states so that the LR(1) follow sets +** can be computed later. +*/ +PRIVATE struct state *getstate(struct lemon *); /* forward reference */ +void FindStates(struct lemon *lemp) +{ + struct symbol *sp; + struct rule *rp; + + Configlist_init(); + + /* Find the start symbol */ + if( lemp->start ){ + sp = Symbol_find(lemp->start); + if( sp==0 ){ + ErrorMsg(lemp->filename,0, +"The specified start symbol \"%s\" is not \ +in a nonterminal of the grammar. \"%s\" will be used as the start \ +symbol instead.",lemp->start,lemp->startRule->lhs->name); + lemp->errorcnt++; + sp = lemp->startRule->lhs; + } + }else{ + sp = lemp->startRule->lhs; + } + + /* Make sure the start symbol doesn't occur on the right-hand side of + ** any rule. Report an error if it does. (YACC would generate a new + ** start symbol in this case.) */ + for(rp=lemp->rule; rp; rp=rp->next){ + int i; + for(i=0; inrhs; i++){ + if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ + ErrorMsg(lemp->filename,0, +"The start symbol \"%s\" occurs on the \ +right-hand side of a rule. This will result in a parser which \ +does not work properly.",sp->name); + lemp->errorcnt++; + } + } + } + + /* The basis configuration set for the first state + ** is all rules which have the start symbol as their + ** left-hand side */ + for(rp=sp->rule; rp; rp=rp->nextlhs){ + struct config *newcfp; + rp->lhsStart = 1; + newcfp = Configlist_addbasis(rp,0); + SetAdd(newcfp->fws,0); + } + + /* Compute the first state. All other states will be + ** computed automatically during the computation of the first one. + ** The returned pointer to the first state is not used. */ + (void)getstate(lemp); + return; +} + +/* Return a pointer to a state which is described by the configuration +** list which has been built from calls to Configlist_add. 
+*/ +PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ +PRIVATE struct state *getstate(struct lemon *lemp) +{ + struct config *cfp, *bp; + struct state *stp; + + /* Extract the sorted basis of the new state. The basis was constructed + ** by prior calls to "Configlist_addbasis()". */ + Configlist_sortbasis(); + bp = Configlist_basis(); + + /* Get a state with the same basis */ + stp = State_find(bp); + if( stp ){ + /* A state with the same basis already exists! Copy all the follow-set + ** propagation links from the state under construction into the + ** preexisting state, then return a pointer to the preexisting state */ + struct config *x, *y; + for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ + Plink_copy(&y->bplp,x->bplp); + Plink_delete(x->fplp); + x->fplp = x->bplp = 0; + } + cfp = Configlist_return(); + Configlist_eat(cfp); + }else{ + /* This really is a new state. Construct all the details */ + Configlist_closure(lemp); /* Compute the configuration closure */ + Configlist_sort(); /* Sort the configuration closure */ + cfp = Configlist_return(); /* Get a pointer to the config list */ + stp = State_new(); /* A new state structure */ + MemoryCheck(stp); + stp->bp = bp; /* Remember the configuration basis */ + stp->cfp = cfp; /* Remember the configuration closure */ + stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ + stp->ap = 0; /* No actions, yet. */ + State_insert(stp,stp->bp); /* Add to the state table */ + buildshifts(lemp,stp); /* Recursively compute successor states */ + } + return stp; +} + +/* +** Return true if two symbols are the same. +*/ +int same_symbol(struct symbol *a, struct symbol *b) +{ + int i; + if( a==b ) return 1; + if( a->type!=MULTITERMINAL ) return 0; + if( b->type!=MULTITERMINAL ) return 0; + if( a->nsubsym!=b->nsubsym ) return 0; + for(i=0; insubsym; i++){ + if( a->subsym[i]!=b->subsym[i] ) return 0; + } + return 1; +} + +/* Construct all successor states to the given state. 
A "successor" +** state is any state which can be reached by a shift action. +*/ +PRIVATE void buildshifts(struct lemon *lemp, struct state *stp) +{ + struct config *cfp; /* For looping thru the config closure of "stp" */ + struct config *bcfp; /* For the inner loop on config closure of "stp" */ + struct config *newcfg; /* */ + struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ + struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ + struct state *newstp; /* A pointer to a successor state */ + + /* Each configuration becomes complete after it contibutes to a successor + ** state. Initially, all configurations are incomplete */ + for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; + + /* Loop through all configurations of the state "stp" */ + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ + if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ + Configlist_reset(); /* Reset the new config set */ + sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ + + /* For every configuration in the state "stp" which has the symbol "sp" + ** following its dot, add the same configuration to the basis set under + ** construction but with the dot shifted one symbol to the right. 
*/ + for(bcfp=cfp; bcfp; bcfp=bcfp->next){ + if( bcfp->status==COMPLETE ) continue; /* Already used */ + if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ + bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ + if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ + bcfp->status = COMPLETE; /* Mark this config as used */ + newcfg = Configlist_addbasis(bcfp->rp,bcfp->dot+1); + Plink_add(&newcfg->bplp,bcfp); + } + + /* Get a pointer to the state described by the basis configuration set + ** constructed in the preceding loop */ + newstp = getstate(lemp); + + /* The state "newstp" is reached from the state "stp" by a shift action + ** on the symbol "sp" */ + if( sp->type==MULTITERMINAL ){ + int i; + for(i=0; insubsym; i++){ + Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); + } + }else{ + Action_add(&stp->ap,SHIFT,sp,(char *)newstp); + } + } +} + +/* +** Construct the propagation links +*/ +void FindLinks(struct lemon *lemp) +{ + int i; + struct config *cfp, *other; + struct state *stp; + struct plink *plp; + + /* Housekeeping detail: + ** Add to every propagate link a pointer back to the state to + ** which the link is attached. */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + cfp->stp = stp; + } + } + + /* Convert all backlinks into forward links. Only the forward + ** links are used in the follow-set computation. */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + for(plp=cfp->bplp; plp; plp=plp->next){ + other = plp->cfp; + Plink_add(&other->fplp,cfp); + } + } + } +} + +/* Compute all followsets. +** +** A followset is the set of all symbols which can come immediately +** after a configuration. 
+*/ +void FindFollowSets(struct lemon *lemp) +{ + int i; + struct config *cfp; + struct plink *plp; + int progress; + int change; + + for(i=0; instate; i++){ + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + cfp->status = INCOMPLETE; + } + } + + do{ + progress = 0; + for(i=0; instate; i++){ + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + if( cfp->status==COMPLETE ) continue; + for(plp=cfp->fplp; plp; plp=plp->next){ + change = SetUnion(plp->cfp->fws,cfp->fws); + if( change ){ + plp->cfp->status = INCOMPLETE; + progress = 1; + } + } + cfp->status = COMPLETE; + } + } + }while( progress ); +} + +static int resolve_conflict(struct action *,struct action *); + +/* Compute the reduce actions, and resolve conflicts. +*/ +void FindActions(struct lemon *lemp) +{ + int i,j; + struct config *cfp; + struct state *stp; + struct symbol *sp; + struct rule *rp; + + /* Add all of the reduce actions + ** A reduce action is added for each element of the followset of + ** a configuration which has its dot at the extreme right. + */ + for(i=0; instate; i++){ /* Loop over all states */ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ + if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ + for(j=0; jnterminal; j++){ + if( SetFind(cfp->fws,j) ){ + /* Add a reduce action to the state "stp" which will reduce by the + ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ + Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); + } + } + } + } + } + + /* Add the accepting token */ + if( lemp->start ){ + sp = Symbol_find(lemp->start); + if( sp==0 ) sp = lemp->startRule->lhs; + }else{ + sp = lemp->startRule->lhs; + } + /* Add to the first state (which is always the starting state of the + ** finite state machine) an action to ACCEPT if the lookahead is the + ** start nonterminal. 
*/ + Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); + + /* Resolve conflicts */ + for(i=0; instate; i++){ + struct action *ap, *nap; + stp = lemp->sorted[i]; + /* assert( stp->ap ); */ + stp->ap = Action_sort(stp->ap); + for(ap=stp->ap; ap && ap->next; ap=ap->next){ + for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ + /* The two actions "ap" and "nap" have the same lookahead. + ** Figure out which one should be used */ + lemp->nconflict += resolve_conflict(ap,nap); + } + } + } + + /* Report an error for each rule that can never be reduced. */ + for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; + for(i=0; instate; i++){ + struct action *ap; + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; + } + } + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->canReduce ) continue; + ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); + lemp->errorcnt++; + } +} + +/* Resolve a conflict between the two given actions. If the +** conflict can't be resolved, return non-zero. +** +** NO LONGER TRUE: +** To resolve a conflict, first look to see if either action +** is on an error rule. In that case, take the action which +** is not associated with the error rule. If neither or both +** actions are associated with an error rule, then try to +** use precedence to resolve the conflict. +** +** If either action is a SHIFT, then it must be apx. This +** function won't work if apx->type==REDUCE and apy->type==SHIFT. +*/ +static int resolve_conflict( + struct action *apx, + struct action *apy +){ + struct symbol *spx, *spy; + int errcnt = 0; + assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ + if( apx->type==SHIFT && apy->type==SHIFT ){ + apy->type = SSCONFLICT; + errcnt++; + } + if( apx->type==SHIFT && apy->type==REDUCE ){ + spx = apx->sp; + spy = apy->x.rp->precsym; + if( spy==0 || spx->prec<0 || spy->prec<0 ){ + /* Not enough precedence information. 
*/ + apy->type = SRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ /* higher precedence wins */ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = SH_RESOLVED; + }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ + apy->type = RD_RESOLVED; /* associativity */ + }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ + apx->type = SH_RESOLVED; + }else{ + assert( spx->prec==spy->prec && spx->assoc==NONE ); + apx->type = ERROR; + } + }else if( apx->type==REDUCE && apy->type==REDUCE ){ + spx = apx->x.rp->precsym; + spy = apy->x.rp->precsym; + if( spx==0 || spy==0 || spx->prec<0 || + spy->prec<0 || spx->prec==spy->prec ){ + apy->type = RRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = RD_RESOLVED; + } + }else{ + assert( + apx->type==SH_RESOLVED || + apx->type==RD_RESOLVED || + apx->type==SSCONFLICT || + apx->type==SRCONFLICT || + apx->type==RRCONFLICT || + apy->type==SH_RESOLVED || + apy->type==RD_RESOLVED || + apy->type==SSCONFLICT || + apy->type==SRCONFLICT || + apy->type==RRCONFLICT + ); + /* The REDUCE/SHIFT case cannot happen because SHIFTs come before + ** REDUCEs on the list. If we reach this point it must be because + ** the parser conflict had already been resolved. */ + } + return errcnt; +} +/********************* From the file "configlist.c" *************************/ +/* +** Routines to processing a configuration list and building a state +** in the LEMON parser generator. 
+*/ + +static struct config *freelist = 0; /* List of free configurations */ +static struct config *current = 0; /* Top of list of configurations */ +static struct config **currentend = 0; /* Last on list of configs */ +static struct config *basis = 0; /* Top of list of basis configs */ +static struct config **basisend = 0; /* End of list of basis configs */ + +/* Return a pointer to a new configuration */ +PRIVATE struct config *newconfig(){ + struct config *newcfg; + if( freelist==0 ){ + int i; + int amt = 3; + freelist = (struct config *)calloc( amt, sizeof(struct config) ); + if( freelist==0 ){ + fprintf(stderr,"Unable to allocate memory for a new configuration."); + exit(1); + } + for(i=0; inext; + return newcfg; +} + +/* The configuration "old" is no longer used */ +PRIVATE void deleteconfig(struct config *old) +{ + old->next = freelist; + freelist = old; +} + +/* Initialized the configuration list builder */ +void Configlist_init(){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_init(); + return; +} + +/* Initialized the configuration list builder */ +void Configlist_reset(){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_clear(0); + return; +} + +/* Add another configuration to the configuration list */ +struct config *Configlist_add( + struct rule *rp, /* The rule */ + int dot /* Index into the RHS of the rule where the dot goes */ +){ + struct config *cfp, model; + + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = &cfp->next; + Configtable_insert(cfp); + } + return cfp; +} + +/* Add a basis configuration to the configuration list */ +struct config *Configlist_addbasis(struct rule *rp, int dot) +{ + struct config *cfp, model; + + assert( 
basisend!=0 ); + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = &cfp->next; + *basisend = cfp; + basisend = &cfp->bp; + Configtable_insert(cfp); + } + return cfp; +} + +/* Compute the closure of the configuration list */ +void Configlist_closure(struct lemon *lemp) +{ + struct config *cfp, *newcfp; + struct rule *rp, *newrp; + struct symbol *sp, *xsp; + int i, dot; + + assert( currentend!=0 ); + for(cfp=current; cfp; cfp=cfp->next){ + rp = cfp->rp; + dot = cfp->dot; + if( dot>=rp->nrhs ) continue; + sp = rp->rhs[dot]; + if( sp->type==NONTERMINAL ){ + if( sp->rule==0 && sp!=lemp->errsym ){ + ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", + sp->name); + lemp->errorcnt++; + } + for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ + newcfp = Configlist_add(newrp,0); + for(i=dot+1; inrhs; i++){ + xsp = rp->rhs[i]; + if( xsp->type==TERMINAL ){ + SetAdd(newcfp->fws,xsp->index); + break; + }else if( xsp->type==MULTITERMINAL ){ + int k; + for(k=0; knsubsym; k++){ + SetAdd(newcfp->fws, xsp->subsym[k]->index); + } + break; + }else{ + SetUnion(newcfp->fws,xsp->firstset); + if( xsp->lambda==LEMON_FALSE ) break; + } + } + if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); + } + } + } + return; +} + +/* Sort the configuration list */ +void Configlist_sort(){ + current = (struct config*)msort((char*)current,(char**)&(current->next), + Configcmp); + currentend = 0; + return; +} + +/* Sort the basis configuration list */ +void Configlist_sortbasis(){ + basis = (struct config*)msort((char*)current,(char**)&(current->bp), + Configcmp); + basisend = 0; + return; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_return(){ + struct config *old; + old = 
current; + current = 0; + currentend = 0; + return old; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_basis(){ + struct config *old; + old = basis; + basis = 0; + basisend = 0; + return old; +} + +/* Free all elements of the given configuration list */ +void Configlist_eat(struct config *cfp) +{ + struct config *nextcfp; + for(; cfp; cfp=nextcfp){ + nextcfp = cfp->next; + assert( cfp->fplp==0 ); + assert( cfp->bplp==0 ); + if( cfp->fws ) SetFree(cfp->fws); + deleteconfig(cfp); + } + return; +} +/***************** From the file "error.c" *********************************/ +/* +** Code for printing error message. +*/ + +void ErrorMsg(const char *filename, int lineno, const char *format, ...){ + va_list ap; + fprintf(stderr, "%s:%d: ", filename, lineno); + va_start(ap, format); + vfprintf(stderr,format,ap); + va_end(ap); + fprintf(stderr, "\n"); +} +/**************** From the file "main.c" ************************************/ +/* +** Main program file for the LEMON parser generator. +*/ + +/* Report an out-of-memory condition and abort. This function +** is used mostly by the "MemoryCheck" macro in struct.h +*/ +void memory_error(){ + fprintf(stderr,"Out of memory. Aborting...\n"); + exit(1); +} + +static int nDefine = 0; /* Number of -D options on the command line */ +static char **azDefine = 0; /* Name of the -D macros */ + +/* This routine is called with the argument to each -D command-line option. +** Add the macro defined to the azDefine array. 
+*/ +static void handle_D_option(char *z){ + char **paz; + nDefine++; + azDefine = (char **) realloc(azDefine, sizeof(azDefine[0])*nDefine); + if( azDefine==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + paz = &azDefine[nDefine-1]; + *paz = (char *) malloc( lemonStrlen(z)+1 ); + if( *paz==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + lemon_strcpy(*paz, z); + for(z=*paz; *z && *z!='='; z++){} + *z = 0; +} + +static char *user_templatename = NULL; +static void handle_T_option(char *z){ + user_templatename = (char *) malloc( lemonStrlen(z)+1 ); + if( user_templatename==0 ){ + memory_error(); + } + lemon_strcpy(user_templatename, z); +} + +/* Merge together to lists of rules ordered by rule.iRule */ +static struct rule *Rule_merge(struct rule *pA, struct rule *pB){ + struct rule *pFirst = 0; + struct rule **ppPrev = &pFirst; + while( pA && pB ){ + if( pA->iRuleiRule ){ + *ppPrev = pA; + ppPrev = &pA->next; + pA = pA->next; + }else{ + *ppPrev = pB; + ppPrev = &pB->next; + pB = pB->next; + } + } + if( pA ){ + *ppPrev = pA; + }else{ + *ppPrev = pB; + } + return pFirst; +} + +/* +** Sort a list of rules in order of increasing iRule value +*/ +static struct rule *Rule_sort(struct rule *rp){ + int i; + struct rule *pNext; + struct rule *x[32]; + memset(x, 0, sizeof(x)); + while( rp ){ + pNext = rp->next; + rp->next = 0; + for(i=0; iuseCnt = 0; + + /* Parse the input file */ + Parse(&lem); + if( lem.errorcnt ) exit(lem.errorcnt); + if( lem.nrule==0 ){ + fprintf(stderr,"Empty grammar.\n"); + exit(1); + } + + /* Count and index the symbols of the grammar */ + Symbol_new("{default}"); + lem.nsymbol = Symbol_count(); + lem.symbols = Symbol_arrayof(); + for(i=0; iindex = i; + qsort(lem.symbols,lem.nsymbol,sizeof(struct symbol*), Symbolcmpp); + for(i=0; iindex = i; + while( lem.symbols[i-1]->type==MULTITERMINAL ){ i--; } + assert( strcmp(lem.symbols[i-1]->name,"{default}")==0 ); + lem.nsymbol = i - 1; + for(i=1; ISUPPER(lem.symbols[i]->name[0]); i++); + 
lem.nterminal = i; + + /* Assign sequential rule numbers. Start with 0. Put rules that have no + ** reduce action C-code associated with them last, so that the switch() + ** statement that selects reduction actions will have a smaller jump table. + */ + for(i=0, rp=lem.rule; rp; rp=rp->next){ + rp->iRule = rp->code ? i++ : -1; + } + for(rp=lem.rule; rp; rp=rp->next){ + if( rp->iRule<0 ) rp->iRule = i++; + } + lem.startRule = lem.rule; + lem.rule = Rule_sort(lem.rule); + + /* Generate a reprint of the grammar, if requested on the command line */ + if( rpflag ){ + Reprint(&lem); + }else{ + /* Initialize the size for all follow and first sets */ + SetSize(lem.nterminal+1); + + /* Find the precedence for every production rule (that has one) */ + FindRulePrecedences(&lem); + + /* Compute the lambda-nonterminals and the first-sets for every + ** nonterminal */ + FindFirstSets(&lem); + + /* Compute all LR(0) states. Also record follow-set propagation + ** links so that the follow-set can be computed later */ + lem.nstate = 0; + FindStates(&lem); + lem.sorted = State_arrayof(); + + /* Tie up loose ends on the propagation links */ + FindLinks(&lem); + + /* Compute the follow set of every reducible configuration */ + FindFollowSets(&lem); + + /* Compute the action tables */ + FindActions(&lem); + + /* Compress the action tables */ + if( compress==0 ) CompressTables(&lem); + + /* Reorder and renumber the states so that states with fewer choices + ** occur at the end. This is an optimization that helps make the + ** generated parser tables smaller. */ + if( noResort==0 ) ResortStates(&lem); + + /* Generate a report of the parser generated. (the "y.output" file) */ + if( !quiet ) ReportOutput(&lem); + + /* Generate the source code for the parser */ + ReportTable(&lem, mhflag); + + /* Produce a header file for use by the scanner. (This step is + ** omitted if the "-m" option is used because makeheaders will + ** generate the file for us.) 
*/ + if( !mhflag ) ReportHeader(&lem); + } + if( statistics ){ + printf("Parser statistics:\n"); + stats_line("terminal symbols", lem.nterminal); + stats_line("non-terminal symbols", lem.nsymbol - lem.nterminal); + stats_line("total symbols", lem.nsymbol); + stats_line("rules", lem.nrule); + stats_line("states", lem.nxstate); + stats_line("conflicts", lem.nconflict); + stats_line("action table entries", lem.nactiontab); + stats_line("total table size (bytes)", lem.tablesize); + } + if( lem.nconflict > 0 ){ + fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); + } + + /* return 0 on success, 1 on failure. */ + exitcode = ((lem.errorcnt > 0) || (lem.nconflict > 0)) ? 1 : 0; + exit(exitcode); + return (exitcode); +} +/******************** From the file "msort.c" *******************************/ +/* +** A generic merge-sort program. +** +** USAGE: +** Let "ptr" be a pointer to some structure which is at the head of +** a null-terminated list. Then to sort the list call: +** +** ptr = msort(ptr,&(ptr->next),cmpfnc); +** +** In the above, "cmpfnc" is a pointer to a function which compares +** two instances of the structure and returns an integer, as in +** strcmp. The second argument is a pointer to the pointer to the +** second element of the linked list. This address is used to compute +** the offset to the "next" field within the structure. The offset to +** the "next" field must be constant for all structures in the list. +** +** The function returns a new pointer which is the head of the list +** after sorting. +** +** ALGORITHM: +** Merge-sort. +*/ + +/* +** Return a pointer to the next structure in the linked list. +*/ +#define NEXT(A) (*(char**)(((char*)A)+offset)) + +/* +** Inputs: +** a: A sorted, null-terminated linked list. (May be null). +** b: A sorted, null-terminated linked list. (May be null). +** cmp: A pointer to the comparison function. +** offset: Offset in the structure to the "next" field. 
+** +** Return Value: +** A pointer to the head of a sorted list containing the elements +** of both a and b. +** +** Side effects: +** The "next" pointers for elements in the lists a and b are +** changed. +*/ +static char *merge( + char *a, + char *b, + int (*cmp)(const char*,const char*), + int offset +){ + char *ptr, *head; + + if( a==0 ){ + head = b; + }else if( b==0 ){ + head = a; + }else{ + if( (*cmp)(a,b)<=0 ){ + ptr = a; + a = NEXT(a); + }else{ + ptr = b; + b = NEXT(b); + } + head = ptr; + while( a && b ){ + if( (*cmp)(a,b)<=0 ){ + NEXT(ptr) = a; + ptr = a; + a = NEXT(a); + }else{ + NEXT(ptr) = b; + ptr = b; + b = NEXT(b); + } + } + if( a ) NEXT(ptr) = a; + else NEXT(ptr) = b; + } + return head; +} + +/* +** Inputs: +** list: Pointer to a singly-linked list of structures. +** next: Pointer to pointer to the second element of the list. +** cmp: A comparison function. +** +** Return Value: +** A pointer to the head of a sorted list containing the elements +** orginally in list. +** +** Side effects: +** The "next" pointers for elements in list are changed. 
+*/ +#define LISTSIZE 30 +static char *msort( + char *list, + char **next, + int (*cmp)(const char*,const char*) +){ + unsigned long offset; + char *ep; + char *set[LISTSIZE]; + int i; + offset = (unsigned long)((char*)next - (char*)list); + for(i=0; istate = WAITING_FOR_DECL_KEYWORD; + }else if( ISLOWER(x[0]) ){ + psp->lhs = Symbol_new(x); + psp->nrhs = 0; + psp->lhsalias = 0; + psp->state = WAITING_FOR_ARROW; + }else if( x[0]=='{' ){ + if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"There is no prior rule upon which to attach the code \ +fragment which begins on this line."); + psp->errorcnt++; + }else if( psp->prevrule->code!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"Code fragment beginning on this line is not the first \ +to follow the previous rule."); + psp->errorcnt++; + }else{ + psp->prevrule->line = psp->tokenlineno; + psp->prevrule->code = &x[1]; + psp->prevrule->noCode = 0; + } + }else if( x[0]=='[' ){ + psp->state = PRECEDENCE_MARK_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Token \"%s\" should be either \"%%\" or a nonterminal name.", + x); + psp->errorcnt++; + } + break; + case PRECEDENCE_MARK_1: + if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "The precedence symbol must be a terminal."); + psp->errorcnt++; + }else if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "There is no prior rule to assign precedence \"[%s]\".",x); + psp->errorcnt++; + }else if( psp->prevrule->precsym!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"Precedence mark on this line is not the first \ +to follow the previous rule."); + psp->errorcnt++; + }else{ + psp->prevrule->precsym = Symbol_new(x); + } + psp->state = PRECEDENCE_MARK_2; + break; + case PRECEDENCE_MARK_2: + if( x[0]!=']' ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"]\" on precedence mark."); + psp->errorcnt++; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + break; + case WAITING_FOR_ARROW: + if( x[0]==':' && 
x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + }else if( x[0]=='(' ){ + psp->state = LHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Expected to see a \":\" following the LHS symbol \"%s\".", + psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_1: + if( ISALPHA(x[0]) ){ + psp->lhsalias = x; + psp->state = LHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the LHS \"%s\"\n", + x,psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = LHS_ALIAS_3; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_3: + if( x[0]==':' && x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"->\" following: \"%s(%s)\".", + psp->lhs->name,psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case IN_RHS: + if( x[0]=='.' 
){ + struct rule *rp; + rp = (struct rule *)calloc( sizeof(struct rule) + + sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); + if( rp==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't allocate enough memory for this rule."); + psp->errorcnt++; + psp->prevrule = 0; + }else{ + int i; + rp->ruleline = psp->tokenlineno; + rp->rhs = (struct symbol**)&rp[1]; + rp->rhsalias = (const char**)&(rp->rhs[psp->nrhs]); + for(i=0; inrhs; i++){ + rp->rhs[i] = psp->rhs[i]; + rp->rhsalias[i] = psp->alias[i]; + } + rp->lhs = psp->lhs; + rp->lhsalias = psp->lhsalias; + rp->nrhs = psp->nrhs; + rp->code = 0; + rp->noCode = 1; + rp->precsym = 0; + rp->index = psp->gp->nrule++; + rp->nextlhs = rp->lhs->rule; + rp->lhs->rule = rp; + rp->next = 0; + if( psp->firstrule==0 ){ + psp->firstrule = psp->lastrule = rp; + }else{ + psp->lastrule->next = rp; + psp->lastrule = rp; + } + psp->prevrule = rp; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISALPHA(x[0]) ){ + if( psp->nrhs>=MAXRHS ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Too many symbols on RHS of rule beginning at \"%s\".", + x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + }else{ + psp->rhs[psp->nrhs] = Symbol_new(x); + psp->alias[psp->nrhs] = 0; + psp->nrhs++; + } + }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){ + struct symbol *msp = psp->rhs[psp->nrhs-1]; + if( msp->type!=MULTITERMINAL ){ + struct symbol *origsp = msp; + msp = (struct symbol *) calloc(1,sizeof(*msp)); + memset(msp, 0, sizeof(*msp)); + msp->type = MULTITERMINAL; + msp->nsubsym = 1; + msp->subsym = (struct symbol **) calloc(1,sizeof(struct symbol*)); + msp->subsym[0] = origsp; + msp->name = origsp->name; + psp->rhs[psp->nrhs-1] = msp; + } + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); + if( ISLOWER(x[1]) || ISLOWER(msp->subsym[0]->name[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + 
"Cannot form a compound containing a non-terminal"); + psp->errorcnt++; + } + }else if( x[0]=='(' && psp->nrhs>0 ){ + psp->state = RHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal character on RHS of rule: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_1: + if( ISALPHA(x[0]) ){ + psp->alias[psp->nrhs-1] = x; + psp->state = RHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", + x,psp->rhs[psp->nrhs-1]->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case WAITING_FOR_DECL_KEYWORD: + if( ISALPHA(x[0]) ){ + psp->declkeyword = x; + psp->declargslot = 0; + psp->decllinenoslot = 0; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + if( strcmp(x,"name")==0 ){ + psp->declargslot = &(psp->gp->name); + psp->insertLineMacro = 0; + }else if( strcmp(x,"include")==0 ){ + psp->declargslot = &(psp->gp->include); + }else if( strcmp(x,"code")==0 ){ + psp->declargslot = &(psp->gp->extracode); + }else if( strcmp(x,"token_destructor")==0 ){ + psp->declargslot = &psp->gp->tokendest; + }else if( strcmp(x,"default_destructor")==0 ){ + psp->declargslot = &psp->gp->vardest; + }else if( strcmp(x,"token_prefix")==0 ){ + psp->declargslot = &psp->gp->tokenprefix; + psp->insertLineMacro = 0; + }else if( strcmp(x,"syntax_error")==0 ){ + psp->declargslot = &(psp->gp->error); + }else if( strcmp(x,"parse_accept")==0 ){ + psp->declargslot = &(psp->gp->accept); + }else if( strcmp(x,"parse_failure")==0 ){ + psp->declargslot = &(psp->gp->failure); + }else if( strcmp(x,"stack_overflow")==0 ){ + psp->declargslot = &(psp->gp->overflow); + }else if( 
strcmp(x,"extra_argument")==0 ){ + psp->declargslot = &(psp->gp->arg); + psp->insertLineMacro = 0; + }else if( strcmp(x,"token_type")==0 ){ + psp->declargslot = &(psp->gp->tokentype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"default_type")==0 ){ + psp->declargslot = &(psp->gp->vartype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"stack_size")==0 ){ + psp->declargslot = &(psp->gp->stacksize); + psp->insertLineMacro = 0; + }else if( strcmp(x,"start_symbol")==0 ){ + psp->declargslot = &(psp->gp->start); + psp->insertLineMacro = 0; + }else if( strcmp(x,"left")==0 ){ + psp->preccounter++; + psp->declassoc = LEFT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"right")==0 ){ + psp->preccounter++; + psp->declassoc = RIGHT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"nonassoc")==0 ){ + psp->preccounter++; + psp->declassoc = NONE; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"destructor")==0 ){ + psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; + }else if( strcmp(x,"type")==0 ){ + psp->state = WAITING_FOR_DATATYPE_SYMBOL; + }else if( strcmp(x,"fallback")==0 ){ + psp->fallback = 0; + psp->state = WAITING_FOR_FALLBACK_ID; + }else if( strcmp(x,"wildcard")==0 ){ + psp->state = WAITING_FOR_WILDCARD_ID; + }else if( strcmp(x,"token_class")==0 ){ + psp->state = WAITING_FOR_CLASS_ID; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Unknown declaration keyword: \"%%%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal declaration keyword: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_DESTRUCTOR_SYMBOL: + if( !ISALPHA(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%destructor keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_new(x); + psp->declargslot = &sp->destructor; + 
psp->decllinenoslot = &sp->destLineno; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + } + break; + case WAITING_FOR_DATATYPE_SYMBOL: + if( !ISALPHA(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%type keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_find(x); + if((sp) && (sp->datatype)){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol %%type \"%s\" already defined", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + if (!sp){ + sp = Symbol_new(x); + } + psp->declargslot = &sp->datatype; + psp->insertLineMacro = 0; + psp->state = WAITING_FOR_DECL_ARG; + } + } + break; + case WAITING_FOR_PRECEDENCE_SYMBOL: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISUPPER(x[0]) ){ + struct symbol *sp; + sp = Symbol_new(x); + if( sp->prec>=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol \"%s\" has already be given a precedence.",x); + psp->errorcnt++; + }else{ + sp->prec = psp->preccounter; + sp->assoc = psp->declassoc; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't assign a precedence to \"%s\".",x); + psp->errorcnt++; + } + break; + case WAITING_FOR_DECL_ARG: + if( x[0]=='{' || x[0]=='\"' || ISALNUM(x[0]) ){ + const char *zOld, *zNew; + char *zBuf, *z; + int nOld, n, nLine = 0, nNew, nBack; + int addLineMacro; + char zLine[50]; + zNew = x; + if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; + nNew = lemonStrlen(zNew); + if( *psp->declargslot ){ + zOld = *psp->declargslot; + }else{ + zOld = ""; + } + nOld = lemonStrlen(zOld); + n = nOld + nNew + 20; + addLineMacro = !psp->gp->nolinenosflag && psp->insertLineMacro && + (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); + if( addLineMacro ){ + for(z=psp->filename, nBack=0; *z; z++){ + if( *z=='\\' ) nBack++; + } + lemon_sprintf(zLine, "#line %d ", psp->tokenlineno); + nLine = lemonStrlen(zLine); + n += nLine + lemonStrlen(psp->filename) + 
nBack; + } + *psp->declargslot = (char *) realloc(*psp->declargslot, n); + zBuf = *psp->declargslot + nOld; + if( addLineMacro ){ + if( nOld && zBuf[-1]!='\n' ){ + *(zBuf++) = '\n'; + } + memcpy(zBuf, zLine, nLine); + zBuf += nLine; + *(zBuf++) = '"'; + for(z=psp->filename; *z; z++){ + if( *z=='\\' ){ + *(zBuf++) = '\\'; + } + *(zBuf++) = *z; + } + *(zBuf++) = '"'; + *(zBuf++) = '\n'; + } + if( psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ + psp->decllinenoslot[0] = psp->tokenlineno; + } + memcpy(zBuf, zNew, nNew); + zBuf += nNew; + *zBuf = 0; + psp->state = WAITING_FOR_DECL_OR_RULE; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal argument to %%%s: %s",psp->declkeyword,x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_FALLBACK_ID: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%fallback argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->fallback==0 ){ + psp->fallback = sp; + }else if( sp->fallback ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "More than one fallback assigned to token %s", x); + psp->errorcnt++; + }else{ + sp->fallback = psp->fallback; + psp->gp->has_fallback = 1; + } + } + break; + case WAITING_FOR_WILDCARD_ID: + if( x[0]=='.' 
){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !ISUPPER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%wildcard argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->gp->wildcard==0 ){ + psp->gp->wildcard = sp; + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "Extra wildcard to token: %s", x); + psp->errorcnt++; + } + } + break; + case WAITING_FOR_CLASS_ID: + if( !ISLOWER(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class must be followed by an identifier: ", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else if( Symbol_find(x) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "Symbol \"%s\" already used", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + psp->tkclass = Symbol_new(x); + psp->tkclass->type = MULTITERMINAL; + psp->state = WAITING_FOR_CLASS_TOKEN; + } + break; + case WAITING_FOR_CLASS_TOKEN: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( ISUPPER(x[0]) || ((x[0]=='|' || x[0]=='/') && ISUPPER(x[1])) ){ + struct symbol *msp = psp->tkclass; + msp->nsubsym++; + msp->subsym = (struct symbol **) realloc(msp->subsym, + sizeof(struct symbol*)*msp->nsubsym); + if( !ISUPPER(x[0]) ) x++; + msp->subsym[msp->nsubsym-1] = Symbol_new(x); + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token_class argument \"%s\" should be a token", x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case RESYNC_AFTER_RULE_ERROR: +/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; +** break; */ + case RESYNC_AFTER_DECL_ERROR: + if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; + if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; + break; + } +} + +/* Run the preprocessor over the input file text. The global variables +** azDefine[0] through azDefine[nDefine-1] contains the names of all defined +** macros. 
This routine looks for "%ifdef" and "%ifndef" and "%endif" and +** comments them out. Text in between is also commented out as appropriate. +*/ +static void preprocess_input(char *z){ + int i, j, k, n; + int exclude = 0; + int start = 0; + int lineno = 1; + int start_lineno = 1; + for(i=0; z[i]; i++){ + if( z[i]=='\n' ) lineno++; + if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; + if( strncmp(&z[i],"%endif",6)==0 && ISSPACE(z[i+6]) ){ + if( exclude ){ + exclude--; + if( exclude==0 ){ + for(j=start; jfilename; + ps.errorcnt = 0; + ps.state = INITIALIZE; + + /* Begin by reading the input file */ + fp = fopen(ps.filename,"rb"); + if( fp==0 ){ + ErrorMsg(ps.filename,0,"Can't open this file for reading."); + gp->errorcnt++; + return; + } + fseek(fp,0,2); + filesize = ftell(fp); + rewind(fp); + filebuf = (char *)malloc( filesize+1 ); + if( filesize>100000000 || filebuf==0 ){ + ErrorMsg(ps.filename,0,"Input file too large."); + gp->errorcnt++; + fclose(fp); + return; + } + if( fread(filebuf,1,filesize,fp)!=filesize ){ + ErrorMsg(ps.filename,0,"Can't read in all %d bytes of this file.", + filesize); + free(filebuf); + gp->errorcnt++; + fclose(fp); + return; + } + fclose(fp); + filebuf[filesize] = 0; + + /* Make an initial pass through the file to handle %ifdef and %ifndef */ + preprocess_input(filebuf); + + /* Now scan the text of the input file */ + lineno = 1; + for(cp=filebuf; (c= *cp)!=0; ){ + if( c=='\n' ) lineno++; /* Keep track of the line number */ + if( ISSPACE(c) ){ cp++; continue; } /* Skip all white space */ + if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ + cp+=2; + while( (c= *cp)!=0 && c!='\n' ) cp++; + continue; + } + if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ + cp+=2; + while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ + if( c=='\n' ) lineno++; + cp++; + } + if( c ) cp++; + continue; + } + ps.tokenstart = cp; /* Mark the beginning of the token */ + ps.tokenlineno = lineno; /* Linenumber on which token begins */ + if( c=='\"' ){ /* 
String literals */ + cp++; + while( (c= *cp)!=0 && c!='\"' ){ + if( c=='\n' ) lineno++; + cp++; + } + if( c==0 ){ + ErrorMsg(ps.filename,startline, +"String starting on this line is not terminated before the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( c=='{' ){ /* A block of C code */ + int level; + cp++; + for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ + if( c=='\n' ) lineno++; + else if( c=='{' ) level++; + else if( c=='}' ) level--; + else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ + int prevc; + cp = &cp[2]; + prevc = 0; + while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ + if( c=='\n' ) lineno++; + prevc = c; + cp++; + } + }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ + cp = &cp[2]; + while( (c= *cp)!=0 && c!='\n' ) cp++; + if( c ) lineno++; + }else if( c=='\'' || c=='\"' ){ /* String a character literals */ + int startchar, prevc; + startchar = c; + prevc = 0; + for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ + if( c=='\n' ) lineno++; + if( prevc=='\\' ) prevc = 0; + else prevc = c; + } + } + } + if( c==0 ){ + ErrorMsg(ps.filename,ps.tokenlineno, +"C code starting on this line is not terminated before the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( ISALNUM(c) ){ /* Identifiers */ + while( (c= *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; + nextcp = cp; + }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ + cp += 3; + nextcp = cp; + }else if( (c=='/' || c=='|') && ISALPHA(cp[1]) ){ + cp += 2; + while( (c = *cp)!=0 && (ISALNUM(c) || c=='_') ) cp++; + nextcp = cp; + }else{ /* All other (one character) operators */ + cp++; + nextcp = cp; + } + c = *cp; + *cp = 0; /* Null terminate the token */ + parseonetoken(&ps); /* Parse the token */ + *cp = (char)c; /* Restore the buffer */ + cp = nextcp; + } + free(filebuf); /* Release the buffer after parsing */ + gp->rule = ps.firstrule; + gp->errorcnt = 
ps.errorcnt;
+}
+/*************************** From the file "plink.c" *********************/
+/*
+** Routines processing configuration follow-set propagation links
+** in the LEMON parser generator.
+*/
+static struct plink *plink_freelist = 0;
+
+/* Allocate a new plink.
+**
+** Links are handed out from plink_freelist.  When the free list is empty
+** a batch of "amt" links is allocated with calloc() and chained together
+** first.  The process exits if that allocation fails, so the return
+** value is never NULL.
+*/
+struct plink *Plink_new(){
+  struct plink *newlink;
+
+  if( plink_freelist==0 ){
+    int i;
+    int amt = 100;
+    plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) );
+    if( plink_freelist==0 ){
+      fprintf(stderr,
+      "Unable to allocate memory for a new follow-set propagation link.\n");
+      exit(1);
+    }
+    /* Thread the fresh batch into a singly-linked free list. */
+    for(i=0; i<amt-1; i++) plink_freelist[i].next = &plink_freelist[i+1];
+    plink_freelist[amt-1].next = 0;
+  }
+  /* Pop the head of the free list. */
+  newlink = plink_freelist;
+  plink_freelist = plink_freelist->next;
+  return newlink;
+}
+
+/* Add a plink to a plink list.
+**
+** A new link referring to configuration "cfp" is pushed onto the front
+** of the list whose head pointer is *plpp.
+*/
+void Plink_add(struct plink **plpp, struct config *cfp)
+{
+  struct plink *newlink;
+  newlink = Plink_new();
+  newlink->next = *plpp;
+  *plpp = newlink;
+  newlink->cfp = cfp;
+}
+
+/* Transfer every plink on the list "from" to the list "to".
+**
+** Each link is unhooked from "from" and pushed onto the front of *to,
+** so the moved links end up in reverse order.  No links are allocated
+** or freed.
+*/
+void Plink_copy(struct plink **to, struct plink *from)
+{
+  struct plink *nextpl;
+  while( from ){
+    nextpl = from->next;   /* Save the successor before relinking */
+    from->next = *to;
+    *to = from;
+    from = nextpl;
+  }
+}
+
+/* Delete every plink on the list.
+**
+** The links are not returned to the allocator; they are pushed back
+** onto plink_freelist for reuse by Plink_new().
+*/
+void Plink_delete(struct plink *plp)
+{
+  struct plink *nextpl;
+
+  while( plp ){
+    nextpl = plp->next;
+    plp->next = plink_freelist;
+    plink_freelist = plp;
+    plp = nextpl;
+  }
+}
+/*********************** From the file "report.c" **************************/
+/*
+** Procedures for generating reports and tables in the LEMON parser generator.
+*/
+
+/* Generate a filename with the given suffix.  Space to hold the
+** name comes from malloc() and must be freed by the calling
+** function.
+*/ +PRIVATE char *file_makename(struct lemon *lemp, const char *suffix) +{ + char *name; + char *cp; + + name = (char*)malloc( lemonStrlen(lemp->filename) + lemonStrlen(suffix) + 5 ); + if( name==0 ){ + fprintf(stderr,"Can't allocate space for a filename.\n"); + exit(1); + } + lemon_strcpy(name,lemp->filename); + cp = strrchr(name,'.'); + if( cp ) *cp = 0; + lemon_strcat(name,suffix); + return name; +} + +/* Open a file with a name based on the name of the input file, +** but with a different (specified) suffix, and return a pointer +** to the stream */ +PRIVATE FILE *file_open( + struct lemon *lemp, + const char *suffix, + const char *mode +){ + FILE *fp; + + if( lemp->outname ) free(lemp->outname); + lemp->outname = file_makename(lemp, suffix); + fp = fopen(lemp->outname,mode); + if( fp==0 && *mode=='w' ){ + fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); + lemp->errorcnt++; + return 0; + } + return fp; +} + +/* Duplicate the input file without comments and without actions +** on rules */ +void Reprint(struct lemon *lemp) +{ + struct rule *rp; + struct symbol *sp; + int i, j, maxlen, len, ncolumns, skip; + printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); + maxlen = 10; + for(i=0; insymbol; i++){ + sp = lemp->symbols[i]; + len = lemonStrlen(sp->name); + if( len>maxlen ) maxlen = len; + } + ncolumns = 76/(maxlen+5); + if( ncolumns<1 ) ncolumns = 1; + skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; + for(i=0; insymbol; j+=skip){ + sp = lemp->symbols[j]; + assert( sp->index==j ); + printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); + } + printf("\n"); + } + for(rp=lemp->rule; rp; rp=rp->next){ + printf("%s",rp->lhs->name); + /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ + printf(" ::="); + for(i=0; inrhs; i++){ + sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + printf(" %s", sp->subsym[0]->name); + for(j=1; jnsubsym; j++){ + printf("|%s", sp->subsym[j]->name); + } + }else{ + printf(" %s", sp->name); + } + /* if( 
rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ + } + printf("."); + if( rp->precsym ) printf(" [%s]",rp->precsym->name); + /* if( rp->code ) printf("\n %s",rp->code); */ + printf("\n"); + } +} + +/* Print a single rule. +*/ +void RulePrint(FILE *fp, struct rule *rp, int iCursor){ + struct symbol *sp; + int i, j; + fprintf(fp,"%s ::=",rp->lhs->name); + for(i=0; i<=rp->nrhs; i++){ + if( i==iCursor ) fprintf(fp," *"); + if( i==rp->nrhs ) break; + sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + fprintf(fp," %s", sp->subsym[0]->name); + for(j=1; jnsubsym; j++){ + fprintf(fp,"|%s",sp->subsym[j]->name); + } + }else{ + fprintf(fp," %s", sp->name); + } + } +} + +/* Print the rule for a configuration. +*/ +void ConfigPrint(FILE *fp, struct config *cfp){ + RulePrint(fp, cfp->rp, cfp->dot); +} + +/* #define TEST */ +#if 0 +/* Print a set */ +PRIVATE void SetPrint(out,set,lemp) +FILE *out; +char *set; +struct lemon *lemp; +{ + int i; + char *spacer; + spacer = ""; + fprintf(out,"%12s[",""); + for(i=0; interminal; i++){ + if( SetFind(set,i) ){ + fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); + spacer = " "; + } + } + fprintf(out,"]\n"); +} + +/* Print a plink chain */ +PRIVATE void PlinkPrint(out,plp,tag) +FILE *out; +struct plink *plp; +char *tag; +{ + while( plp ){ + fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); + ConfigPrint(out,plp->cfp); + fprintf(out,"\n"); + plp = plp->next; + } +} +#endif + +/* Print an action to the given file descriptor. Return FALSE if +** nothing was actually printed. 
+*/ +int PrintAction( + struct action *ap, /* The action to print */ + FILE *fp, /* Print the action here */ + int indent /* Indent by this amount */ +){ + int result = 1; + switch( ap->type ){ + case SHIFT: { + struct state *stp = ap->x.stp; + fprintf(fp,"%*s shift %-7d",indent,ap->sp->name,stp->statenum); + break; + } + case REDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); + break; + } + case SHIFTREDUCE: { + struct rule *rp = ap->x.rp; + fprintf(fp,"%*s shift-reduce %-7d",indent,ap->sp->name,rp->iRule); + RulePrint(fp, rp, -1); + break; + } + case ACCEPT: + fprintf(fp,"%*s accept",indent,ap->sp->name); + break; + case ERROR: + fprintf(fp,"%*s error",indent,ap->sp->name); + break; + case SRCONFLICT: + case RRCONFLICT: + fprintf(fp,"%*s reduce %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.rp->iRule); + break; + case SSCONFLICT: + fprintf(fp,"%*s shift %-7d ** Parsing conflict **", + indent,ap->sp->name,ap->x.stp->statenum); + break; + case SH_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s shift %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.stp->statenum); + }else{ + result = 0; + } + break; + case RD_RESOLVED: + if( showPrecedenceConflict ){ + fprintf(fp,"%*s reduce %-7d -- dropped by precedence", + indent,ap->sp->name,ap->x.rp->iRule); + }else{ + result = 0; + } + break; + case NOT_USED: + result = 0; + break; + } + if( result && ap->spOpt ){ + fprintf(fp," /* because %s==%s */", ap->sp->name, ap->spOpt->name); + } + return result; +} + +/* Generate the "*.out" log file */ +void ReportOutput(struct lemon *lemp) +{ + int i; + struct state *stp; + struct config *cfp; + struct action *ap; + FILE *fp; + + fp = file_open(lemp,".out","wb"); + if( fp==0 ) return; + for(i=0; inxstate; i++){ + stp = lemp->sorted[i]; + fprintf(fp,"State %d:\n",stp->statenum); + if( lemp->basisflag ) cfp=stp->bp; + else cfp=stp->cfp; + while( cfp ){ + char buf[20]; + if( 
cfp->dot==cfp->rp->nrhs ){ + lemon_sprintf(buf,"(%d)",cfp->rp->iRule); + fprintf(fp," %5s ",buf); + }else{ + fprintf(fp," "); + } + ConfigPrint(fp,cfp); + fprintf(fp,"\n"); +#if 0 + SetPrint(fp,cfp->fws,lemp); + PlinkPrint(fp,cfp->fplp,"To "); + PlinkPrint(fp,cfp->bplp,"From"); +#endif + if( lemp->basisflag ) cfp=cfp->bp; + else cfp=cfp->next; + } + fprintf(fp,"\n"); + for(ap=stp->ap; ap; ap=ap->next){ + if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); + } + fprintf(fp,"\n"); + } + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Symbols:\n"); + for(i=0; insymbol; i++){ + int j; + struct symbol *sp; + + sp = lemp->symbols[i]; + fprintf(fp, " %3d: %s", i, sp->name); + if( sp->type==NONTERMINAL ){ + fprintf(fp, ":"); + if( sp->lambda ){ + fprintf(fp, " "); + } + for(j=0; jnterminal; j++){ + if( sp->firstset && SetFind(sp->firstset, j) ){ + fprintf(fp, " %s", lemp->symbols[j]->name); + } + } + } + fprintf(fp, "\n"); + } + fclose(fp); + return; +} + +/* Search for the file "name" which is in the same directory as +** the exacutable */ +PRIVATE char *pathsearch(char *argv0, char *name, int modemask) +{ + const char *pathlist; + char *pathbufptr; + char *pathbuf; + char *path,*cp; + char c; + +#ifdef __WIN32__ + cp = strrchr(argv0,'\\'); +#else + cp = strrchr(argv0,'/'); +#endif + if( cp ){ + c = *cp; + *cp = 0; + path = (char *)malloc( lemonStrlen(argv0) + lemonStrlen(name) + 2 ); + if( path ) lemon_sprintf(path,"%s/%s",argv0,name); + *cp = c; + }else{ + pathlist = getenv("PATH"); + if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; + pathbuf = (char *) malloc( lemonStrlen(pathlist) + 1 ); + path = (char *)malloc( lemonStrlen(pathlist)+lemonStrlen(name)+2 ); + if( (pathbuf != 0) && (path!=0) ){ + pathbufptr = pathbuf; + lemon_strcpy(pathbuf, pathlist); + while( *pathbuf ){ + cp = strchr(pathbuf,':'); + if( cp==0 ) cp = &pathbuf[lemonStrlen(pathbuf)]; + c = *cp; + *cp = 0; + lemon_sprintf(path,"%s/%s",pathbuf,name); + *cp = c; + if( c==0 ) 
pathbuf[0] = 0; + else pathbuf = &cp[1]; + if( access(path,modemask)==0 ) break; + } + free(pathbufptr); + } + } + return path; +} + +/* Given an action, compute the integer value for that action +** which is to be put in the action table of the generated machine. +** Return negative if no action should be generated. +*/ +PRIVATE int compute_action(struct lemon *lemp, struct action *ap) +{ + int act; + switch( ap->type ){ + case SHIFT: act = ap->x.stp->statenum; break; + case SHIFTREDUCE: act = ap->x.rp->iRule + lemp->nstate; break; + case REDUCE: act = ap->x.rp->iRule + lemp->nstate+lemp->nrule; break; + case ERROR: act = lemp->nstate + lemp->nrule*2; break; + case ACCEPT: act = lemp->nstate + lemp->nrule*2 + 1; break; + default: act = -1; break; + } + return act; +} + +#define LINESIZE 1000 +/* The next cluster of routines are for reading the template file +** and writing the results to the generated parser */ +/* The first function transfers data from "in" to "out" until +** a line is seen which begins with "%%". The line number is +** tracked. +** +** if name!=0, then any word that begin with "Parse" is changed to +** begin with *name instead. +*/ +PRIVATE void tplt_xfer(char *name, FILE *in, FILE *out, int *lineno) +{ + int i, iStart; + char line[LINESIZE]; + while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ + (*lineno)++; + iStart = 0; + if( name ){ + for(i=0; line[i]; i++){ + if( line[i]=='P' && strncmp(&line[i],"Parse",5)==0 + && (i==0 || !ISALPHA(line[i-1])) + ){ + if( i>iStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); + fprintf(out,"%s",name); + i += 4; + iStart = i+1; + } + } + } + fprintf(out,"%s",&line[iStart]); + } +} + +/* The next function finds the template file and opens it, returning +** a pointer to the opened file. 
*/
+/* Lookup order: (1) the template named on the command line, if any;
+** (2) "<input basename>.lt"; (3) the default template in the current
+** directory; (4) the default template located through pathsearch().
+** On failure a message is printed, lemp->errorcnt is incremented and
+** NULL is returned.
+*/
+PRIVATE FILE *tplt_open(struct lemon *lemp)
+{
+  static char templatename[] = "lempar.c";
+  char buf[1000];
+  FILE *in;
+  char *tpltname;
+  char *toFree = 0;     /* Result of pathsearch(), released before return */
+  char *cp;
+  int nBase;            /* Bytes of lemp->filename used to build buf[] */
+
+  /* first, see if user specified a template filename on the command line. */
+  if (user_templatename != 0) {
+    if( access(user_templatename,004)==-1 ){
+      fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
+        user_templatename);
+      lemp->errorcnt++;
+      return 0;
+    }
+    in = fopen(user_templatename,"rb");
+    if( in==0 ){
+      fprintf(stderr,"Can't open the template file \"%s\".\n",
+              user_templatename);
+      lemp->errorcnt++;
+      return 0;
+    }
+    return in;
+  }
+
+  /* Build "<basename>.lt".  The copied length is clamped so that a very
+  ** long input filename cannot overflow buf[]. */
+  cp = strrchr(lemp->filename,'.');
+  if( cp ){
+    nBase = (int)(cp-lemp->filename);
+  }else{
+    nBase = lemonStrlen(lemp->filename);
+  }
+  if( nBase>(int)sizeof(buf)-4 ) nBase = (int)sizeof(buf)-4;
+  lemon_sprintf(buf,"%.*s.lt",nBase,lemp->filename);
+  if( access(buf,004)==0 ){
+    tpltname = buf;
+  }else if( access(templatename,004)==0 ){
+    tpltname = templatename;
+  }else{
+    tpltname = toFree = pathsearch(lemp->argv0,templatename,0);
+  }
+  if( tpltname==0 ){
+    fprintf(stderr,"Can't find the parser driver template file \"%s\".\n",
+    templatename);
+    lemp->errorcnt++;
+    return 0;
+  }
+  in = fopen(tpltname,"rb");
+  if( in==0 ){
+    /* Name the path that was actually tried. */
+    fprintf(stderr,"Can't open the template file \"%s\".\n",tpltname);
+    lemp->errorcnt++;
+  }
+  free(toFree);
+  return in;
+}
+
+/* Print a #line directive line to the output file.
*/ +PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) +{ + fprintf(out,"#line %d \"",lineno); + while( *filename ){ + if( *filename == '\\' ) putc('\\',out); + putc(*filename,out); + filename++; + } + fprintf(out,"\"\n"); +} + +/* Print a string to the file and keep the linenumber up to date */ +PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, int *lineno) +{ + if( str==0 ) return; + while( *str ){ + putc(*str,out); + if( *str=='\n' ) (*lineno)++; + str++; + } + if( str[-1]!='\n' ){ + putc('\n',out); + (*lineno)++; + } + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } + return; +} + +/* +** The following routine emits code for the destructor for the +** symbol sp +*/ +void emit_destructor_code( + FILE *out, + struct symbol *sp, + struct lemon *lemp, + int *lineno +){ + char *cp = 0; + + if( sp->type==TERMINAL ){ + cp = lemp->tokendest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else if( sp->destructor ){ + cp = sp->destructor; + fprintf(out,"{\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,sp->destLineno,lemp->filename); + } + }else if( lemp->vardest ){ + cp = lemp->vardest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else{ + assert( 0 ); /* Cannot happen */ + } + for(; *cp; cp++){ + if( *cp=='$' && cp[1]=='$' ){ + fprintf(out,"(yypminor->yy%d)",sp->dtnum); + cp++; + continue; + } + if( *cp=='\n' ) (*lineno)++; + fputc(*cp,out); + } + fprintf(out,"\n"); (*lineno)++; + if (!lemp->nolinenosflag) { + (*lineno)++; tplt_linedir(out,*lineno,lemp->outname); + } + fprintf(out,"}\n"); (*lineno)++; + return; +} + +/* +** Return TRUE (non-zero) if the given symbol has a destructor. 
+*/
+int has_destructor(struct symbol *sp, struct lemon *lemp)
+{
+  /* Terminals can only use the shared token destructor. */
+  if( sp->type==TERMINAL ) return lemp->tokendest!=0;
+  /* Other symbols use their own destructor or the default one. */
+  return lemp->vardest!=0 || sp->destructor!=0;
+}
+
+/*
+** Append text to a dynamically allocated string.  If zText is 0 then
+** reset the string to be empty again.  Always return the complete text
+** of the string (which is overwritten with each call).
+**
+** n bytes of zText are stored.  If n==0 then all of zText up to the first
+** \000 terminator is stored.  zText can contain up to two instances of
+** %d.  The values of p1 and p2 are written into the first and second
+** %d.
+**
+** If n==-1, then the previous character is overwritten.
+*/
+PRIVATE char *append_str(const char *zText, int n, int p1, int p2){
+  static char empty[1] = { 0 };   /* Returned when no buffer is available */
+  static char *z = 0;             /* The accumulated string */
+  static int alloced = 0;         /* Bytes allocated for z[] */
+  static int used = 0;            /* Bytes of z[] in use, excluding the NUL */
+  char zInt[40];                  /* Rendering of one %d argument */
+  int needed;
+  int i;
+  int c;
+
+  if( zText==0 ){
+    /* Reset request */
+    if( used==0 && z!=0 ) z[0] = 0;
+    used = 0;
+    return z;
+  }
+  if( n<0 ){
+    /* A negative count backs up over text already stored. */
+    used += n;
+    assert( used>=0 );
+  }
+  if( n<=0 ){
+    n = lemonStrlen(zText);
+  }
+  /* Grow the buffer so that the text plus two expanded integers fit. */
+  needed = (int) (n+sizeof(zInt)*2+used);
+  if( needed >= alloced ){
+    alloced = needed + 200;
+    z = (char *) realloc(z, alloced);
+  }
+  if( z==0 ) return empty;
+  for(i=0; i<n; i++){
+    c = zText[i];
+    if( c=='%' && i+1<n && zText[i+1]=='d' ){
+      /* Expand %d with the next pending integer argument. */
+      lemon_sprintf(zInt, "%d", p1);
+      p1 = p2;
+      lemon_strcpy(&z[used], zInt);
+      used += lemonStrlen(&z[used]);
+      i++;                        /* Step over the 'd' */
+    }else{
+      z[used++] = (char)c;
+    }
+  }
+  z[used] = 0;
+  return z;
+}
+
+/*
+** Write and transform the rp->code string so that symbols are expanded.
+** Populate the rp->codePrefix and rp->codeSuffix strings, as appropriate.
+**
+** Return 1 if the expanded code requires that "yylhsminor" local variable
+** to be defined.
+*/ +PRIVATE int translate_code(struct lemon *lemp, struct rule *rp){ + char *cp, *xp; + int i; + int rc = 0; /* True if yylhsminor is used */ + int dontUseRhs0 = 0; /* If true, use of left-most RHS label is illegal */ + const char *zSkip = 0; /* The zOvwrt comment within rp->code, or NULL */ + char lhsused = 0; /* True if the LHS element has been used */ + char lhsdirect; /* True if LHS writes directly into stack */ + char used[MAXRHS]; /* True for each RHS element which is used */ + char zLhs[50]; /* Convert the LHS symbol into this string */ + char zOvwrt[900]; /* Comment that to allow LHS to overwrite RHS */ + + for(i=0; inrhs; i++) used[i] = 0; + lhsused = 0; + + if( rp->code==0 ){ + static char newlinestr[2] = { '\n', '\0' }; + rp->code = newlinestr; + rp->line = rp->ruleline; + rp->noCode = 1; + }else{ + rp->noCode = 0; + } + + + if( rp->nrhs==0 ){ + /* If there are no RHS symbols, then writing directly to the LHS is ok */ + lhsdirect = 1; + }else if( rp->rhsalias[0]==0 ){ + /* The left-most RHS symbol has no value. LHS direct is ok. But + ** we have to call the distructor on the RHS symbol first. */ + lhsdirect = 1; + if( has_destructor(rp->rhs[0],lemp) ){ + append_str(0,0,0,0); + append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, + rp->rhs[0]->index,1-rp->nrhs); + rp->codePrefix = Strsafe(append_str(0,0,0,0)); + rp->noCode = 0; + } + }else if( rp->lhsalias==0 ){ + /* There is no LHS value symbol. 
*/ + lhsdirect = 1; + }else if( strcmp(rp->lhsalias,rp->rhsalias[0])==0 ){ + /* The LHS symbol and the left-most RHS symbol are the same, so + ** direct writing is allowed */ + lhsdirect = 1; + lhsused = 1; + used[0] = 1; + if( rp->lhs->dtnum!=rp->rhs[0]->dtnum ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) and %s(%s) share the same label but have " + "different datatypes.", + rp->lhs->name, rp->lhsalias, rp->rhs[0]->name, rp->rhsalias[0]); + lemp->errorcnt++; + } + }else{ + lemon_sprintf(zOvwrt, "/*%s-overwrites-%s*/", + rp->lhsalias, rp->rhsalias[0]); + zSkip = strstr(rp->code, zOvwrt); + if( zSkip!=0 ){ + /* The code contains a special comment that indicates that it is safe + ** for the LHS label to overwrite left-most RHS label. */ + lhsdirect = 1; + }else{ + lhsdirect = 0; + } + } + if( lhsdirect ){ + sprintf(zLhs, "yymsp[%d].minor.yy%d",1-rp->nrhs,rp->lhs->dtnum); + }else{ + rc = 1; + sprintf(zLhs, "yylhsminor.yy%d",rp->lhs->dtnum); + } + + append_str(0,0,0,0); + + /* This const cast is wrong but harmless, if we're careful. 
*/ + for(cp=(char *)rp->code; *cp; cp++){ + if( cp==zSkip ){ + append_str(zOvwrt,0,0,0); + cp += lemonStrlen(zOvwrt)-1; + dontUseRhs0 = 1; + continue; + } + if( ISALPHA(*cp) && (cp==rp->code || (!ISALNUM(cp[-1]) && cp[-1]!='_')) ){ + char saved; + for(xp= &cp[1]; ISALNUM(*xp) || *xp=='_'; xp++); + saved = *xp; + *xp = 0; + if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ + append_str(zLhs,0,0,0); + cp = xp; + lhsused = 1; + }else{ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ + if( i==0 && dontUseRhs0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used after '%s'.", + rp->rhsalias[0], zOvwrt); + lemp->errorcnt++; + }else if( cp!=rp->code && cp[-1]=='@' ){ + /* If the argument is of the form @X then substituted + ** the token number of X, not the value of X */ + append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0); + }else{ + struct symbol *sp = rp->rhs[i]; + int dtnum; + if( sp->type==MULTITERMINAL ){ + dtnum = sp->subsym[0]->dtnum; + }else{ + dtnum = sp->dtnum; + } + append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum); + } + cp = xp; + used[i] = 1; + break; + } + } + } + *xp = saved; + } + append_str(cp, 1, 0, 0); + } /* End loop */ + + /* Main code generation completed */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ) rp->code = Strsafe(cp); + append_str(0,0,0,0); + + /* Check to make sure the LHS has been used */ + if( rp->lhsalias && !lhsused ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label \"%s\" for \"%s(%s)\" is never used.", + rp->lhsalias,rp->lhs->name,rp->lhsalias); + lemp->errorcnt++; + } + + /* Generate destructor code for RHS minor values which are not referenced. + ** Generate error messages for unused labels and duplicate labels. 
+ */ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] ){ + if( i>0 ){ + int j; + if( rp->lhsalias && strcmp(rp->lhsalias,rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "%s(%s) has the same label as the LHS but is not the left-most " + "symbol on the RHS.", + rp->rhs[i]->name, rp->rhsalias); + lemp->errorcnt++; + } + for(j=0; jrhsalias[j] && strcmp(rp->rhsalias[j],rp->rhsalias[i])==0 ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s used for multiple symbols on the RHS of a rule.", + rp->rhsalias[i]); + lemp->errorcnt++; + break; + } + } + } + if( !used[i] ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s for \"%s(%s)\" is never used.", + rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); + lemp->errorcnt++; + } + }else if( i>0 && has_destructor(rp->rhs[i],lemp) ){ + append_str(" yy_destructor(yypParser,%d,&yymsp[%d].minor);\n", 0, + rp->rhs[i]->index,i-rp->nrhs+1); + } + } + + /* If unable to write LHS values directly into the stack, write the + ** saved LHS value now. */ + if( lhsdirect==0 ){ + append_str(" yymsp[%d].minor.yy%d = ", 0, 1-rp->nrhs, rp->lhs->dtnum); + append_str(zLhs, 0, 0, 0); + append_str(";\n", 0, 0, 0); + } + + /* Suffix code generation complete */ + cp = append_str(0,0,0,0); + if( cp && cp[0] ){ + rp->codeSuffix = Strsafe(cp); + rp->noCode = 0; + } + + return rc; +} + +/* +** Generate code which executes when the rule "rp" is reduced. Write +** the code to "out". Make sure lineno stays up-to-date. 
+*/ +PRIVATE void emit_code( + FILE *out, + struct rule *rp, + struct lemon *lemp, + int *lineno +){ + const char *cp; + + /* Setup code prior to the #line directive */ + if( rp->codePrefix && rp->codePrefix[0] ){ + fprintf(out, "{%s", rp->codePrefix); + for(cp=rp->codePrefix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + } + + /* Generate code to do the reduce action */ + if( rp->code ){ + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,rp->line,lemp->filename); + } + fprintf(out,"{%s",rp->code); + for(cp=rp->code; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + fprintf(out,"}\n"); (*lineno)++; + if( !lemp->nolinenosflag ){ + (*lineno)++; + tplt_linedir(out,*lineno,lemp->outname); + } + } + + /* Generate breakdown code that occurs after the #line directive */ + if( rp->codeSuffix && rp->codeSuffix[0] ){ + fprintf(out, "%s", rp->codeSuffix); + for(cp=rp->codeSuffix; *cp; cp++){ if( *cp=='\n' ) (*lineno)++; } + } + + if( rp->codePrefix ){ + fprintf(out, "}\n"); (*lineno)++; + } + + return; +} + +/* +** Print the definition of the union used for the parser's data stack. +** This union contains fields for every possible data type for tokens +** and nonterminals. In the process of computing and printing this +** union, also set the ".dtnum" field of every terminal and nonterminal +** symbol. +*/ +void print_stack_union( + FILE *out, /* The output stream */ + struct lemon *lemp, /* The main info structure for this parser */ + int *plineno, /* Pointer to the line number */ + int mhflag /* True if generating makeheaders output */ +){ + int lineno = *plineno; /* The line number of the output */ + char **types; /* A hash table of datatypes */ + int arraysize; /* Size of the "types" array */ + int maxdtlength; /* Maximum length of any ".datatype" field. 
*/ + char *stddt; /* Standardized name for a datatype */ + int i,j; /* Loop counters */ + unsigned hash; /* For hashing the name of a type */ + const char *name; /* Name of the parser */ + + /* Allocate and initialize types[] and allocate stddt[] */ + arraysize = lemp->nsymbol * 2; + types = (char**)calloc( arraysize, sizeof(char*) ); + if( types==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + for(i=0; ivartype ){ + maxdtlength = lemonStrlen(lemp->vartype); + } + for(i=0; insymbol; i++){ + int len; + struct symbol *sp = lemp->symbols[i]; + if( sp->datatype==0 ) continue; + len = lemonStrlen(sp->datatype); + if( len>maxdtlength ) maxdtlength = len; + } + stddt = (char*)malloc( maxdtlength*2 + 1 ); + if( stddt==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + + /* Build a hash table of datatypes. The ".dtnum" field of each symbol + ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is + ** used for terminal symbols. If there is no %default_type defined then + ** 0 is also used as the .dtnum value for nonterminals which do not specify + ** a datatype using the %type directive. 
+ */ + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + char *cp; + if( sp==lemp->errsym ){ + sp->dtnum = arraysize+1; + continue; + } + if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ + sp->dtnum = 0; + continue; + } + cp = sp->datatype; + if( cp==0 ) cp = lemp->vartype; + j = 0; + while( ISSPACE(*cp) ) cp++; + while( *cp ) stddt[j++] = *cp++; + while( j>0 && ISSPACE(stddt[j-1]) ) j--; + stddt[j] = 0; + if( lemp->tokentype && strcmp(stddt, lemp->tokentype)==0 ){ + sp->dtnum = 0; + continue; + } + hash = 0; + for(j=0; stddt[j]; j++){ + hash = hash*53 + stddt[j]; + } + hash = (hash & 0x7fffffff)%arraysize; + while( types[hash] ){ + if( strcmp(types[hash],stddt)==0 ){ + sp->dtnum = hash + 1; + break; + } + hash++; + if( hash>=(unsigned)arraysize ) hash = 0; + } + if( types[hash]==0 ){ + sp->dtnum = hash + 1; + types[hash] = (char*)malloc( lemonStrlen(stddt)+1 ); + if( types[hash]==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + lemon_strcpy(types[hash],stddt); + } + } + + /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ + name = lemp->name ? lemp->name : "Parse"; + lineno = *plineno; + if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } + fprintf(out,"#define %sTOKENTYPE %s\n",name, + lemp->tokentype?lemp->tokentype:"void*"); lineno++; + if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } + fprintf(out,"typedef union {\n"); lineno++; + fprintf(out," int yyinit;\n"); lineno++; + fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; + for(i=0; ierrsym->useCnt ){ + fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; + } + free(stddt); + free(types); + fprintf(out,"} YYMINORTYPE;\n"); lineno++; + *plineno = lineno; +} + +/* +** Return the name of a C datatype able to represent values between +** lwr and upr, inclusive. If pnByte!=NULL then also write the sizeof +** for that type (1, 2, or 4) into *pnByte. 
+*/ +static const char *minimum_size_type(int lwr, int upr, int *pnByte){ + const char *zType = "int"; + int nByte = 4; + if( lwr>=0 ){ + if( upr<=255 ){ + zType = "unsigned char"; + nByte = 1; + }else if( upr<65535 ){ + zType = "unsigned short int"; + nByte = 2; + }else{ + zType = "unsigned int"; + nByte = 4; + } + }else if( lwr>=-127 && upr<=127 ){ + zType = "signed char"; + nByte = 1; + }else if( lwr>=-32767 && upr<32767 ){ + zType = "short"; + nByte = 2; + } + if( pnByte ) *pnByte = nByte; + return zType; +} + +/* +** Each state contains a set of token transaction and a set of +** nonterminal transactions. Each of these sets makes an instance +** of the following structure. An array of these structures is used +** to order the creation of entries in the yy_action[] table. +*/ +struct axset { + struct state *stp; /* A pointer to a state */ + int isTkn; /* True to use tokens. False for non-terminals */ + int nAction; /* Number of actions */ + int iOrder; /* Original order of action sets */ +}; + +/* +** Compare to axset structures for sorting purposes +*/ +static int axset_compare(const void *a, const void *b){ + struct axset *p1 = (struct axset*)a; + struct axset *p2 = (struct axset*)b; + int c; + c = p2->nAction - p1->nAction; + if( c==0 ){ + c = p1->iOrder - p2->iOrder; + } + assert( c!=0 || p1==p2 ); + return c; +} + +/* +** Write text on "out" that describes the rule "rp". 
+*/ +static void writeRuleText(FILE *out, struct rule *rp){ + int j; + fprintf(out,"%s ::=", rp->lhs->name); + for(j=0; jnrhs; j++){ + struct symbol *sp = rp->rhs[j]; + if( sp->type!=MULTITERMINAL ){ + fprintf(out," %s", sp->name); + }else{ + int k; + fprintf(out," %s", sp->subsym[0]->name); + for(k=1; knsubsym; k++){ + fprintf(out,"|%s",sp->subsym[k]->name); + } + } + } +} + + +/* Generate C source code for the parser */ +void ReportTable( + struct lemon *lemp, + int mhflag /* Output in makeheaders format if true */ +){ + FILE *out, *in; + char line[LINESIZE]; + int lineno; + struct state *stp; + struct action *ap; + struct rule *rp; + struct acttab *pActtab; + int i, j, n, sz; + int szActionType; /* sizeof(YYACTIONTYPE) */ + int szCodeType; /* sizeof(YYCODETYPE) */ + const char *name; + int mnTknOfst, mxTknOfst; + int mnNtOfst, mxNtOfst; + struct axset *ax; + + in = tplt_open(lemp); + if( in==0 ) return; + out = file_open(lemp,".c","wb"); + if( out==0 ){ + fclose(in); + return; + } + lineno = 1; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the include code, if any */ + tplt_print(out,lemp,lemp->include,&lineno); + if( mhflag ){ + char *incName = file_makename(lemp, ".h"); + fprintf(out,"#include \"%s\"\n", incName); lineno++; + free(incName); + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate #defines for all tokens */ + if( mhflag ){ + const char *prefix; + fprintf(out,"#if INTERFACE\n"); lineno++; + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + for(i=1; interminal; i++){ + fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + lineno++; + } + fprintf(out,"#endif\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the defines */ + fprintf(out,"#define YYCODETYPE %s\n", + minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++; + fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; + fprintf(out,"#define YYACTIONTYPE %s\n", + 
minimum_size_type(0,lemp->nstate+lemp->nrule*2+5,&szActionType)); lineno++; + if( lemp->wildcard ){ + fprintf(out,"#define YYWILDCARD %d\n", + lemp->wildcard->index); lineno++; + } + print_stack_union(out,lemp,&lineno,mhflag); + fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; + if( lemp->stacksize ){ + fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; + }else{ + fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; + } + fprintf(out, "#endif\n"); lineno++; + if( mhflag ){ + fprintf(out,"#if INTERFACE\n"); lineno++; + } + name = lemp->name ? lemp->name : "Parse"; + if( lemp->arg && lemp->arg[0] ){ + i = lemonStrlen(lemp->arg); + while( i>=1 && ISSPACE(lemp->arg[i-1]) ) i--; + while( i>=1 && (ISALNUM(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; + fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", + name,lemp->arg,&lemp->arg[i]); lineno++; + fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", + name,&lemp->arg[i],&lemp->arg[i]); lineno++; + }else{ + fprintf(out,"#define %sARG_SDECL\n",name); lineno++; + fprintf(out,"#define %sARG_PDECL\n",name); lineno++; + fprintf(out,"#define %sARG_FETCH\n",name); lineno++; + fprintf(out,"#define %sARG_STORE\n",name); lineno++; + } + if( mhflag ){ + fprintf(out,"#endif\n"); lineno++; + } + if( lemp->errsym->useCnt ){ + fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; + fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; + } + if( lemp->has_fallback ){ + fprintf(out,"#define YYFALLBACK 1\n"); lineno++; + } + + /* Compute the action table, but do not output it yet. The action + ** table must be computed before generating the YYNSTATE macro because + ** we need to know how many states can be eliminated. 
+ */ + ax = (struct axset *) calloc(lemp->nxstate*2, sizeof(ax[0])); + if( ax==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=0; inxstate; i++){ + stp = lemp->sorted[i]; + ax[i*2].stp = stp; + ax[i*2].isTkn = 1; + ax[i*2].nAction = stp->nTknAct; + ax[i*2+1].stp = stp; + ax[i*2+1].isTkn = 0; + ax[i*2+1].nAction = stp->nNtAct; + } + mxTknOfst = mnTknOfst = 0; + mxNtOfst = mnNtOfst = 0; + /* In an effort to minimize the action table size, use the heuristic + ** of placing the largest action sets first */ + for(i=0; inxstate*2; i++) ax[i].iOrder = i; + qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); + pActtab = acttab_alloc(); + for(i=0; inxstate*2 && ax[i].nAction>0; i++){ + stp = ax[i].stp; + if( ax[i].isTkn ){ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->index>=lemp->nterminal ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iTknOfst = acttab_insert(pActtab); + if( stp->iTknOfstiTknOfst; + if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; + }else{ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->indexnterminal ) continue; + if( ap->sp->index==lemp->nsymbol ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iNtOfst = acttab_insert(pActtab); + if( stp->iNtOfstiNtOfst; + if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; + } +#if 0 /* Uncomment for a trace of how the yy_action[] table fills out */ + { int jj, nn; + for(jj=nn=0; jjnAction; jj++){ + if( pActtab->aAction[jj].action<0 ) nn++; + } + printf("%4d: State %3d %s n: %2d size: %5d freespace: %d\n", + i, stp->statenum, ax[i].isTkn ? 
"Token" : "Var ", + ax[i].nAction, pActtab->nAction, nn); + } +#endif + } + free(ax); + + /* Mark rules that are actually used for reduce actions after all + ** optimizations have been applied + */ + for(rp=lemp->rule; rp; rp=rp->next) rp->doesReduce = LEMON_FALSE; + for(i=0; inxstate; i++){ + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE || ap->type==SHIFTREDUCE ){ + ap->x.rp->doesReduce = i; + } + } + } + + /* Finish rendering the constants now that the action table has + ** been computed */ + fprintf(out,"#define YYNSTATE %d\n",lemp->nxstate); lineno++; + fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; + fprintf(out,"#define YY_MAX_SHIFT %d\n",lemp->nxstate-1); lineno++; + fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n",lemp->nstate); lineno++; + i = lemp->nstate + lemp->nrule; + fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n", i-1); lineno++; + fprintf(out,"#define YY_MIN_REDUCE %d\n", i); lineno++; + i = lemp->nstate + lemp->nrule*2; + fprintf(out,"#define YY_MAX_REDUCE %d\n", i-1); lineno++; + fprintf(out,"#define YY_ERROR_ACTION %d\n", i); lineno++; + fprintf(out,"#define YY_ACCEPT_ACTION %d\n", i+1); lineno++; + fprintf(out,"#define YY_NO_ACTION %d\n", i+2); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Now output the action table and its associates: + ** + ** yy_action[] A single table containing all actions. + ** yy_lookahead[] A table containing the lookahead for each entry in + ** yy_action. Used to detect hash collisions. + ** yy_shift_ofst[] For each state, the offset into yy_action for + ** shifting terminals. + ** yy_reduce_ofst[] For each state, the offset into yy_action for + ** shifting non-terminals after a reduce. + ** yy_default[] Default action for each state. 
+ */ + + /* Output the yy_action table */ + lemp->nactiontab = n = acttab_size(pActtab); + lemp->tablesize += n*szActionType; + fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; + fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; + for(i=j=0; instate + lemp->nrule + 2; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", action); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_lookahead table */ + lemp->tablesize += n*szCodeType; + fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; + for(i=j=0; insymbol; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", la); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_shift_ofst[] table */ + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", lemp->nactiontab); lineno++; + fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; + fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; + fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; + fprintf(out, "static const %s yy_shift_ofst[] = {\n", + minimum_size_type(mnTknOfst, lemp->nterminal+lemp->nactiontab, &sz)); + lineno++; + lemp->tablesize += n*sz; + for(i=j=0; isorted[i]; + ofst = stp->iTknOfst; + if( ofst==NO_OFFSET ) ofst = lemp->nactiontab; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_reduce_ofst[] table */ + fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; + n = lemp->nxstate; + while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); 
lineno++; + fprintf(out, "#define YY_REDUCE_MIN (%d)\n", mnNtOfst); lineno++; + fprintf(out, "#define YY_REDUCE_MAX (%d)\n", mxNtOfst); lineno++; + fprintf(out, "static const %s yy_reduce_ofst[] = {\n", + minimum_size_type(mnNtOfst-1, mxNtOfst, &sz)); lineno++; + lemp->tablesize += n*sz; + for(i=j=0; isorted[i]; + ofst = stp->iNtOfst; + if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the default action table */ + fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; + n = lemp->nxstate; + lemp->tablesize += n*szActionType; + for(i=j=0; isorted[i]; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", stp->iDfltReduce+lemp->nstate+lemp->nrule); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the table of fallback tokens. 
+ */ + if( lemp->has_fallback ){ + int mx = lemp->nterminal - 1; + while( mx>0 && lemp->symbols[mx]->fallback==0 ){ mx--; } + lemp->tablesize += (mx+1)*szCodeType; + for(i=0; i<=mx; i++){ + struct symbol *p = lemp->symbols[i]; + if( p->fallback==0 ){ + fprintf(out, " 0, /* %10s => nothing */\n", p->name); + }else{ + fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, + p->name, p->fallback->name); + } + lineno++; + } + } + tplt_xfer(lemp->name, in, out, &lineno); + + /* Generate a table containing the symbolic name of every symbol + */ + for(i=0; insymbol; i++){ + lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); + fprintf(out," %-15s",line); + if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } + } + if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate a table containing a text string that describes every + ** rule in the rule set of the grammar. This information is used + ** when tracing REDUCE actions. + */ + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + assert( rp->iRule==i ); + fprintf(out," /* %3d */ \"", i); + writeRuleText(out, rp); + fprintf(out,"\",\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes every time a symbol is popped from + ** the stack while processing errors or while destroying the parser. 
+ ** (In other words, generate the %destructor actions) + */ + if( lemp->tokendest ){ + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type!=TERMINAL ) continue; + if( once ){ + fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + } + for(i=0; insymbol && lemp->symbols[i]->type!=TERMINAL; i++); + if( insymbol ){ + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + } + if( lemp->vardest ){ + struct symbol *dflt_sp = 0; + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || + sp->index<=0 || sp->destructor!=0 ) continue; + if( once ){ + fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + dflt_sp = sp; + } + if( dflt_sp!=0 ){ + emit_destructor_code(out,dflt_sp,lemp,&lineno); + } + fprintf(out," break;\n"); lineno++; + } + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; + if( sp->destLineno<0 ) continue; /* Already emitted */ + fprintf(out," case %d: /* %s */\n", sp->index, sp->name); lineno++; + + /* Combine duplicate destructors into a single case */ + for(j=i+1; jnsymbol; j++){ + struct symbol *sp2 = lemp->symbols[j]; + if( sp2 && sp2->type!=TERMINAL && sp2->destructor + && sp2->dtnum==sp->dtnum + && strcmp(sp->destructor,sp2->destructor)==0 ){ + fprintf(out," case %d: /* %s */\n", + sp2->index, sp2->name); lineno++; + sp2->destLineno = -1; /* Avoid emitting this destructor again */ + } + } + + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes whenever the parser stack overflows */ + 
tplt_print(out,lemp,lemp->overflow,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the table of rule information + ** + ** Note: This code depends on the fact that rules are number + ** sequentually beginning with 0. + */ + for(rp=lemp->rule; rp; rp=rp->next){ + fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which execution during each REDUCE action */ + i = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + i += translate_code(lemp, rp); + } + if( i ){ + fprintf(out," YYMINORTYPE yylhsminor;\n"); lineno++; + } + /* First output rules other than the default: rule */ + for(rp=lemp->rule; rp; rp=rp->next){ + struct rule *rp2; /* Other rules with the same action */ + if( rp->codeEmitted ) continue; + if( rp->noCode ){ + /* No C code actions, so this will be part of the "default:" rule */ + continue; + } + fprintf(out," case %d: /* ", rp->iRule); + writeRuleText(out, rp); + fprintf(out, " */\n"); lineno++; + for(rp2=rp->next; rp2; rp2=rp2->next){ + if( rp2->code==rp->code && rp2->codePrefix==rp->codePrefix + && rp2->codeSuffix==rp->codeSuffix ){ + fprintf(out," case %d: /* ", rp2->iRule); + writeRuleText(out, rp2); + fprintf(out," */ yytestcase(yyruleno==%d);\n", rp2->iRule); lineno++; + rp2->codeEmitted = 1; + } + } + emit_code(out,rp,lemp,&lineno); + fprintf(out," break;\n"); lineno++; + rp->codeEmitted = 1; + } + /* Finally, output the default: rule. We choose as the default: all + ** empty actions. 
*/ + fprintf(out," default:\n"); lineno++; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->codeEmitted ) continue; + assert( rp->noCode ); + fprintf(out," /* (%d) ", rp->iRule); + writeRuleText(out, rp); + if( rp->doesReduce ){ + fprintf(out, " */ yytestcase(yyruleno==%d);\n", rp->iRule); lineno++; + }else{ + fprintf(out, " (OPTIMIZED OUT) */ assert(yyruleno!=%d);\n", + rp->iRule); lineno++; + } + } + fprintf(out," break;\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes if a parse fails */ + tplt_print(out,lemp,lemp->failure,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when a syntax error occurs */ + tplt_print(out,lemp,lemp->error,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when the parser accepts its input */ + tplt_print(out,lemp,lemp->accept,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Append any addition code the user desires */ + tplt_print(out,lemp,lemp->extracode,&lineno); + + fclose(in); + fclose(out); + return; +} + +/* Generate a header file for the parser */ +void ReportHeader(struct lemon *lemp) +{ + FILE *out, *in; + const char *prefix; + char line[LINESIZE]; + char pattern[LINESIZE]; + int i; + + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + in = file_open(lemp,".h","rb"); + if( in ){ + int nextChar; + for(i=1; interminal && fgets(line,LINESIZE,in); i++){ + lemon_sprintf(pattern,"#define %s%-30s %3d\n", + prefix,lemp->symbols[i]->name,i); + if( strcmp(line,pattern) ) break; + } + nextChar = fgetc(in); + fclose(in); + if( i==lemp->nterminal && nextChar==EOF ){ + /* No change in the file. Don't rewrite it. 
*/ + return; + } + } + out = file_open(lemp,".h","wb"); + if( out ){ + for(i=1; interminal; i++){ + fprintf(out,"#define %s%-30s %3d\n",prefix,lemp->symbols[i]->name,i); + } + fclose(out); + } + return; +} + +/* Reduce the size of the action tables, if possible, by making use +** of defaults. +** +** In this version, we take the most frequent REDUCE action and make +** it the default. Except, there is no default if the wildcard token +** is a possible look-ahead. +*/ +void CompressTables(struct lemon *lemp) +{ + struct state *stp; + struct action *ap, *ap2, *nextap; + struct rule *rp, *rp2, *rbest; + int nbest, n; + int i; + int usesWildcard; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + nbest = 0; + rbest = 0; + usesWildcard = 0; + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ + usesWildcard = 1; + } + if( ap->type!=REDUCE ) continue; + rp = ap->x.rp; + if( rp->lhsStart ) continue; + if( rp==rbest ) continue; + n = 1; + for(ap2=ap->next; ap2; ap2=ap2->next){ + if( ap2->type!=REDUCE ) continue; + rp2 = ap2->x.rp; + if( rp2==rbest ) continue; + if( rp2==rp ) n++; + } + if( n>nbest ){ + nbest = n; + rbest = rp; + } + } + + /* Do not make a default if the number of rules to default + ** is not at least 1 or if the wildcard token is a possible + ** lookahead. + */ + if( nbest<1 || usesWildcard ) continue; + + + /* Combine matching REDUCE actions into a single default */ + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) break; + } + assert( ap ); + ap->sp = Symbol_new("{default}"); + for(ap=ap->next; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; + } + stp->ap = Action_sort(stp->ap); + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT ) break; + if( ap->type==REDUCE && ap->x.rp!=rbest ) break; + } + if( ap==0 ){ + stp->autoReduce = 1; + stp->pDfltReduce = rbest; + } + } + + /* Make a second pass over all states and actions. 
Convert + ** every action that is a SHIFT to an autoReduce state into + ** a SHIFTREDUCE action. + */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=ap->next){ + struct state *pNextState; + if( ap->type!=SHIFT ) continue; + pNextState = ap->x.stp; + if( pNextState->autoReduce && pNextState->pDfltReduce!=0 ){ + ap->type = SHIFTREDUCE; + ap->x.rp = pNextState->pDfltReduce; + } + } + } + + /* If a SHIFTREDUCE action specifies a rule that has a single RHS term + ** (meaning that the SHIFTREDUCE will land back in the state where it + ** started) and if there is no C-code associated with the reduce action, + ** then we can go ahead and convert the action to be the same as the + ** action for the RHS of the rule. + */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(ap=stp->ap; ap; ap=nextap){ + nextap = ap->next; + if( ap->type!=SHIFTREDUCE ) continue; + rp = ap->x.rp; + if( rp->noCode==0 ) continue; + if( rp->nrhs!=1 ) continue; +#if 1 + /* Only apply this optimization to non-terminals. It would be OK to + ** apply it to terminal symbols too, but that makes the parser tables + ** larger. */ + if( ap->sp->indexnterminal ) continue; +#endif + /* If we reach this point, it means the optimization can be applied */ + nextap = ap; + for(ap2=stp->ap; ap2 && (ap2==ap || ap2->sp!=rp->lhs); ap2=ap2->next){} + assert( ap2!=0 ); + ap->spOpt = ap2->sp; + ap->type = ap2->type; + ap->x = ap2->x; + } + } +} + + +/* +** Compare two states for sorting purposes. The smaller state is the +** one with the most non-terminal actions. If they have the same number +** of non-terminal actions, then the smaller is the one with the most +** token actions. 
+*/ +static int stateResortCompare(const void *a, const void *b){ + const struct state *pA = *(const struct state**)a; + const struct state *pB = *(const struct state**)b; + int n; + + n = pB->nNtAct - pA->nNtAct; + if( n==0 ){ + n = pB->nTknAct - pA->nTknAct; + if( n==0 ){ + n = pB->statenum - pA->statenum; + } + } + assert( n!=0 ); + return n; +} + + +/* +** Renumber and resort states so that states with fewer choices +** occur at the end. Except, keep state 0 as the first state. +*/ +void ResortStates(struct lemon *lemp) +{ + int i; + struct state *stp; + struct action *ap; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + stp->nTknAct = stp->nNtAct = 0; + stp->iDfltReduce = lemp->nrule; /* Init dflt action to "syntax error" */ + stp->iTknOfst = NO_OFFSET; + stp->iNtOfst = NO_OFFSET; + for(ap=stp->ap; ap; ap=ap->next){ + int iAction = compute_action(lemp,ap); + if( iAction>=0 ){ + if( ap->sp->indexnterminal ){ + stp->nTknAct++; + }else if( ap->sp->indexnsymbol ){ + stp->nNtAct++; + }else{ + assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp ); + stp->iDfltReduce = iAction - lemp->nstate - lemp->nrule; + } + } + } + } + qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), + stateResortCompare); + for(i=0; instate; i++){ + lemp->sorted[i]->statenum = i; + } + lemp->nxstate = lemp->nstate; + while( lemp->nxstate>1 && lemp->sorted[lemp->nxstate-1]->autoReduce ){ + lemp->nxstate--; + } +} + + +/***************** From the file "set.c" ************************************/ +/* +** Set manipulation routines for the LEMON parser generator. +*/ + +static int size = 0; + +/* Set the set size */ +void SetSize(int n) +{ + size = n+1; +} + +/* Allocate a new set */ +char *SetNew(){ + char *s; + s = (char*)calloc( size, 1); + if( s==0 ){ + extern void memory_error(); + memory_error(); + } + return s; +} + +/* Deallocate a set */ +void SetFree(char *s) +{ + free(s); +} + +/* Add a new element to the set. 
Return TRUE if the element was added +** and FALSE if it was already there. */ +int SetAdd(char *s, int e) +{ + int rv; + assert( e>=0 && esize = 1024; + x1a->count = 0; + x1a->tbl = (x1node*)calloc(1024, sizeof(x1node) + sizeof(x1node*)); + if( x1a->tbl==0 ){ + free(x1a); + x1a = 0; + }else{ + int i; + x1a->ht = (x1node**)&(x1a->tbl[1024]); + for(i=0; i<1024; i++) x1a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Strsafe_insert(const char *data) +{ + x1node *np; + unsigned h; + unsigned ph; + + if( x1a==0 ) return 0; + ph = strhash(data); + h = ph & (x1a->size-1); + np = x1a->ht[h]; + while( np ){ + if( strcmp(np->data,data)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x1a->count>=x1a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x1 array; + array.size = arrSize = x1a->size*2; + array.count = x1a->count; + array.tbl = (x1node*)calloc(arrSize, sizeof(x1node) + sizeof(x1node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x1node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x1node *oldnp, *newnp; + oldnp = &(x1a->tbl[i]); + h = strhash(oldnp->data) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x1a->tbl); + *x1a = array; + } + /* Insert the new data */ + h = ph & (x1a->size-1); + np = &(x1a->tbl[x1a->count++]); + np->data = data; + if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); + np->next = x1a->ht[h]; + x1a->ht[h] = np; + np->from = &(x1a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. 
*/ +const char *Strsafe_find(const char *key) +{ + unsigned h; + x1node *np; + + if( x1a==0 ) return 0; + h = strhash(key) & (x1a->size-1); + np = x1a->ht[h]; + while( np ){ + if( strcmp(np->data,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return a pointer to the (terminal or nonterminal) symbol "x". +** Create a new symbol if this is the first time "x" has been seen. +*/ +struct symbol *Symbol_new(const char *x) +{ + struct symbol *sp; + + sp = Symbol_find(x); + if( sp==0 ){ + sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); + MemoryCheck(sp); + sp->name = Strsafe(x); + sp->type = ISUPPER(*x) ? TERMINAL : NONTERMINAL; + sp->rule = 0; + sp->fallback = 0; + sp->prec = -1; + sp->assoc = UNK; + sp->firstset = 0; + sp->lambda = LEMON_FALSE; + sp->destructor = 0; + sp->destLineno = 0; + sp->datatype = 0; + sp->useCnt = 0; + Symbol_insert(sp,sp->name); + } + sp->useCnt++; + return sp; +} + +/* Compare two symbols for sorting purposes. Return negative, +** zero, or positive if a is less then, equal to, or greater +** than b. +** +** Symbols that begin with upper case letters (terminals or tokens) +** must sort before symbols that begin with lower case letters +** (non-terminals). And MULTITERMINAL symbols (created using the +** %token_class directive) must sort at the very end. Other than +** that, the order does not matter. +** +** We find experimentally that leaving the symbols in their original +** order (the order they appeared in the grammar file) gives the +** smallest parser tables in SQLite. +*/ +int Symbolcmpp(const void *_a, const void *_b) +{ + const struct symbol *a = *(const struct symbol **) _a; + const struct symbol *b = *(const struct symbol **) _b; + int i1 = a->type==MULTITERMINAL ? 3 : a->name[0]>'Z' ? 2 : 1; + int i2 = b->type==MULTITERMINAL ? 3 : b->name[0]>'Z' ? 2 : 1; + return i1==i2 ? 
a->index - b->index : i1 - i2; +} + +/* There is one instance of the following structure for each +** associative array of type "x2". +*/ +struct s_x2 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x2node *tbl; /* The data stored here */ + struct s_x2node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x2". +*/ +typedef struct s_x2node { + struct symbol *data; /* The data */ + const char *key; /* The key */ + struct s_x2node *next; /* Next entry with the same hash */ + struct s_x2node **from; /* Previous link */ +} x2node; + +/* There is only one instance of the array, which is the following */ +static struct s_x2 *x2a; + +/* Allocate a new associative array */ +void Symbol_init(){ + if( x2a ) return; + x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); + if( x2a ){ + x2a->size = 128; + x2a->count = 0; + x2a->tbl = (x2node*)calloc(128, sizeof(x2node) + sizeof(x2node*)); + if( x2a->tbl==0 ){ + free(x2a); + x2a = 0; + }else{ + int i; + x2a->ht = (x2node**)&(x2a->tbl[128]); + for(i=0; i<128; i++) x2a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Symbol_insert(struct symbol *data, const char *key) +{ + x2node *np; + unsigned h; + unsigned ph; + + if( x2a==0 ) return 0; + ph = strhash(key); + h = ph & (x2a->size-1); + np = x2a->ht[h]; + while( np ){ + if( strcmp(np->key,key)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. 
*/ + return 0; + } + np = np->next; + } + if( x2a->count>=x2a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x2 array; + array.size = arrSize = x2a->size*2; + array.count = x2a->count; + array.tbl = (x2node*)calloc(arrSize, sizeof(x2node) + sizeof(x2node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x2node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x2node *oldnp, *newnp; + oldnp = &(x2a->tbl[i]); + h = strhash(oldnp->key) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->key = oldnp->key; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x2a->tbl); + *x2a = array; + } + /* Insert the new data */ + h = ph & (x2a->size-1); + np = &(x2a->tbl[x2a->count++]); + np->key = key; + np->data = data; + if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); + np->next = x2a->ht[h]; + x2a->ht[h] = np; + np->from = &(x2a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct symbol *Symbol_find(const char *key) +{ + unsigned h; + x2node *np; + + if( x2a==0 ) return 0; + h = strhash(key) & (x2a->size-1); + np = x2a->ht[h]; + while( np ){ + if( strcmp(np->key,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return the n-th data. Return NULL if n is out of range. */ +struct symbol *Symbol_Nth(int n) +{ + struct symbol *data; + if( x2a && n>0 && n<=x2a->count ){ + data = x2a->tbl[n-1].data; + }else{ + data = 0; + } + return data; +} + +/* Return the size of the array */ +int Symbol_count() +{ + return x2a ? x2a->count : 0; +} + +/* Return an array of pointers to all data in the table. +** The array is obtained from malloc. Return NULL if memory allocation +** problems, or if the array is empty. 
*/ +struct symbol **Symbol_arrayof() +{ + struct symbol **array; + int i,arrSize; + if( x2a==0 ) return 0; + arrSize = x2a->count; + array = (struct symbol **)calloc(arrSize, sizeof(struct symbol *)); + if( array ){ + for(i=0; itbl[i].data; + } + return array; +} + +/* Compare two configurations */ +int Configcmp(const char *_a,const char *_b) +{ + const struct config *a = (struct config *) _a; + const struct config *b = (struct config *) _b; + int x; + x = a->rp->index - b->rp->index; + if( x==0 ) x = a->dot - b->dot; + return x; +} + +/* Compare two states */ +PRIVATE int statecmp(struct config *a, struct config *b) +{ + int rc; + for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ + rc = a->rp->index - b->rp->index; + if( rc==0 ) rc = a->dot - b->dot; + } + if( rc==0 ){ + if( a ) rc = 1; + if( b ) rc = -1; + } + return rc; +} + +/* Hash a state */ +PRIVATE unsigned statehash(struct config *a) +{ + unsigned h=0; + while( a ){ + h = h*571 + a->rp->index*37 + a->dot; + a = a->bp; + } + return h; +} + +/* Allocate a new state structure */ +struct state *State_new() +{ + struct state *newstate; + newstate = (struct state *)calloc(1, sizeof(struct state) ); + MemoryCheck(newstate); + return newstate; +} + +/* There is one instance of the following structure for each +** associative array of type "x3". +*/ +struct s_x3 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x3node *tbl; /* The data stored here */ + struct s_x3node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x3". 
+*/ +typedef struct s_x3node { + struct state *data; /* The data */ + struct config *key; /* The key */ + struct s_x3node *next; /* Next entry with the same hash */ + struct s_x3node **from; /* Previous link */ +} x3node; + +/* There is only one instance of the array, which is the following */ +static struct s_x3 *x3a; + +/* Allocate a new associative array */ +void State_init(){ + if( x3a ) return; + x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); + if( x3a ){ + x3a->size = 128; + x3a->count = 0; + x3a->tbl = (x3node*)calloc(128, sizeof(x3node) + sizeof(x3node*)); + if( x3a->tbl==0 ){ + free(x3a); + x3a = 0; + }else{ + int i; + x3a->ht = (x3node**)&(x3a->tbl[128]); + for(i=0; i<128; i++) x3a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int State_insert(struct state *data, struct config *key) +{ + x3node *np; + unsigned h; + unsigned ph; + + if( x3a==0 ) return 0; + ph = statehash(key); + h = ph & (x3a->size-1); + np = x3a->ht[h]; + while( np ){ + if( statecmp(np->key,key)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. 
*/ + return 0; + } + np = np->next; + } + if( x3a->count>=x3a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x3 array; + array.size = arrSize = x3a->size*2; + array.count = x3a->count; + array.tbl = (x3node*)calloc(arrSize, sizeof(x3node) + sizeof(x3node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x3node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x3node *oldnp, *newnp; + oldnp = &(x3a->tbl[i]); + h = statehash(oldnp->key) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->key = oldnp->key; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x3a->tbl); + *x3a = array; + } + /* Insert the new data */ + h = ph & (x3a->size-1); + np = &(x3a->tbl[x3a->count++]); + np->key = key; + np->data = data; + if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); + np->next = x3a->ht[h]; + x3a->ht[h] = np; + np->from = &(x3a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct state *State_find(struct config *key) +{ + unsigned h; + x3node *np; + + if( x3a==0 ) return 0; + h = statehash(key) & (x3a->size-1); + np = x3a->ht[h]; + while( np ){ + if( statecmp(np->key,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return an array of pointers to all data in the table. +** The array is obtained from malloc. Return NULL if memory allocation +** problems, or if the array is empty. 
*/ +struct state **State_arrayof() +{ + struct state **array; + int i,arrSize; + if( x3a==0 ) return 0; + arrSize = x3a->count; + array = (struct state **)calloc(arrSize, sizeof(struct state *)); + if( array ){ + for(i=0; itbl[i].data; + } + return array; +} + +/* Hash a configuration */ +PRIVATE unsigned confighash(struct config *a) +{ + unsigned h=0; + h = h*571 + a->rp->index*37 + a->dot; + return h; +} + +/* There is one instance of the following structure for each +** associative array of type "x4". +*/ +struct s_x4 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x4node *tbl; /* The data stored here */ + struct s_x4node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x4". +*/ +typedef struct s_x4node { + struct config *data; /* The data */ + struct s_x4node *next; /* Next entry with the same hash */ + struct s_x4node **from; /* Previous link */ +} x4node; + +/* There is only one instance of the array, which is the following */ +static struct s_x4 *x4a; + +/* Allocate a new associative array */ +void Configtable_init(){ + if( x4a ) return; + x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); + if( x4a ){ + x4a->size = 64; + x4a->count = 0; + x4a->tbl = (x4node*)calloc(64, sizeof(x4node) + sizeof(x4node*)); + if( x4a->tbl==0 ){ + free(x4a); + x4a = 0; + }else{ + int i; + x4a->ht = (x4node**)&(x4a->tbl[64]); + for(i=0; i<64; i++) x4a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. 
+** Prior data with the same key is NOT overwritten */ +int Configtable_insert(struct config *data) +{ + x4node *np; + unsigned h; + unsigned ph; + + if( x4a==0 ) return 0; + ph = confighash(data); + h = ph & (x4a->size-1); + np = x4a->ht[h]; + while( np ){ + if( Configcmp((const char *) np->data,(const char *) data)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x4a->count>=x4a->size ){ + /* Need to make the hash table bigger */ + int i,arrSize; + struct s_x4 array; + array.size = arrSize = x4a->size*2; + array.count = x4a->count; + array.tbl = (x4node*)calloc(arrSize, sizeof(x4node) + sizeof(x4node*)); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x4node**)&(array.tbl[arrSize]); + for(i=0; icount; i++){ + x4node *oldnp, *newnp; + oldnp = &(x4a->tbl[i]); + h = confighash(oldnp->data) & (arrSize-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x4a->tbl); + *x4a = array; + } + /* Insert the new data */ + h = ph & (x4a->size-1); + np = &(x4a->tbl[x4a->count++]); + np->data = data; + if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); + np->next = x4a->ht[h]; + x4a->ht[h] = np; + np->from = &(x4a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct config *Configtable_find(struct config *key) +{ + int h; + x4node *np; + + if( x4a==0 ) return 0; + h = confighash(key) & (x4a->size-1); + np = x4a->ht[h]; + while( np ){ + if( Configcmp((const char *) np->data,(const char *) key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Remove all data from the table. Pass each data to the function "f" +** as it is removed. ("f" may be null to avoid this step.) 
*/ +void Configtable_clear(int(*f)(struct config *)) +{ + int i; + if( x4a==0 || x4a->count==0 ) return; + if( f ) for(i=0; icount; i++) (*f)(x4a->tbl[i].data); + for(i=0; isize; i++) x4a->ht[i] = 0; + x4a->count = 0; + return; +} diff --git a/lemon-src/lempar.js b/lemon-src/lempar.js new file mode 100644 index 0000000..e86d627 --- /dev/null +++ b/lemon-src/lempar.js @@ -0,0 +1,775 @@ +/* +** 2000-05-29 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +** Based on SQLite distribution v3.17.0 +** Adopted for JavaScript by Artem Butusov +** +************************************************************************* +** Driver template for the LEMON parser generator. +** +** The "lemon" program processes an LALR(1) input grammar file, then uses +** this template to construct a parser. The "lemon" program inserts text +** at each "%%" line. Also, any "P-a-r-s-e" identifer prefix (without the +** interstitial "-" characters) contained in this template is changed into +** the value of the %name directive from the grammar. Otherwise, the content +** of this template is copied straight through into the generate parser +** source file. +** +** The following is the concatenation of all %include directives from the +** input grammar file: +*/ +/************ Begin %include sections from the grammar ************************/ +%% +/**************** End of %include directives **********************************/ +function Parse() { +/* These constants specify the various numeric values for terminal symbols +** in a format understandable to "makeheaders". 
+***************** Begin makeheaders token definitions *************************/ +%% +/**************** End makeheaders token definitions ***************************/ + +/* The next sections is a series of control #defines. +** various aspects of the generated parser. +** YYNOCODE is a number of type YYCODETYPE that is not used for +** any terminal or nonterminal symbol. +** YYFALLBACK If defined, this indicates that one or more tokens +** (also known as: "terminal symbols") have fall-back +** values which should be used if the original symbol +** would not parse. This permits keywords to sometimes +** be used as identifiers, for example. +** YYSTACKDEPTH is the maximum depth of the parser's stack. If +** zero the stack is dynamically sized using realloc() +** YYERRORSYMBOL is the code number of the error symbol. If not +** defined, then do no error processing. +** YYNSTATE the combined number of states. +** YYNRULE the number of rules in the grammar +** YY_MAX_SHIFT Maximum value for shift actions +** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions +** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions +** YY_MIN_REDUCE Maximum value for reduce actions +** YY_ERROR_ACTION The yy_action[] code for syntax error +** YY_ACCEPT_ACTION The yy_action[] code for accept +** YY_NO_ACTION The yy_action[] code for no-op +*/ +/************* Begin control #defines *****************************************/ +%% +/************* End control #defines *******************************************/ + +/* Define the yytestcase() macro to be a no-op if is not already defined +** otherwise. +** +** Applications can choose to define yytestcase() in the %include section +** to a macro that can assist in verifying code coverage. For production +** code the yytestcase() macro should be turned off. But it is useful +** for testing. 
+*/ +if (!this.yytestcase) { + this.yytestcase = function () {}; +} + + +/* Next are the tables used to determine what action to take based on the +** current state and lookahead token. These tables are used to implement +** functions that take a state number and lookahead value and return an +** action integer. +** +** Suppose the action integer is N. Then the action is determined as +** follows +** +** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead +** token onto the stack and goto state N. +** +** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then +** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE. +** +** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE +** and YY_MAX_REDUCE +** +** N == YY_ERROR_ACTION A syntax error has occurred. +** +** N == YY_ACCEPT_ACTION The parser accepts its input. +** +** N == YY_NO_ACTION No such action. Denotes unused +** slots in the yy_action[] table. +** +** The action table is constructed as a single large table named yy_action[]. +** Given state S and lookahead X, the action is computed as either: +** +** (A) N = yy_action[ yy_shift_ofst[S] + X ] +** (B) N = yy_default[S] +** +** The (A) formula is preferred. The B formula is used instead if: +** (1) The yy_shift_ofst[S]+X value is out of range, or +** (2) yy_lookahead[yy_shift_ofst[S]+X] is not equal to X, or +** (3) yy_shift_ofst[S] equal YY_SHIFT_USE_DFLT. +** (Implementation note: YY_SHIFT_USE_DFLT is chosen so that +** YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X. +** Hence only tests (1) and (2) need to be evaluated.) +** +** The formulas above are for computing the action when the lookahead is +** a terminal symbol. If the lookahead is a non-terminal (as occurs after +** a reduce action) then the yy_reduce_ofst[] array is used in place of +** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of +** YY_SHIFT_USE_DFLT. 
+** +** The following are the tables generated in this section: +** +** yy_action[] A single table containing all actions. +** yy_lookahead[] A table containing the lookahead for each entry in +** yy_action. Used to detect hash collisions. +** yy_shift_ofst[] For each state, the offset into yy_action for +** shifting terminals. +** yy_reduce_ofst[] For each state, the offset into yy_action for +** shifting non-terminals after a reduce. +** yy_default[] Default action for each state. +** +*********** Begin parsing tables **********************************************/ +%% +/********** End of lemon-generated parsing tables *****************************/ + +/* The next table maps tokens (terminal symbols) into fallback tokens. +** If a construct like the following: +** +** %fallback ID X Y Z. +** +** appears in the grammar, then ID becomes a fallback token for X, Y, +** and Z. Whenever one of the tokens X, Y, or Z is input to the parser +** but it does not parse, the type of the token is changed to ID and +** the parse is retried before an error is thrown. +** +** This feature can be used, for example, to cause some keywords in a language +** to revert to identifiers if they keyword does not apply in the context where +** it appears. +*/ +this.yyFallback = [ +%% +]; + +/* The following structure represents a single element of the +** parser's stack. Information stored includes: +** +** + The state number for the parser at this level of the stack. +** +** + The value of the token stored at this level of the stack. +** (In other words, the "major" token.) +** +** + The semantic value stored at this level of the stack. This is +** the information used by the action routines in the grammar. +** It is sometimes called the "minor" token. +** +** After the "shift" half of a SHIFTREDUCE action, the stateno field +** actually contains the reduce action for the second half of the +** SHIFTREDUCE. 
+*/ +//{ +// stateno, /* The state-number, or reduce action in SHIFTREDUCE */ +// major, /* The major token value. This is the code +// ** number for the token at this stack level */ +// minor, /* The user-supplied minor token value. This +// ** is the value of the token */ +//} + +/* The state of the parser is completely contained in an instance of +** the following structure */ +this.yyhwm = 0; /* High-water mark of the stack */ +this.yyerrcnt = -1; /* Shifts left before out of the error */ +this.yystack = null; /* The parser's stack */ +this.yyidx = -1; /* Stack index of current element in the stack */ + +this.yyTraceCallback = null; +this.yyTracePrompt = ""; + +/* +** Turn parser tracing on by giving a stream to which to write the trace +** and a prompt to preface each trace message. Tracing is turned off +** by making either argument NULL +** +** Inputs: +**
<ul> +**
<li> A callback to which trace output should be written. +** If NULL, then tracing is turned off. +**
<li> A prefix string written at the beginning of every +** line of trace output. Default is "". +** </ul>
+** +** Outputs: +** None. +*/ +this.setTraceCallback = function (callback, prompt) { + this.yyTraceCallback = callback; + this.yyTracePrompt = prompt || ""; +} + +this.trace = function (message) { + this.yyTraceCallback(this.yyTracePrompt + message + "\n"); +} + +/* For tracing shifts, the names of all terminals and nonterminals +** are required. The following table supplies these names */ +this.yyTokenName = [ +%% +]; + +/* For tracing reduce actions, the names of all rules are required. +*/ +this.yyRuleName = [ +%% +]; +/* +** Try to increase the size of the parser stack. Return the number +** of errors. Return 0 on success. +*/ +this.yyGrowStack = function () { + // fix me: yystksz*2 + 100 + this.yystack.push({ + stateno: undefined, + major: undefined, + minor: undefined + }); +} + +/* Initialize a new parser that has already been allocated. +*/ +this.init = function () { + this.yyhwm = 0; + this.yyerrcnt = -1; + this.yyidx = 0; + if (this.YYSTACKDEPTH <= 0) { + this.yystack = []; + this.yyGrowStack(); + } else { + this.yystack = new Array(this.YYSTACKDEPTH); + for (var i = 0; i < this.YYSTACKDEPTH; i++) { + this.yystack[i] = { + stateno: undefined, + major: undefined, + minor: undefined + }; + } + } + var yytos = this.yystack[0]; + yytos.stateno = 0; + yytos.major = 0; +} + +/* The following function deletes the "minor type" or semantic value +** associated with a symbol. The symbol can be either a terminal +** or nonterminal. "yymajor" is the symbol code, and "yypminor" is +** a pointer to the value to be deleted. The code used to do the +** deletions is derived from the %destructor and/or %token_destructor +** directives of the input grammar. +*/ +this.yy_destructor = function ( + yymajor, /* Type code for object to destroy */ + yyminor /* The object to be destroyed */ +) { + switch (yymajor) { + /* Here is inserted the actions which take place when a + ** terminal or non-terminal is destroyed. 
This can happen + ** when the symbol is popped from the stack during a + ** reduce or during error processing or when a parser is + ** being destroyed before it is finished parsing. + ** + ** Note: during a reduce, the only symbols destroyed are those + ** which appear on the RHS of the rule, but which are *not* used + ** inside the C code. + */ +/********* Begin destructor definitions ***************************************/ +%% +/********* End destructor definitions *****************************************/ + default: break; /* If no destructor action specified: do nothing */ + } +} + +/* +** Pop the parser's stack once. +** +** If there is a destructor routine associated with the token which +** is popped from the stack, then call it. +*/ +this.yy_pop_parser_stack = function () { + // assert( pParser->yytos!=0 ); + // assert( pParser->yytos > pParser->yystack ); + var yytos = this.yystack[this.yyidx]; + + if (this.yyTraceCallback) { + this.trace("Popping " + this.yyTokenName[yytos.major]); + } + this.yy_destructor(yytos.major, yytos.minor); + + this.yyidx--; +} + +/* +** Clear all secondary memory allocations from the parser +*/ +this.finalize = function () { + while (this.yyidx > 0) { + this.yy_pop_parser_stack(); + } + this.yystack = null; +} + +/* +** Return the peak depth of the stack for a parser. +*/ +this.getStackPeak = function () { + return this.yyhwm; +} + +/* +** Find the appropriate action for a parser given the terminal +** look-ahead token iLookAhead. 
+*/ +this.yy_find_shift_action = function ( + iLookAhead /* The look-ahead token */ +) { + var yytos = this.yystack[this.yyidx]; + var stateno = yytos.stateno; + + if (stateno >= this.YY_MIN_REDUCE) { + return stateno; + } + + // assert( stateno <= YY_SHIFT_COUNT ); + + do { + var i = this.yy_shift_ofst[stateno]; + // assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; + if (i < 0 || i >= this.yy_action.length || this.yy_lookahead[i] != iLookAhead) { + if (this.YYFALLBACK) { + var iFallback; /* Fallback token */ + if ((iLookAhead < this.yyFallback.length) + && (iFallback = this.yyFallback[iLookAhead]) != 0 + ) { + if (this.yyTraceCallback) { + this.trace("FALLBACK " + this.yyTokenName[iLookAhead] + " => " + this.yyTokenName[iFallback]); + } + } + // assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */ + iLookAhead = iFallback; + continue; + } + + if (this.YYWILDCARD) { + var j = i - iLookAhead + this.YYWILDCARD; + var cond1 = (this.YY_SHIFT_MIN + this.YYWILDCARD) < 0 ? j >= 0 : true; + var cond2 = (this.YY_SHIFT_MAX + this.YYWILDCARD) >= this.yy_action.length ? j < this.yy_action.length : true; + if (cond1 && cond2 && this.yy_lookahead[j] == this.YYWILDCARD && iLookAhead > 0) { + if (this.yyTraceCallback) { + this.trace("WILDCARD " + this.yyTokenName[iLookAhead] + " => " + this.yyTokenName[this.YYWILDCARD]); + } + return this.yy_action[j]; + } + } + + return this.yy_default[stateno]; + } else { + return this.yy_action[i]; + } + } while (true); +} + +/* +** Find the appropriate action for a parser given the non-terminal +** look-ahead token iLookAhead. 
+*/ +this.yy_find_reduce_action = function ( + stateno, /* Current state number */ + iLookAhead /* The look-ahead token */ +) { + if (this.YYERRORSYMBOL) { + if (stateno > this.YY_REDUCE_COUNT) { + return this.yy_default[stateno]; + } + } else { + // assert( stateno<=YY_REDUCE_COUNT ); + } + + var i = this.yy_reduce_ofst[stateno]; + // assert( i!=YY_REDUCE_USE_DFLT ); + // assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; + + if (this.YYERRORSYMBOL) { + if (i < 0 || i >= this.yy_action.length || this.yy_lookahead[i] != iLookAhead) { + return this.yy_default[stateno]; + } + } else { + // assert( i>=0 && i 0) { + this.yy_pop_parser_stack(); + } + /* Here code is inserted which will execute if the parser + ** stack every overflows */ +/******** Begin %stack_overflow code ******************************************/ +%% +/******** End %stack_overflow code ********************************************/ +} + +/* +** Print tracing information for a SHIFT action +*/ +this.yyTraceShift = function (yyNewState) { + if (this.yyTraceCallback) { + var yytos = this.yystack[this.yyidx]; + if (yyNewState < this.YYNSTATE) { + this.trace("Shift '" + this.yyTokenName[yytos.major] + "', go to state " + yyNewState); + } else { + this.trace("Shift '" + this.yyTokenName[yytos.major] + "'"); + } + } +} + +/* +** Perform a shift action. 
+*/ +this.yy_shift = function ( + yyNewState, /* The new state to shift in */ + yyMajor, /* The major token to shift in */ + yyMinor /* The minor token to shift in */ +) { + this.yyidx++; + + if (this.yyidx > this.yyhwm) { + this.yyhwm++; + // assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) ); + } + + if (this.YYSTACKDEPTH > 0) { + if (this.yyidx >= this.YYSTACKDEPTH) { + this.yyidx--; + this.yyStackOverflow(); + return; + } + } else { + if (this.yyidx >= this.yystack.length) { + this.yyGrowStack(); + } + } + + if (yyNewState > this.YY_MAX_SHIFT) { + yyNewState += this.YY_MIN_REDUCE - this.YY_MIN_SHIFTREDUCE; + } + + var yytos = this.yystack[this.yyidx]; + yytos.stateno = yyNewState; + yytos.major = yyMajor; + yytos.minor = yyMinor; + + this.yyTraceShift(yyNewState); +} + +/* The following table contains information about every rule that +** is used during the reduce. +*/ +//{ +// lhs, /* Symbol on the left-hand side of the rule */ +// nrhs, /* Number of right-hand side symbols in the rule */ +//} +this.yyRuleInfo = [ +%% +]; + +/* +** Perform a reduce action and the shift that must immediately +** follow the reduce. +*/ +this.yy_reduce = function ( + yyruleno /* Number of the rule by which to reduce */ +){ + var yymsp = this.yystack[this.yyidx]; /* The top of the parser's stack */ + + if (yyruleno < this.yyRuleName.length) { + var yysize = this.yyRuleInfo[yyruleno].nrhs; + var ruleName = this.yyRuleName[yyruleno]; + var newStateNo = this.yystack[this.yyidx - yysize].stateno; + if (this.yyTraceCallback) { + this.trace("Reduce [" + ruleName + "], go to state " + newStateNo + "."); + } + } + + /* Check that the stack is large enough to grow by a single entry + ** if the RHS of the rule is empty. 
This ensures that there is room + ** enough on the stack to push the LHS value */ + if (this.yyRuleInfo[yyruleno].nrhs == 0) { + if (this.yyidx > this.yyhwm) { + this.yyhwm++; + // assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack)); + } + if (this.YYSTACKDEPTH > 0) { + if (this.yyidx >= this.YYSTACKDEPTH - 1) { + this.yyStackOverflow(); + return; + } + } else { + if (this.yyidx >= this.yystack.length - 1) { + this.yyGrowStack(); + yymsp = this.yystack[this.yyidx]; + } + } + } + + var yylhsminor; + switch (yyruleno) { + /* Beginning here are the reduction cases. A typical example + ** follows: + ** case 0: + ** #line + ** { ... } // User supplied code + ** #line + ** break; + */ +/********** Begin reduce actions **********************************************/ +%% +/********** End reduce actions ************************************************/ + }; + // assert( yyruleno this.YY_MAX_SHIFT) { + yyact += this.YY_MIN_REDUCE - this.YY_MIN_SHIFTREDUCE; + } + this.yyidx -= yysize - 1; + yymsp = this.yystack[this.yyidx]; + yymsp.stateno = yyact; + yymsp.major = yygoto; + this.yyTraceShift(yyact); + } else { + // assert( yyact == YY_ACCEPT_ACTION ); + this.yyidx -= yysize; + this.yy_accept(); + } +} + +/* +** The following code executes when the parse fails +*/ +this.yy_parse_failed = function () { + if (this.yyTraceCallback) { + this.trace("Fail!"); + } + while (this.yyidx > 0) { + this.yy_pop_parser_stack(); + } + /* Here code is inserted which will be executed whenever the + ** parser fails */ +/************ Begin %parse_failure code ***************************************/ +%% +/************ End %parse_failure code *****************************************/ +} + +/* +** The following code executes when a syntax error first occurs. 
+*/ +this.yy_syntax_error = function ( + yymajor, /* The major type of the error token */ + yyminor /* The minor type of the error token */ +) { + var TOKEN = yyminor; +/************ Begin %syntax_error code ****************************************/ +%% +/************ End %syntax_error code ******************************************/ +} + +/* +** The following is executed when the parser accepts +*/ +this.yy_accept = function () { + if (this.yyTraceCallback) { + this.trace("Accept!"); + } + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt = -1; + } + // assert( yypParser->yytos==yypParser->yystack ); + /* Here code is inserted which will be executed whenever the + ** parser accepts */ +/*********** Begin %parse_accept code *****************************************/ +%% +/*********** End %parse_accept code *******************************************/ +} + +/* The main parser program. +** The first argument is a pointer to a structure obtained from +** "ParseAlloc" which describes the current state of the parser. +** The second argument is the major token number. The third is +** the minor token. The fourth optional argument is whatever the +** user wants (and specified in the grammar) and is available for +** use by the action routines. +** +** Inputs: +**
<ul> +** <li> A pointer to the parser (an opaque structure.) +** <li> The major token number. +** <li> The minor token number. +** <li> An option argument of a grammar-specified type. +** </ul>
+** +** Outputs: +** None. +*/ +this.parse = function ( + yymajor, /* The major token code number */ + yyminor /* The value for the token */ +) { + var yyact; /* The parser action. */ + var yyendofinput; /* True if we are at the end of input */ + var yyerrorhit = 0; /* True if yymajor has invoked an error */ + + //assert( yypParser->yytos!=0 ); + + if (yymajor === undefined || yymajor === null) { + yymajor = 0; + } + + yyendofinput = yymajor == 0; + + if (this.yyTraceCallback) { + this.trace("Input '" + this.yyTokenName[yymajor] + "'"); + } + + do { + yyact = this.yy_find_shift_action(yymajor); + if (yyact <= this.YY_MAX_SHIFTREDUCE) { // check me? + this.yy_shift(yyact, yymajor, yyminor); + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt--; + } + yymajor = this.YYNOCODE; + } else if (yyact <= this.YY_MAX_REDUCE) { // check me? + this.yy_reduce(yyact - this.YY_MIN_REDUCE); // check me? + } else { + // assert( yyact == YY_ERROR_ACTION ); + if (this.yyTraceCallback) { + this.trace("Syntax Error!"); + } + if (this.YYERRORSYMBOL) { + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol. + ** + ** * Set the error count to three. + ** + ** * Begin accepting and shifting new tokens. No new error + ** processing will occur until three tokens have been + ** shifted successfully. 
+ ** + */ + if (this.yyerrcnt < 0) { + this.yy_syntax_error(yymajor, yyminor); + } + var yymx = this.yystack[this.yyidx].major; + if (yymx == this.YYERRORSYMBOL || yyerrorhit) { + if (this.yyTraceCallback) { + this.trace("Discard input token " + this.yyTokenName[yymajor]); + } + this.yy_destructor(yymajor, yyminor); + yymajor = this.YYNOCODE; + } else { + while (this.yyidx >= 0 + && yymx != this.YYERRORSYMBOL + && (yyact = this.yy_find_reduce_action( + this.yystack[this.yyidx].stateno, + this.YYERRORSYMBOL)) >= this.YY_MIN_REDUCE // check me? + ) { + this.yy_pop_parser_stack(); + } + if (this.yyidx < 0 || yymajor == 0) { + this.yy_destructor(yymajor, yyminor); + this.yy_parse_failed(); + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt = -1; + } + yymajor = this.YYNOCODE; + } else if (yymx != this.YYERRORSYMBOL) { + this.yy_shift(yyact, this.YYERRORSYMBOL, yyminor); // check me? + } + } + this.yyerrcnt = 3; + yyerrorhit = 1; + } else if (this.YYNOERRORRECOVERY) { + /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to + ** do any kind of error recovery. Instead, simply invoke the syntax + ** error routine and continue going as if nothing had happened. + ** + ** Applications can set this macro (for example inside %include) if + ** they intend to abandon the parse upon the first syntax error seen. + */ + this.yy_syntax_error(yymajor, yyminor); + this.yy_destructor(yymajor, yyminor); + yymajor = this.YYNOCODE; + } else { /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. + ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted. 
+                */
+                if (this.yyerrcnt <= 0) {
+                    this.yy_syntax_error(yymajor, yyminor);
+                }
+                this.yyerrcnt = 3;
+                this.yy_destructor(yymajor, yyminor);
+                if (yyendofinput) {
+                    this.yy_parse_failed();
+                    if (!this.YYNOERRORRECOVERY) {
+                        this.yyerrcnt = -1;
+                    }
+                }
+                yymajor = this.YYNOCODE;
+            }
+        }
+    } while (yymajor != this.YYNOCODE && this.yyidx > 0);
+
+    if (this.yyTraceCallback) {
+        var remainingTokens = [];
+        for (var i = 1; i <= this.yyidx; i++) {
+            remainingTokens.push(this.yyTokenName[this.yystack[i].major]);
+        }
+        this.trace("Return. Stack=[" + remainingTokens.join(" ") + "]");
+    }
+}
+
+this.init();
+
+} // function Parse()
diff --git a/main.js b/main.js
new file mode 100644
index 0000000..08e1989
--- /dev/null
+++ b/main.js
@@ -0,0 +1,54 @@
+/**
+ * Created by Aleksey Chichenkov on 1/28/19.
+ */
+
+var js_beautify = require("js-beautify");
+var args = require("args-parser")(process.argv);
+var fs = require("fs");
+var exec = require('child_process').exec;
+
+var program_path = "./lemon-src/lemon-js";
+var parser_path = "parsers/filters/";
+var file_name = "parser.y";
+var temp_file_name = "temp_parser.y";
+
+// Copies parser.y to temp_parser.y, replacing every `&&...REPLACER{path}&&`
+// marker with the UTF-8 contents of the file named by `path`.
+var update_parser_y = function () {
+    var source_parser_y = fs.readFileSync(parser_path + file_name, "utf8");
+
+    // A replacer function (not a replacement string) is required here: a string
+    // would expand "$&", "$1", "$$" found inside the included code, and would
+    // substitute the first marker's file for every marker in the grammar.
+    source_parser_y = source_parser_y.replace(/&&.*?REPLACER\{(.*?)\}&&/gm, function (marker, file_path) {
+        return fs.readFileSync(file_path, "utf8");
+    });
+    // Written even when no marker matched: start() always runs lemon-js on this file.
+    fs.writeFileSync(parser_path + temp_file_name, source_parser_y);
+};
+
+var post_process_parser = function () {
+    var out_js = fs.readFileSync(parser_path + "temp_parser.js", "utf8");
+    out_js = js_beautify(out_js, {indent_size: 4, space_in_empty_paren: true});
+    fs.writeFileSync(parser_path + "parser.js", out_js);
+
+    var temp_parser_out = fs.readFileSync(parser_path + "temp_parser.out", "utf8");
+    fs.writeFileSync(parser_path + "parser.out", temp_parser_out);
+};
+
+var start = function
() { + update_parser_y(); + + exec(program_path + " " + parser_path + temp_file_name + " -l", function(err, stdout, stderr) { + err && console.log("ERROR: ", err); + err && process.exit(1); + + post_process_parser(); + + fs.unlinkSync(parser_path + temp_file_name); + fs.unlinkSync(parser_path + "temp_parser.js"); + fs.unlinkSync(parser_path + "temp_parser.out"); + }); +}; + +start(); \ No newline at end of file diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..12a31c9 --- /dev/null +++ b/package-lock.json @@ -0,0 +1,223 @@ +{ + "name": "lemon-js-generator", + "requires": true, + "lockfileVersion": 1, + "dependencies": { + "@types/node": { + "version": "10.12.18", + "resolved": "https://registry.npmjs.org/@types/node/-/node-10.12.18.tgz", + "integrity": "sha512-fh+pAqt4xRzPfqA6eh3Z2y6fyZavRIumvjhaCL753+TVkGKGhpPeyrJG2JftD0T9q4GF00KjefsQ+PQNDdWQaQ==" + }, + "@types/semver": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/@types/semver/-/semver-5.5.0.tgz", + "integrity": "sha512-41qEJgBH/TWgo5NFSvBCJ1qkoi3Q6ONSF2avrHq1LVEZfYpdHmj0y9SuTK+u9ZhG1sYQKBL1AWXKyLWP4RaUoQ==" + }, + "abbrev": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", + "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==" + }, + "args-parser": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/args-parser/-/args-parser-1.1.0.tgz", + "integrity": "sha1-YlO/zWlNJ5/mPqr9eNYo0UoF/6k=" + }, + "balanced-match": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", + "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=" + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "requires": { + 
"balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "commander": { + "version": "2.19.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.19.0.tgz", + "integrity": "sha512-6tvAOO+D6OENvRAh524Dh9jcfKTYDQAqvqezbCW82xj5X0pSrcpxtvRKHLG0yBY6SD7PSDrJaj+0AiOcKVd1Xg==" + }, + "concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=" + }, + "config-chain": { + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/config-chain/-/config-chain-1.1.12.tgz", + "integrity": "sha512-a1eOIcu8+7lUInge4Rpf/n4Krkf3Dd9lqhljRzII1/Zno/kRtUWnznPO3jOKBmTEktkt3fkxisUcivoj0ebzoA==", + "requires": { + "ini": "^1.3.4", + "proto-list": "~1.2.1" + } + }, + "editorconfig": { + "version": "0.15.2", + "resolved": "https://registry.npmjs.org/editorconfig/-/editorconfig-0.15.2.tgz", + "integrity": "sha512-GWjSI19PVJAM9IZRGOS+YKI8LN+/sjkSjNyvxL5ucqP9/IqtYNXBaQ/6c/hkPNYQHyOHra2KoXZI/JVpuqwmcQ==", + "requires": { + "@types/node": "^10.11.7", + "@types/semver": "^5.5.0", + "commander": "^2.19.0", + "lru-cache": "^4.1.3", + "semver": "^5.6.0", + "sigmund": "^1.0.1" + } + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha1-FQStJSMVjKpA20onh8sBQRmU6k8=" + }, + "glob": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.3.tgz", + "integrity": "sha512-vcfuiIxogLV4DlGBHIUOwI0IbrJ8HWPc4MU7HzviGeNho/UJDfi6B5p3sHeWIQ0KGIU0Jpxi5ZHxemQfLkkAwQ==", + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha1-Sb1jMdfQLQwJvJEKEHW6gWW1bfk=", + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + "inherits": { 
+ "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + }, + "ini": { + "version": "1.3.5", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.5.tgz", + "integrity": "sha512-RZY5huIKCMRWDUqZlEi72f/lmXKMvuszcMBduliQ3nnWbx9X/ZBQO7DijMEYS9EhHBb2qacRUMtC7svLwe0lcw==" + }, + "js-beautify": { + "version": "1.8.9", + "resolved": "https://registry.npmjs.org/js-beautify/-/js-beautify-1.8.9.tgz", + "integrity": "sha512-MwPmLywK9RSX0SPsUJjN7i+RQY9w/yC17Lbrq9ViEefpLRgqAR2BgrMN2AbifkUuhDV8tRauLhLda/9+bE0YQA==", + "requires": { + "config-chain": "^1.1.12", + "editorconfig": "^0.15.2", + "glob": "^7.1.3", + "mkdirp": "~0.5.0", + "nopt": "~4.0.1" + } + }, + "lru-cache": { + "version": "4.1.5", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.5.tgz", + "integrity": "sha512-sWZlbEP2OsHNkXrMl5GYk/jKk70MBng6UU4YI/qGDYbgf6YbP4EvmqISbXCoJiRKs+1bSpFHVgQxvJ17F2li5g==", + "requires": { + "pseudomap": "^1.0.2", + "yallist": "^2.1.2" + } + }, + "minimatch": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz", + "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==", + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "minimist": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", + "integrity": "sha1-hX/Kv8M5fSYluCKCYuhqp6ARsF0=" + }, + "mkdirp": { + "version": "0.5.1", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.1.tgz", + "integrity": "sha1-MAV0OOrGz3+MR2fzhkjWaX11yQM=", + "requires": { + "minimist": "0.0.8" + } + }, + "nopt": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/nopt/-/nopt-4.0.1.tgz", + "integrity": "sha1-0NRoWv1UFRk8jHUFYC0NF81kR00=", + "requires": { + "abbrev": "1", + "osenv": "^0.1.4" + } + }, + "once": { + "version": "1.4.0", + "resolved": 
"https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha1-WDsap3WWHUsROsF9nFC6753Xa9E=", + "requires": { + "wrappy": "1" + } + }, + "os-homedir": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/os-homedir/-/os-homedir-1.0.2.tgz", + "integrity": "sha1-/7xJiDNuDoM94MFox+8VISGqf7M=" + }, + "os-tmpdir": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", + "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=" + }, + "osenv": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/osenv/-/osenv-0.1.5.tgz", + "integrity": "sha512-0CWcCECdMVc2Rw3U5w9ZjqX6ga6ubk1xDVKxtBQPK7wis/0F2r9T6k4ydGYhecl7YUBxBVxhL5oisPsNxAPe2g==", + "requires": { + "os-homedir": "^1.0.0", + "os-tmpdir": "^1.0.0" + } + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha1-F0uSaHNVNP+8es5r9TpanhtcX18=" + }, + "proto-list": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/proto-list/-/proto-list-1.2.4.tgz", + "integrity": "sha1-IS1b/hMYMGpCD2QCuOJv85ZHqEk=" + }, + "pseudomap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz", + "integrity": "sha1-8FKijacOYYkX7wqKw0wa5aaChrM=" + }, + "semver": { + "version": "5.6.0", + "resolved": "https://registry.npmjs.org/semver/-/semver-5.6.0.tgz", + "integrity": "sha512-RS9R6R35NYgQn++fkDWaOmqGoj4Ek9gGs+DPxNUZKuwE183xjJroKvyo1IzVFeXvUrvmALy6FWD5xrdJT25gMg==" + }, + "sigmund": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/sigmund/-/sigmund-1.0.1.tgz", + "integrity": "sha1-P/IfGYytIXX587eBhT/ZTQ0ZtZA=" + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" + }, + "yallist": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-2.1.2.tgz", + "integrity": 
"sha1-HBH5IY8HYImkfdUS+TxmmaaoHVI=" + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..33ef518 --- /dev/null +++ b/package.json @@ -0,0 +1,14 @@ +{ + "name": "lemon-js-generator", + "requires": true, + "lockfileVersion": 1, + "license": "MIT", + "author": { + "name": "chichenkov", + "email": "rolahd@yandex.ru" + }, + "dependencies": { + "args-parser": "^1.1.0", + "js-beautify": "^1.8.9" + } +} diff --git a/parsers/filters/lexer.js b/parsers/filters/lexer.js new file mode 100644 index 0000000..0badc26 --- /dev/null +++ b/parsers/filters/lexer.js @@ -0,0 +1,1922 @@ +/* Generated by re2c 1.0.3 on Fri Jan 25 16:37:52 2019 */ +var types = [ + "LSB", + "RSB", + "LCB", + "RCB", + "COLON", + "COMMA", + "DOT", + "REM", + "GT", + "GTE", + "LT", + "LTE", + "EQ", + "NEQ", + "LIKE", + "NLIKE", + "AND", + "OR", + "NOT", + "ADDRESS", + "TIME", + "TIMEDIFF", + "INTEGER_LITERAL", + "FLOAT_LITERAL", + "BOOL_LITERAL", + "ID" +]; + +var errors = { + "-2": "not found close quote or singleQuote", + "-1": "not found any lexemes or errors or anything else", + "0": "success", + "1": "found unknown symbol" +}; + +var Lexer = function(_string) { + this._last_found_lexeme = { + error: -1 + }; + this._end = false; + this._error = false; + this._string = _string; + this._state = 1; + this._yy_char = null; + this._yy_lex_start = 0; + this._yy_cursor = 0; + this._yy_marker = 0; + this._yy_accept = 0; +}; + +Lexer.prototype = { + types: types, + errors: errors, + _notFoundCloseQuote: function() { + this._error = true; + this._last_found_lexeme = { + error: 2, + start: this._yy_lex_start, + end: this._yy_cursor + }; + + console.log(print_f("LEXER: Not found close quote start: %s", this._yy_cursor)); + }, + _unknownSymbol: function() { + this._error = true; + this._last_found_lexeme = { + error: 1, + start: this._yy_lex_start, + end: this._yy_cursor + }; + + console.log(print_f("LEXER: Found unknown symbol on position: %s", this._yy_cursor)); + }, + _foundLexeme: 
function(_lexeme) { + console.log(print_f("LEXER: found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor))); + this._last_found_lexeme = { + error: 0, + lexeme: _lexeme, + start: this._yy_lex_start, + end: this._yy_cursor + }; + }, + _endOfString: function() { + console.log(print_f("LEXER: search end\n")); + this._end = true; + this._last_found_lexeme = { + error: -2 + }; + }, + _searchString: function() { + var _quote = this._string[this._yy_cursor - 1]; + var found_back_slash = false; + while (this._yy_cursor < this._string.length) { + this._yy_char = this._string[this._yy_cursor]; + if (_quote == '"') { + switch (this._yy_char) { + case "\\": + found_back_slash = true; + break; + case '"': + if (!found_back_slash) { + this._yy_cursor++; + this._foundLexeme("STRING_LITERAL"); + return; + } + found_back_slash = false; + break; + } + } else if (_quote == "'") { + switch (this._yy_char) { + case "\\": + found_back_slash = true; + break; + case "'": + if (!found_back_slash) { + this._yy_cursor++; + this._foundLexeme("STRING_LITERAL"); + return; + } + found_back_slash = false; + break; + } + } + this._yy_cursor++; + } + + this._notFoundCloseQuote(); + }, + _set_next: function() { + this._yy_accept = 0; + this._state = 1; + this._yy_lex_start = this._yy_cursor; + this._yy_marker = this._yy_cursor; + }, + next: function() { + if (this._end || this._error) return null; + + this.search(); + return this.token(); + }, + token: function() { + return this._last_found_lexeme; + }, + search: function() { + if (this._end) return false; + + while (true) { + switch (this._state) { + case 1: + this._yy_char = this._string[this._yy_cursor]; + (function() { + switch (this._yy_char) { + case undefined: + this._state = 2; + break; + case '\t': + case '\n': + case '\r': + case ' ': + this._state = 6; + break; + case '!': + this._state = 8; + break; + case '"': + case '\'': + this._state = 
9; + break; + case '%': + this._state = 11; + break; + case '(': + this._state = 13; + break; + case ')': + this._state = 15; + break; + case ',': + this._state = 17; + break; + case '-': + this._state = 19; + break; + case '.': + this._state = 20; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this._state = 22; + break; + case ':': + this._state = 25; + break; + case '<': + this._state = 27; + break; + case '=': + this._state = 29; + break; + case '>': + this._state = 30; + break; + case 'A': + this._state = 32; + break; + case 'B': + case 'C': + case 'D': + case 'E': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'M': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'b': + case 'c': + case 'd': + case 'e': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'm': + case 'p': + case 'q': + case 'r': + case 's': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state = 34; + break; + case 'F': + case 'f': + this._state = 36; + break; + case 'L': + case 'l': + this._state = 37; + break; + case 'N': + case 'n': + this._state = 38; + break; + case 'O': + case 'o': + this._state = 39; + break; + case 'T': + this._state = 40; + break; + case '[': + this._state = 41; + break; + case ']': + this._state = 43; + break; + case 'a': + this._state = 45; + break; + case 't': + this._state = 46; + break; + default: + this._state = 4; + break; + } + }.bind(this))(); + break; + case 2: + ++this._yy_cursor; { + this._endOfString(); + return; + } + case 4: + ++this._yy_cursor; + case 5: + { + this._unknownSymbol();this._set_next(); + return; + } + case 6: + ++this._yy_cursor; { + this._set_next(); + break; + } + case 8: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '=': + this._state = 47; + 
break; + default: + this._state = 5; + break; + } + }.bind(this))(); + break; + case 9: + ++this._yy_cursor; { + this._state = 100000000; + break; + } + case 11: + ++this._yy_cursor; { + this._foundLexeme("REM"); + this._set_next(); + return; + } + case 13: + ++this._yy_cursor; { + this._foundLexeme("LCB"); + this._set_next(); + return; + } + case 15: + ++this._yy_cursor; { + this._foundLexeme("RCB"); + this._set_next(); + return; + } + case 17: + ++this._yy_cursor; { + this._foundLexeme("COMMA"); + this._set_next(); + return; + } + case 19: + this._yy_accept = 0; + this._yy_char = this._string[(this._yy_marker = ++this._yy_cursor)]; + (function() { + switch (this._yy_char) { + case '.': + this._state = 49; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this._state = 22; + break; + default: + this._state = 5; + break; + } + }.bind(this))(); + break; + case 20: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this._state = 51; + break; + default: + this._state = 21; + break; + } + }.bind(this))(); + break; + case 21: + { + this._foundLexeme("DOT");this._set_next(); + return; + } + case 22: + this._yy_accept = 1; + this._yy_char = this._string[(this._yy_marker = ++this._yy_cursor)]; + (function() { + switch (this._yy_char) { + case '.': + this._state = 49; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this._state = 22; + break; + default: + this._state = 24; + break; + } + }.bind(this))(); + break; + case 24: + { + this._foundLexeme("INTEGER_LITERAL");this._set_next(); + return; + } + case 25: + ++this._yy_cursor; { + this._foundLexeme("COLON"); + this._set_next(); + return; + } + case 27: + this._yy_char = 
this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '=': + this._state = 54; + break; + default: + this._state = 28; + break; + } + }.bind(this))(); + break; + case 28: + { + this._foundLexeme("LT");this._set_next(); + return; + } + case 29: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '=': + this._state = 56; + break; + default: + this._state = 5; + break; + } + }.bind(this))(); + break; + case 30: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '=': + this._state = 58; + break; + default: + this._state = 31; + break; + } + }.bind(this))(); + break; + case 31: + { + this._foundLexeme("GT");this._set_next(); + return; + } + case 32: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'N': + case 'n': + this._state = 60; + break; + case 'd': + this._state = 61; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 33: + { + this._foundLexeme("ID");this._set_next(); + return; + } + case 34: + this._yy_char = this._string[++this._yy_cursor]; + case 35: + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state 
= 34; + break; + default: + this._state = 33; + break; + } + }.bind(this))(); + break; + case 36: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'A': + case 'a': + this._state = 62; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 37: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'I': + case 'i': + this._state = 63; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 38: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'L': + case 'l': + this._state = 64; + break; + case 'O': + case 'o': + this._state = 65; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 39: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'R': + case 'r': + this._state = 66; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 40: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'R': + case 'r': + this._state = 68; + break; + case 'i': + this._state = 69; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 41: + ++this._yy_cursor; { + this._foundLexeme("LSB"); + this._set_next(); + return; + } + case 43: + ++this._yy_cursor; { + this._foundLexeme("RSB"); + this._set_next(); + return; + } + case 45: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'N': + case 'n': + this._state = 60; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 46: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'R': + case 'r': + this._state = 68; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 47: + 
++this._yy_cursor; { + this._foundLexeme("NEQ"); + this._set_next(); + return; + } + case 49: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this._state = 51; + break; + default: + this._state = 50; + break; + } + }.bind(this))(); + break; + case 50: + this._yy_cursor = this._yy_marker; + switch (this._yy_accept) { + case 0: + this._state = 5; + break; + case 1: + this._state = 24; + break; + default: + this._state = 53; + break; + } + case 51: + this._yy_accept = 2; + this._yy_char = this._string[(this._yy_marker = ++this._yy_cursor)]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this._state = 51; + break; + case 'e': + this._state = 70; + break; + default: + this._state = 53; + break; + } + }.bind(this))(); + break; + case 53: + { + this._foundLexeme("FLOAT_LITERAL");this._set_next(); + return; + } + case 54: + ++this._yy_cursor; { + this._foundLexeme("LTE"); + this._set_next(); + return; + } + case 56: + ++this._yy_cursor; { + this._foundLexeme("EQ"); + this._set_next(); + return; + } + case 58: + ++this._yy_cursor; { + this._foundLexeme("GTE"); + this._set_next(); + return; + } + case 60: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'D': + case 'd': + this._state = 71; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 61: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'd': + this._state = 73; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 62: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'L': + case 'l': + this._state = 74; + break; + 
default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 63: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'K': + case 'k': + this._state = 75; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 64: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'I': + case 'i': + this._state = 76; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 65: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'T': + case 't': + this._state = 77; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 66: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state = 34; + break; + default: + this._state = 67; + break; + } + }.bind(this))(); + break; + case 67: + { + this._foundLexeme("OR");this._set_next(); + return; + } + case 68: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'U': + case 'u': + this._state = 79; + break; + default: + this._state = 35; + break; + } + 
}.bind(this))(); + break; + case 69: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'm': + this._state = 80; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 70: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '-': + this._state = 81; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this._state = 82; + break; + default: + this._state = 50; + break; + } + }.bind(this))(); + break; + case 71: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state = 34; + break; + default: + this._state = 72; + break; + } + }.bind(this))(); + break; + case 72: + { + this._foundLexeme("AND");this._set_next(); + return; + } + case 73: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'r': + this._state = 84; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 74: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'S': + case 's': + 
this._state = 79; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 75: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'E': + case 'e': + this._state = 85; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 76: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'K': + case 'k': + this._state = 87; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 77: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state = 34; + break; + default: + this._state = 78; + break; + } + }.bind(this))(); + break; + case 78: + { + this._foundLexeme("NOT");this._set_next(); + return; + } + case 79: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'E': + case 'e': + this._state = 88; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 80: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'e': + this._state = 90; + break; + default: + this._state = 
35; + break; + } + }.bind(this))(); + break; + case 81: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this._state = 82; + break; + default: + this._state = 50; + break; + } + }.bind(this))(); + break; + case 82: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this._state = 82; + break; + default: + this._state = 53; + break; + } + }.bind(this))(); + break; + case 84: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'e': + this._state = 92; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 85: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state = 34; + break; + default: + this._state = 86; + break; + } + }.bind(this))(); + break; + case 86: + { + this._foundLexeme("LIKE");this._set_next(); + return; + } + case 87: + this._yy_char = 
this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'E': + case 'e': + this._state = 93; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 88: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state = 34; + break; + default: + this._state = 89; + break; + } + }.bind(this))(); + break; + case 89: + { + this._foundLexeme("BOOL_LITERAL");this._set_next(); + return; + } + case 90: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': 
+ case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state = 34; + break; + case 'D': + this._state = 95; + break; + default: + this._state = 91; + break; + } + }.bind(this))(); + break; + case 91: + { + this._foundLexeme("TIME");this._set_next(); + return; + } + case 92: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 's': + this._state = 96; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 93: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state = 34; + break; + default: + this._state = 94; + break; + } + }.bind(this))(); + break; + case 94: + { + this._foundLexeme("NLIKE");this._set_next(); + return; + } + case 95: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'i': + this._state = 97; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 96: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 's': + this._state = 98; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + 
break; + case 97: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'f': + this._state = 100; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 98: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + this._state = 34; + break; + default: + this._state = 99; + break; + } + }.bind(this))(); + break; + case 99: + { + this._foundLexeme("ADDRESS");this._set_next(); + return; + } + case 100: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case 'f': + this._state = 101; + break; + default: + this._state = 35; + break; + } + }.bind(this))(); + break; + case 101: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': 
+                            case 'Y':
+                            case 'Z':
+                            case '_':
+                            case 'a':
+                            case 'b':
+                            case 'c':
+                            case 'd':
+                            case 'e':
+                            case 'f':
+                            case 'g':
+                            case 'h':
+                            case 'i':
+                            case 'j':
+                            case 'k':
+                            case 'l':
+                            case 'm':
+                            case 'n':
+                            case 'o':
+                            case 'p':
+                            case 'q':
+                            case 'r':
+                            case 's':
+                            case 't':
+                            case 'u':
+                            case 'v':
+                            case 'w':
+                            case 'x':
+                            case 'y':
+                            case 'z':
+                                this._state = 34;
+                                break;
+                            default:
+                                this._state = 102;
+                                break;
+                        }
+                    }.bind(this))();
+                    break;
+                case 102:
+                    {
+                        this._foundLexeme("TIMEDIFF");this._set_next();
+                        return;
+                    }
+                case 100000000:
+                    {
+                        this._searchString();this._set_next();
+                        return;
+                    }
+            }
+        }
+    }
+};
+
+
+/**
+ * Minimal printf-style formatter.
+ *
+ * The first argument is the format string; each following argument fills the
+ * next "%<letter>" placeholder, in order of appearance. Conversions:
+ *   %s - String(arg)
+ *   %i - parseInt(arg)
+ *   %f - parseFloat(arg)
+ * A placeholder is copied to the output unchanged when its argument is
+ * undefined or when its letter is not one of the conversions above. In both
+ * cases it still consumes one argument position.
+ *
+ * @param {string} format Template text containing the placeholders.
+ * @param {...*} values Values substituted into the placeholders.
+ * @returns {string} The formatted text.
+ */
+var print_f = function() {
+    // Coerce once so that substring() below is valid for any format value.
+    var next = String(arguments[0]);
+    var rx = /%[a-zA-Z]/;
+    var r_str = "";
+    var a = 1,
+        match;
+
+    while ((match = rx.exec(next)) !== null) {
+        var macro = next.charAt(match.index + 1);
+        var arg = arguments[a];
+
+        // Emit the literal text before the placeholder, then step past it.
+        r_str += next.substring(0, match.index);
+        next = next.substring(match.index + 2);
+
+        if (arg === undefined) {
+            r_str += "%" + macro;
+        } else {
+            switch (macro) {
+                case "s":
+                    // String() rather than arg.toString(): null must not throw.
+                    r_str += String(arg);
+                    break;
+                case "i":
+                    // Radix stays implicit so "0x.." strings are still read as hex.
+                    r_str += parseInt(arg);
+                    break;
+                case "f":
+                    r_str += parseFloat(arg);
+                    break;
+                default:
+                    // Unsupported letter: keep the text instead of dropping it.
+                    r_str += "%" + macro;
+                    break;
+            }
+        }
+        a++;
+    }
+
+    return r_str + next;
+};
+module.exports = Lexer
\ No newline at end of file
diff --git a/parsers/filters/parser.js b/parsers/filters/parser.js
new file mode 100644
index 0000000..dafe5e6
--- /dev/null
+++ b/parsers/filters/parser.js
@@ -0,0 +1,1152 @@
+/*
+ ** 2000-05-29
+ **
+ ** The author disclaims copyright to this source code. In place of
+ ** a legal notice, here is a blessing:
+ **
+ ** May you do good and not evil.
+ ** May you find forgiveness for yourself and forgive others.
+ ** May you share freely, never taking more than you give.
+ ** + ** Based on SQLite distribution v3.17.0 + ** Adopted for JavaScript by Artem Butusov + ** + ************************************************************************* + ** Driver template for the LEMON parser generator. + ** + ** The "lemon" program processes an LALR(1) input grammar file, then uses + ** this template to construct a parser. The "lemon" program inserts text + ** at each "%%" line. Also, any "P-a-r-s-e" identifer prefix (without the + ** interstitial "-" characters) contained in this template is changed into + ** the value of the %name directive from the grammar. Otherwise, the content + ** of this template is copied straight through into the generate parser + ** source file. + ** + ** The following is the concatenation of all %include directives from the + ** input grammar file: + */ +/************ Begin %include sections from the grammar ************************/ + +// include something +/**************** End of %include directives **********************************/ +function Parser() { + /* These constants specify the various numeric values for terminal symbols + ** in a format understandable to "makeheaders". + ***************** Begin makeheaders token definitions *************************/ + this.TOKEN_OR = 1; + this.TOKEN_AND = 2; + this.TOKEN_NOT = 3; + this.TOKEN_INTEGER_LITERAL = 4; + this.TOKEN_STRING_LITERAL = 5; + this.TOKEN_ID = 6; + this.TOKEN_EQ = 7; + this.TOKEN_LCB = 8; + this.TOKEN_RCB = 9; + this.TOKEN_COMMA = 10; + this.TOKEN_ADDRESS = 11; + this.TOKEN_LSB = 12; + this.TOKEN_RSB = 13; + /**************** End makeheaders token definitions ***************************/ + + /* The next sections is a series of control #defines. + ** various aspects of the generated parser. + ** YYNOCODE is a number of type YYCODETYPE that is not used for + ** any terminal or nonterminal symbol. 
+ ** YYFALLBACK If defined, this indicates that one or more tokens + ** (also known as: "terminal symbols") have fall-back + ** values which should be used if the original symbol + ** would not parse. This permits keywords to sometimes + ** be used as identifiers, for example. + ** YYSTACKDEPTH is the maximum depth of the parser's stack. If + ** zero the stack is dynamically sized using realloc() + ** YYERRORSYMBOL is the code number of the error symbol. If not + ** defined, then do no error processing. + ** YYNSTATE the combined number of states. + ** YYNRULE the number of rules in the grammar + ** YY_MAX_SHIFT Maximum value for shift actions + ** YY_MIN_SHIFTREDUCE Minimum value for shift-reduce actions + ** YY_MAX_SHIFTREDUCE Maximum value for shift-reduce actions + ** YY_MIN_REDUCE Maximum value for reduce actions + ** YY_ERROR_ACTION The yy_action[] code for syntax error + ** YY_ACCEPT_ACTION The yy_action[] code for accept + ** YY_NO_ACTION The yy_action[] code for no-op + */ + /************* Begin control #defines *****************************************/ + this.YYNOCODE = 27; + this.YYSTACKDEPTH = 100; + this.YYFALLBACK = false; + this.YYNSTATE = 12; + this.YYNRULE = 17; + this.YY_MAX_SHIFT = 11; + this.YY_MIN_SHIFTREDUCE = 26; + this.YY_MAX_SHIFTREDUCE = 42; + this.YY_MIN_REDUCE = 43; + this.YY_MAX_REDUCE = 59; + this.YY_ERROR_ACTION = 60; + this.YY_ACCEPT_ACTION = 61; + this.YY_NO_ACTION = 62; + /************* End control #defines *******************************************/ + + /* Define the yytestcase() macro to be a no-op if is not already defined + ** otherwise. + ** + ** Applications can choose to define yytestcase() in the %include section + ** to a macro that can assist in verifying code coverage. For production + ** code the yytestcase() macro should be turned off. But it is useful + ** for testing. 
+ */ + if (!this.yytestcase) { + this.yytestcase = function() {}; + } + + + /* Next are the tables used to determine what action to take based on the + ** current state and lookahead token. These tables are used to implement + ** functions that take a state number and lookahead value and return an + ** action integer. + ** + ** Suppose the action integer is N. Then the action is determined as + ** follows + ** + ** 0 <= N <= YY_MAX_SHIFT Shift N. That is, push the lookahead + ** token onto the stack and goto state N. + ** + ** N between YY_MIN_SHIFTREDUCE Shift to an arbitrary state then + ** and YY_MAX_SHIFTREDUCE reduce by rule N-YY_MIN_SHIFTREDUCE. + ** + ** N between YY_MIN_REDUCE Reduce by rule N-YY_MIN_REDUCE + ** and YY_MAX_REDUCE + ** + ** N == YY_ERROR_ACTION A syntax error has occurred. + ** + ** N == YY_ACCEPT_ACTION The parser accepts its input. + ** + ** N == YY_NO_ACTION No such action. Denotes unused + ** slots in the yy_action[] table. + ** + ** The action table is constructed as a single large table named yy_action[]. + ** Given state S and lookahead X, the action is computed as either: + ** + ** (A) N = yy_action[ yy_shift_ofst[S] + X ] + ** (B) N = yy_default[S] + ** + ** The (A) formula is preferred. The B formula is used instead if: + ** (1) The yy_shift_ofst[S]+X value is out of range, or + ** (2) yy_lookahead[yy_shift_ofst[S]+X] is not equal to X, or + ** (3) yy_shift_ofst[S] equal YY_SHIFT_USE_DFLT. + ** (Implementation note: YY_SHIFT_USE_DFLT is chosen so that + ** YY_SHIFT_USE_DFLT+X will be out of range for all possible lookaheads X. + ** Hence only tests (1) and (2) need to be evaluated.) + ** + ** The formulas above are for computing the action when the lookahead is + ** a terminal symbol. If the lookahead is a non-terminal (as occurs after + ** a reduce action) then the yy_reduce_ofst[] array is used in place of + ** the yy_shift_ofst[] array and YY_REDUCE_USE_DFLT is used in place of + ** YY_SHIFT_USE_DFLT. 
+ ** + ** The following are the tables generated in this section: + ** + ** yy_action[] A single table containing all actions. + ** yy_lookahead[] A table containing the lookahead for each entry in + ** yy_action. Used to detect hash collisions. + ** yy_shift_ofst[] For each state, the offset into yy_action for + ** shifting terminals. + ** yy_reduce_ofst[] For each state, the offset into yy_action for + ** shifting non-terminals after a reduce. + ** yy_default[] Default action for each state. + ** + *********** Begin parsing tables **********************************************/ + this.yy_action = [ + /* 0 */ + 61, 6, 27, 2, 30, 11, 34, 35, 5, 10, + /* 10 */ + 36, 30, 11, 34, 35, 33, 9, 7, 30, 11, + /* 20 */ + 34, 35, 28, 32, 29, 31, 43, 1, 2, 37, + /* 30 */ + 42, 41, 38, 8, 4, 3, + ]; + this.yy_lookahead = [ + /* 0 */ + 15, 16, 4, 2, 19, 20, 21, 22, 16, 11, + /* 10 */ + 9, 19, 20, 21, 22, 16, 23, 24, 19, 20, + /* 20 */ + 21, 22, 17, 18, 5, 6, 0, 8, 2, 5, + /* 30 */ + 25, 13, 5, 10, 12, 7, + ]; + this.YY_SHIFT_USE_DFLT = 36; + this.YY_SHIFT_COUNT = 11; + this.YY_SHIFT_MIN = -2; + this.YY_SHIFT_MAX = 28; + this.yy_shift_ofst = [ + /* 0 */ + 19, 19, 19, -2, 24, 1, 26, 18, 27, 23, + /* 10 */ + 22, 28, + ]; + this.YY_REDUCE_USE_DFLT = -16; + this.YY_REDUCE_COUNT = 4; + this.YY_REDUCE_MIN = -15; + this.YY_REDUCE_MAX = 5; + this.yy_reduce_ofst = [ + /* 0 */ + -15, -8, -1, 5, -7, + ]; + this.yy_default = [ + /* 0 */ + 60, 60, 60, 60, 57, 60, 60, 60, 60, 56, + /* 10 */ + 60, 60, + ]; + /********** End of lemon-generated parsing tables *****************************/ + + /* The next table maps tokens (terminal symbols) into fallback tokens. + ** If a construct like the following: + ** + ** %fallback ID X Y Z. + ** + ** appears in the grammar, then ID becomes a fallback token for X, Y, + ** and Z. 
Whenever one of the tokens X, Y, or Z is input to the parser + ** but it does not parse, the type of the token is changed to ID and + ** the parse is retried before an error is thrown. + ** + ** This feature can be used, for example, to cause some keywords in a language + ** to revert to identifiers if they keyword does not apply in the context where + ** it appears. + */ + this.yyFallback = []; + + /* The following structure represents a single element of the + ** parser's stack. Information stored includes: + ** + ** + The state number for the parser at this level of the stack. + ** + ** + The value of the token stored at this level of the stack. + ** (In other words, the "major" token.) + ** + ** + The semantic value stored at this level of the stack. This is + ** the information used by the action routines in the grammar. + ** It is sometimes called the "minor" token. + ** + ** After the "shift" half of a SHIFTREDUCE action, the stateno field + ** actually contains the reduce action for the second half of the + ** SHIFTREDUCE. + */ + //{ + // stateno, /* The state-number, or reduce action in SHIFTREDUCE */ + // major, /* The major token value. This is the code + // ** number for the token at this stack level */ + // minor, /* The user-supplied minor token value. This + // ** is the value of the token */ + //} + + /* The state of the parser is completely contained in an instance of + ** the following structure */ + this.yyhwm = 0; /* High-water mark of the stack */ + this.yyerrcnt = -1; /* Shifts left before out of the error */ + this.yystack = null; /* The parser's stack */ + this.yyidx = -1; /* Stack index of current element in the stack */ + + this.yyTraceCallback = null; + this.yyTracePrompt = ""; + + /* + ** Turn parser tracing on by giving a stream to which to write the trace + ** and a prompt to preface each trace message. Tracing is turned off + ** by making either argument NULL + ** + ** Inputs: + **
<ul> + **
<li> A callback to which trace output should be written. + ** If NULL, then tracing is turned off. + **
<li> A prefix string written at the beginning of every + ** line of trace output. Default is "". + ** </ul>
+ ** + ** Outputs: + ** None. + */ + this.setTraceCallback = function(callback, prompt) { + this.yyTraceCallback = callback; + this.yyTracePrompt = prompt || ""; + } + + this.trace = function(message) { + this.yyTraceCallback(this.yyTracePrompt + message + "\n"); + } + + /* For tracing shifts, the names of all terminals and nonterminals + ** are required. The following table supplies these names */ + this.yyTokenName = [ + "$", "OR", "AND", "NOT", + "INTEGER_LITERAL", "STRING_LITERAL", "ID", "EQ", + "LCB", "RCB", "COMMA", "ADDRESS", + "LSB", "RSB", "error", "main", + "expr", "integer", "literal", "string", + "id", "eq", "and", "address_literal_content", + "address_literal_content_or_empty", "address_literal", + ]; + + /* For tracing reduce actions, the names of all rules are required. + */ + this.yyRuleName = [ + /* 0 */ + "main ::= expr", + /* 1 */ + "integer ::= INTEGER_LITERAL", + /* 2 */ + "literal ::= integer", + /* 3 */ + "string ::= STRING_LITERAL", + /* 4 */ + "id ::= string", + /* 5 */ + "id ::= ID", + /* 6 */ + "eq ::= id EQ literal", + /* 7 */ + "and ::= expr AND expr", + /* 8 */ + "expr ::= eq", + /* 9 */ + "expr ::= and", + /* 10 */ + "expr ::= LCB expr RCB", + /* 11 */ + "address_literal_content ::= STRING_LITERAL", + /* 12 */ + "address_literal_content ::= address_literal_content COMMA STRING_LITERAL", + /* 13 */ + "address_literal_content_or_empty ::= address_literal_content", + /* 14 */ + "address_literal_content_or_empty ::=", + /* 15 */ + "address_literal ::= ADDRESS LSB address_literal_content_or_empty RSB", + /* 16 */ + "literal ::= address_literal", + ]; + /* + ** Try to increase the size of the parser stack. Return the number + ** of errors. Return 0 on success. + */ + this.yyGrowStack = function() { + // fix me: yystksz*2 + 100 + this.yystack.push({ + stateno: undefined, + major: undefined, + minor: undefined + }); + } + + /* Initialize a new parser that has already been allocated. 
+ */ + this.init = function() { + this.yyhwm = 0; + this.yyerrcnt = -1; + this.yyidx = 0; + if (this.YYSTACKDEPTH <= 0) { + this.yystack = []; + this.yyGrowStack(); + } else { + this.yystack = new Array(this.YYSTACKDEPTH); + for (var i = 0; i < this.YYSTACKDEPTH; i++) { + this.yystack[i] = { + stateno: undefined, + major: undefined, + minor: undefined + }; + } + } + var yytos = this.yystack[0]; + yytos.stateno = 0; + yytos.major = 0; + } + + /* The following function deletes the "minor type" or semantic value + ** associated with a symbol. The symbol can be either a terminal + ** or nonterminal. "yymajor" is the symbol code, and "yypminor" is + ** a pointer to the value to be deleted. The code used to do the + ** deletions is derived from the %destructor and/or %token_destructor + ** directives of the input grammar. + */ + this.yy_destructor = function( + yymajor, /* Type code for object to destroy */ + yyminor /* The object to be destroyed */ + ) { + switch (yymajor) { + /* Here is inserted the actions which take place when a + ** terminal or non-terminal is destroyed. This can happen + ** when the symbol is popped from the stack during a + ** reduce or during error processing or when a parser is + ** being destroyed before it is finished parsing. + ** + ** Note: during a reduce, the only symbols destroyed are those + ** which appear on the RHS of the rule, but which are *not* used + ** inside the C code. + */ + /********* Begin destructor definitions ***************************************/ + /********* End destructor definitions *****************************************/ + default: + break; /* If no destructor action specified: do nothing */ + } + } + + /* + ** Pop the parser's stack once. + ** + ** If there is a destructor routine associated with the token which + ** is popped from the stack, then call it. 
+ */ + this.yy_pop_parser_stack = function() { + // assert( pParser->yytos!=0 ); + // assert( pParser->yytos > pParser->yystack ); + var yytos = this.yystack[this.yyidx]; + + if (this.yyTraceCallback) { + this.trace("Popping " + this.yyTokenName[yytos.major]); + } + this.yy_destructor(yytos.major, yytos.minor); + + this.yyidx--; + } + + /* + ** Clear all secondary memory allocations from the parser + */ + this.finalize = function() { + while (this.yyidx > 0) { + this.yy_pop_parser_stack(); + } + this.yystack = null; + } + + /* + ** Return the peak depth of the stack for a parser. + */ + this.getStackPeak = function() { + return this.yyhwm; + } + + /* + ** Find the appropriate action for a parser given the terminal + ** look-ahead token iLookAhead. + */ + this.yy_find_shift_action = function( + iLookAhead /* The look-ahead token */ + ) { + var yytos = this.yystack[this.yyidx]; + var stateno = yytos.stateno; + + if (stateno >= this.YY_MIN_REDUCE) { + return stateno; + } + + // assert( stateno <= YY_SHIFT_COUNT ); + + do { + var i = this.yy_shift_ofst[stateno]; + // assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; + if (i < 0 || i >= this.yy_action.length || this.yy_lookahead[i] != iLookAhead) { + if (this.YYFALLBACK) { + var iFallback; /* Fallback token */ + if ((iLookAhead < this.yyFallback.length) && + (iFallback = this.yyFallback[iLookAhead]) != 0 + ) { + if (this.yyTraceCallback) { + this.trace("FALLBACK " + this.yyTokenName[iLookAhead] + " => " + this.yyTokenName[iFallback]); + } + } + // assert( yyFallback[iFallback]==0 ); /* Fallback loop must terminate */ + iLookAhead = iFallback; + continue; + } + + if (this.YYWILDCARD) { + var j = i - iLookAhead + this.YYWILDCARD; + var cond1 = (this.YY_SHIFT_MIN + this.YYWILDCARD) < 0 ? j >= 0 : true; + var cond2 = (this.YY_SHIFT_MAX + this.YYWILDCARD) >= this.yy_action.length ? 
j < this.yy_action.length : true; + if (cond1 && cond2 && this.yy_lookahead[j] == this.YYWILDCARD && iLookAhead > 0) { + if (this.yyTraceCallback) { + this.trace("WILDCARD " + this.yyTokenName[iLookAhead] + " => " + this.yyTokenName[this.YYWILDCARD]); + } + return this.yy_action[j]; + } + } + + return this.yy_default[stateno]; + } else { + return this.yy_action[i]; + } + } while (true); + } + + /* + ** Find the appropriate action for a parser given the non-terminal + ** look-ahead token iLookAhead. + */ + this.yy_find_reduce_action = function( + stateno, /* Current state number */ + iLookAhead /* The look-ahead token */ + ) { + if (this.YYERRORSYMBOL) { + if (stateno > this.YY_REDUCE_COUNT) { + return this.yy_default[stateno]; + } + } else { + // assert( stateno<=YY_REDUCE_COUNT ); + } + + var i = this.yy_reduce_ofst[stateno]; + // assert( i!=YY_REDUCE_USE_DFLT ); + // assert( iLookAhead!=YYNOCODE ); + i += iLookAhead; + + if (this.YYERRORSYMBOL) { + if (i < 0 || i >= this.yy_action.length || this.yy_lookahead[i] != iLookAhead) { + return this.yy_default[stateno]; + } + } else { + // assert( i>=0 && i 0) { + this.yy_pop_parser_stack(); + } + /* Here code is inserted which will execute if the parser + ** stack every overflows */ + /******** Begin %stack_overflow code ******************************************/ + /******** End %stack_overflow code ********************************************/ + } + + /* + ** Print tracing information for a SHIFT action + */ + this.yyTraceShift = function(yyNewState) { + if (this.yyTraceCallback) { + var yytos = this.yystack[this.yyidx]; + if (yyNewState < this.YYNSTATE) { + this.trace("Shift '" + this.yyTokenName[yytos.major] + "', go to state " + yyNewState); + } else { + this.trace("Shift '" + this.yyTokenName[yytos.major] + "'"); + } + } + } + + /* + ** Perform a shift action. 
+ */ + this.yy_shift = function( + yyNewState, /* The new state to shift in */ + yyMajor, /* The major token to shift in */ + yyMinor /* The minor token to shift in */ + ) { + this.yyidx++; + + if (this.yyidx > this.yyhwm) { + this.yyhwm++; + // assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack) ); + } + + if (this.YYSTACKDEPTH > 0) { + if (this.yyidx >= this.YYSTACKDEPTH) { + this.yyidx--; + this.yyStackOverflow(); + return; + } + } else { + if (this.yyidx >= this.yystack.length) { + this.yyGrowStack(); + } + } + + if (yyNewState > this.YY_MAX_SHIFT) { + yyNewState += this.YY_MIN_REDUCE - this.YY_MIN_SHIFTREDUCE; + } + + var yytos = this.yystack[this.yyidx]; + yytos.stateno = yyNewState; + yytos.major = yyMajor; + yytos.minor = yyMinor; + + this.yyTraceShift(yyNewState); + } + + /* The following table contains information about every rule that + ** is used during the reduce. + */ + //{ + // lhs, /* Symbol on the left-hand side of the rule */ + // nrhs, /* Number of right-hand side symbols in the rule */ + //} + this.yyRuleInfo = [{ + lhs: 15, + nrhs: 1 + }, + { + lhs: 17, + nrhs: 1 + }, + { + lhs: 18, + nrhs: 1 + }, + { + lhs: 19, + nrhs: 1 + }, + { + lhs: 20, + nrhs: 1 + }, + { + lhs: 20, + nrhs: 1 + }, + { + lhs: 21, + nrhs: 3 + }, + { + lhs: 22, + nrhs: 3 + }, + { + lhs: 16, + nrhs: 1 + }, + { + lhs: 16, + nrhs: 1 + }, + { + lhs: 16, + nrhs: 3 + }, + { + lhs: 23, + nrhs: 1 + }, + { + lhs: 23, + nrhs: 3 + }, + { + lhs: 24, + nrhs: 1 + }, + { + lhs: 24, + nrhs: 0 + }, + { + lhs: 25, + nrhs: 4 + }, + { + lhs: 18, + nrhs: 1 + }, + ]; + + /* + ** Perform a reduce action and the shift that must immediately + ** follow the reduce. 
+ */ + this.yy_reduce = function( + yyruleno /* Number of the rule by which to reduce */ + ) { + var yymsp = this.yystack[this.yyidx]; /* The top of the parser's stack */ + + if (yyruleno < this.yyRuleName.length) { + var yysize = this.yyRuleInfo[yyruleno].nrhs; + var ruleName = this.yyRuleName[yyruleno]; + var newStateNo = this.yystack[this.yyidx - yysize].stateno; + if (this.yyTraceCallback) { + this.trace("Reduce [" + ruleName + "], go to state " + newStateNo + "."); + } + } + + /* Check that the stack is large enough to grow by a single entry + ** if the RHS of the rule is empty. This ensures that there is room + ** enough on the stack to push the LHS value */ + if (this.yyRuleInfo[yyruleno].nrhs == 0) { + if (this.yyidx > this.yyhwm) { + this.yyhwm++; + // assert( yypParser->yyhwm == (int)(yypParser->yytos - yypParser->yystack)); + } + if (this.YYSTACKDEPTH > 0) { + if (this.yyidx >= this.YYSTACKDEPTH - 1) { + this.yyStackOverflow(); + return; + } + } else { + if (this.yyidx >= this.yystack.length - 1) { + this.yyGrowStack(); + yymsp = this.yystack[this.yyidx]; + } + } + } + + var yylhsminor; + switch (yyruleno) { + /* Beginning here are the reduction cases. A typical example + ** follows: + ** case 0: + ** #line + ** { ... 
} // User supplied code + ** #line + ** break; + */ + /********** Begin reduce actions **********************************************/ + case 0: + /* main ::= expr */ { + _result.root_node = this.yystack[this.yyidx + 0].minor + } + break; + case 1: + /* integer ::= INTEGER_LITERAL */ { + yylhsminor = new Node({ + type: "INTEGER_LITERAL", + lexeme: this.yystack[this.yyidx + 0].minor.lexeme, + start: this.yystack[this.yyidx + 0].minor.start, + end: this.yystack[this.yyidx + 0].minor.end + }) + } + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + case 2: + /* literal ::= integer */ { + yylhsminor = new Node({ + type: "literal", + children: [this.yystack[this.yyidx + 0].minor] + }) + } + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + case 3: + /* string ::= STRING_LITERAL */ { + yylhsminor = new Node({ + type: "STRING_LITERAL", + lexeme: this.yystack[this.yyidx + 0].minor.lexeme, + start: this.yystack[this.yyidx + 0].minor.start, + end: this.yystack[this.yyidx + 0].minor.end + }) + } + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + case 4: + /* id ::= string */ { + yylhsminor = new Node({ + type: "id", + children: [this.yystack[this.yyidx + 0].minor] + }); + } + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + case 5: + /* id ::= ID */ { + yylhsminor = new Node({ + type: "ID", + lexeme: this.yystack[this.yyidx + 0].minor.lexeme, + start: this.yystack[this.yyidx + 0].minor.start, + end: this.yystack[this.yyidx + 0].minor.end + }) + } + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + case 6: + /* eq ::= id EQ literal */ { + yylhsminor = new Node({ + type: "eq", + children: [ + this.yystack[this.yyidx + -2].minor, + new Node({ + type: "EQ", + lexeme: this.yystack[this.yyidx + -1].minor.lexeme, + start: this.yystack[this.yyidx + -1].minor.start, + end: this.yystack[this.yyidx + -1].minor.end + }), + this.yystack[this.yyidx + 0].minor + ] + }) + } + this.yystack[this.yyidx + -2].minor = yylhsminor; + break; + case 
7: + /* and ::= expr AND expr */ { + yylhsminor = new Node({ + type: "and", + children: [ + this.yystack[this.yyidx + -2].minor, + this.yystack[this.yyidx + 0].minor + ] + }) + } + this.yystack[this.yyidx + -2].minor = yylhsminor; + break; + case 8: + /* expr ::= eq */ { + yylhsminor = new Node({ + type: "expr", + children: [this.yystack[this.yyidx + 0].minor] + }) + } + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + case 9: + /* expr ::= and */ + case 13: + /* address_literal_content_or_empty ::= address_literal_content */ this.yytestcase(yyruleno == 13); { + yylhsminor = this.yystack[this.yyidx + 0].minor; + } + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + case 10: + /* expr ::= LCB expr RCB */ { + this.yystack[this.yyidx + -2].minor = this.yystack[this.yyidx + -1].minor; + } + break; + case 11: + /* address_literal_content ::= STRING_LITERAL */ { + yylhsminor = new Node({ + children: [ + new Node({ + type: "STRING_LITERAL", + lexeme: this.yystack[this.yyidx + 0].minor.lexeme, + start: this.yystack[this.yyidx + 0].minor.start, + end: this.yystack[this.yyidx + 0].minor.end + }) + ] + }); + } + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + case 12: + /* address_literal_content ::= address_literal_content COMMA STRING_LITERAL */ { + this.yystack[this.yyidx + -2].minor.add(new Node({ + type: "STRING_LITERAL", + lexeme: this.yystack[this.yyidx + 0].minor.lexeme, + start: this.yystack[this.yyidx + 0].minor.start, + end: this.yystack[this.yyidx + 0].minor.end + })); + yylhsminor = this.yystack[this.yyidx + -2].minor; + } + this.yystack[this.yyidx + -2].minor = yylhsminor; + break; + case 14: + /* address_literal_content_or_empty ::= */ { + this.yystack[this.yyidx + 1].minor = new Node({ + type: "address_literal_content" + }); + } + break; + case 15: + /* address_literal ::= ADDRESS LSB address_literal_content_or_empty RSB */ { + this.yystack[this.yyidx + -3].minor = new Node({ + type: "address_literal", + children: 
this.yystack[this.yyidx + -1].minor.children + }); + } + break; + case 16: + /* literal ::= address_literal */ { + yylhsminor = new Node({ + type: "literal", + children: [this.yystack[this.yyidx + 0].minor] + }); + } + this.yystack[this.yyidx + 0].minor = yylhsminor; + break; + default: + break; + /********** End reduce actions ************************************************/ + }; + // assert( yyruleno this.YY_MAX_SHIFT) { + yyact += this.YY_MIN_REDUCE - this.YY_MIN_SHIFTREDUCE; + } + this.yyidx -= yysize - 1; + yymsp = this.yystack[this.yyidx]; + yymsp.stateno = yyact; + yymsp.major = yygoto; + this.yyTraceShift(yyact); + } else { + // assert( yyact == YY_ACCEPT_ACTION ); + this.yyidx -= yysize; + this.yy_accept(); + } + } + + /* + ** The following code executes when the parse fails + */ + this.yy_parse_failed = function() { + if (this.yyTraceCallback) { + this.trace("Fail!"); + } + while (this.yyidx > 0) { + this.yy_pop_parser_stack(); + } + /* Here code is inserted which will be executed whenever the + ** parser fails */ + /************ Begin %parse_failure code ***************************************/ + /************ End %parse_failure code *****************************************/ + } + + /* + ** The following code executes when a syntax error first occurs. 
+ */ + this.yy_syntax_error = function( + yymajor, /* The major type of the error token */ + yyminor /* The minor type of the error token */ + ) { + var TOKEN = yyminor; + /************ Begin %syntax_error code ****************************************/ + + console.log("Syntax error"); + /************ End %syntax_error code ******************************************/ + } + + /* + ** The following is executed when the parser accepts + */ + this.yy_accept = function() { + if (this.yyTraceCallback) { + this.trace("Accept!"); + } + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt = -1; + } + // assert( yypParser->yytos==yypParser->yystack ); + /* Here code is inserted which will be executed whenever the + ** parser accepts */ + /*********** Begin %parse_accept code *****************************************/ + /*********** End %parse_accept code *******************************************/ + } + + /* The main parser program. + ** The first argument is a pointer to a structure obtained from + ** "ParserAlloc" which describes the current state of the parser. + ** The second argument is the major token number. The third is + ** the minor token. The fourth optional argument is whatever the + ** user wants (and specified in the grammar) and is available for + ** use by the action routines. + ** + ** Inputs: + **
    + **
  • A pointer to the parser (an opaque structure.) + **
  • The major token number. + **
  • The minor token number. + **
  • An option argument of a grammar-specified type. + **
+ ** + ** Outputs: + ** None. + */ + this.parse = function( + yymajor, /* The major token code number */ + yyminor /* The value for the token */ + ) { + var yyact; /* The parser action. */ + var yyendofinput; /* True if we are at the end of input */ + var yyerrorhit = 0; /* True if yymajor has invoked an error */ + + //assert( yypParser->yytos!=0 ); + + if (yymajor === undefined || yymajor === null) { + yymajor = 0; + } + + yyendofinput = yymajor == 0; + + if (this.yyTraceCallback) { + this.trace("Input '" + this.yyTokenName[yymajor] + "'"); + } + + do { + yyact = this.yy_find_shift_action(yymajor); + if (yyact <= this.YY_MAX_SHIFTREDUCE) { // check me? + this.yy_shift(yyact, yymajor, yyminor); + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt--; + } + yymajor = this.YYNOCODE; + } else if (yyact <= this.YY_MAX_REDUCE) { // check me? + this.yy_reduce(yyact - this.YY_MIN_REDUCE); // check me? + } else { + // assert( yyact == YY_ERROR_ACTION ); + if (this.yyTraceCallback) { + this.trace("Syntax Error!"); + } + if (this.YYERRORSYMBOL) { + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol. + ** + ** * Set the error count to three. + ** + ** * Begin accepting and shifting new tokens. No new error + ** processing will occur until three tokens have been + ** shifted successfully. 
+ ** + */ + if (this.yyerrcnt < 0) { + this.yy_syntax_error(yymajor, yyminor); + } + var yymx = this.yystack[this.yyidx].major; + if (yymx == this.YYERRORSYMBOL || yyerrorhit) { + if (this.yyTraceCallback) { + this.trace("Discard input token " + this.yyTokenName[yymajor]); + } + this.yy_destructor(yymajor, yyminor); + yymajor = this.YYNOCODE; + } else { + while (this.yyidx >= 0 && + yymx != this.YYERRORSYMBOL && + (yyact = this.yy_find_reduce_action( + this.yystack[this.yyidx].stateno, + this.YYERRORSYMBOL)) >= this.YY_MIN_REDUCE // check me? + ) { + this.yy_pop_parser_stack(); + } + if (this.yyidx < 0 || yymajor == 0) { + this.yy_destructor(yymajor, yyminor); + this.yy_parse_failed(); + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt = -1; + } + yymajor = this.YYNOCODE; + } else if (yymx != this.YYERRORSYMBOL) { + this.yy_shift(yyact, this.YYERRORSYMBOL, yyminor); // check me? + } + } + this.yyerrcnt = 3; + yyerrorhit = 1; + } else if (this.YYNOERRORRECOVERY) { + /* If the YYNOERRORRECOVERY macro is defined, then do not attempt to + ** do any kind of error recovery. Instead, simply invoke the syntax + ** error routine and continue going as if nothing had happened. + ** + ** Applications can set this macro (for example inside %include) if + ** they intend to abandon the parse upon the first syntax error seen. + */ + this.yy_syntax_error(yymajor, yyminor); + this.yy_destructor(yymajor, yyminor); + yymajor = this.YYNOCODE; + } else { + /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. + ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted. 
+ */ + if (this.yyerrcnt <= 0) { + this.yy_syntax_error(yymajor, yyminor); + } + this.yyerrcnt = 3; + this.yy_destructor(yymajor, yyminor); + if (yyendofinput) { + this.yy_parse_failed(); + if (!this.YYNOERRORRECOVERY) { + this.yyerrcnt = -1; + } + } + yymajor = this.YYNOCODE; + } + } + } while (yymajor != this.YYNOCODE && this.yyidx > 0); + + if (this.yyTraceCallback) { + var remainingTokens = []; + for (var i = 1; i <= this.yyidx; i++) { + remainingTokens.push(this.yyTokenName[this.yystack[i].major]); + } + this.trace("Return. Stack=[" + remainingTokens.join(" ") + "]"); + } + } + + this.init(); + +} // function Parser() + +/** + * Created by Aleksey Chichenkov on 1/28/19. + */ + +var Lexer = require('./lexer.js'); + +var Node = function(_options) { + this.type = _options.type || "none"; + this.children = _options.children || []; + this.lexeme = _options.lexeme || null; + this.start = _options.start || 0; + this.end = _options.end || 0; +}; + +Node.prototype = { + add: function(_node) { + this.children.push(_node); + } +}; + +var _result = {}; + +var LemonJS = function(_input) { + + var parser = new Parser(); + // var lexer = new Lexer("abc == 1 and abc1 == 2 and (bbc == 5)"); + var lexer = new Lexer(_input); + + var token; + while (token = lexer.next()) { + console.log("PARSE", token.lexeme); + parser.parse(parser["TOKEN_" + token.lexeme], token); + } + parser.parse(); + return _result; +}; + +var fs = require("fs"); + +fs.mkdirSync("tests"); + +var test_and = LemonJS("abc == 1 and abc1 == 2 and (bbc == 5)"); +fs.writeFileSync("tests/out_test_and.json", JSON.stringify(test_and, true, 3)); + +var test_address = LemonJS('abc == Address ["a", "b", "c"]'); +fs.writeFileSync("tests/out_tree_address.json", JSON.stringify(test_address, true, 3)); \ No newline at end of file diff --git a/parsers/filters/parser.out b/parsers/filters/parser.out new file mode 100644 index 0000000..ee9bc1b --- /dev/null +++ b/parsers/filters/parser.out @@ -0,0 +1,155 @@ +State 0: + main ::= 
* expr + string ::= * STRING_LITERAL + id ::= * string + id ::= * ID + eq ::= * id EQ literal + and ::= * expr AND expr + expr ::= * eq + expr ::= * and + expr ::= * LCB expr RCB + + STRING_LITERAL shift-reduce 3 string ::= STRING_LITERAL + ID shift-reduce 5 id ::= ID + LCB shift 1 + main accept + expr shift 6 + string shift-reduce 4 id ::= string + id shift 11 + eq shift-reduce 8 expr ::= eq + and shift-reduce 9 expr ::= and + +State 1: + string ::= * STRING_LITERAL + id ::= * string + id ::= * ID + eq ::= * id EQ literal + and ::= * expr AND expr + expr ::= * eq + expr ::= * and + expr ::= * LCB expr RCB + expr ::= LCB * expr RCB + + STRING_LITERAL shift-reduce 3 string ::= STRING_LITERAL + ID shift-reduce 5 id ::= ID + LCB shift 1 + expr shift 5 + string shift-reduce 4 id ::= string + id shift 11 + eq shift-reduce 8 expr ::= eq + and shift-reduce 9 expr ::= and + +State 2: + string ::= * STRING_LITERAL + id ::= * string + id ::= * ID + eq ::= * id EQ literal + and ::= * expr AND expr + and ::= expr AND * expr + expr ::= * eq + expr ::= * and + expr ::= * LCB expr RCB + + STRING_LITERAL shift-reduce 3 string ::= STRING_LITERAL + ID shift-reduce 5 id ::= ID + LCB shift 1 + expr shift-reduce 7 and ::= expr AND expr + string shift-reduce 4 id ::= string + id shift 11 + eq shift-reduce 8 expr ::= eq + and shift-reduce 9 expr ::= and + +State 3: + integer ::= * INTEGER_LITERAL + literal ::= * integer + eq ::= id EQ * literal + address_literal ::= * ADDRESS LSB address_literal_content_or_empty RSB + literal ::= * address_literal + + INTEGER_LITERAL shift-reduce 1 integer ::= INTEGER_LITERAL + ADDRESS shift 10 + integer shift-reduce 2 literal ::= integer + literal shift-reduce 6 eq ::= id EQ literal + address_literal shift-reduce 16 literal ::= address_literal + +State 4: + address_literal_content ::= * STRING_LITERAL + address_literal_content ::= * address_literal_content COMMA STRING_LITERAL + address_literal_content_or_empty ::= * address_literal_content + (14) 
address_literal_content_or_empty ::= * + address_literal ::= ADDRESS LSB * address_literal_content_or_empty RSB + + STRING_LITERAL shift-reduce 11 address_literal_content ::= STRING_LITERAL + address_literal_content shift 9 +address_literal_content_or_empty shift 7 + {default} reduce 14 address_literal_content_or_empty ::= + +State 5: + and ::= expr * AND expr + expr ::= LCB expr * RCB + + AND shift 2 + RCB shift-reduce 10 expr ::= LCB expr RCB + +State 6: + (0) main ::= expr * + and ::= expr * AND expr + + $ reduce 0 main ::= expr + AND shift 2 + +State 7: + address_literal ::= ADDRESS LSB address_literal_content_or_empty * RSB + + RSB shift-reduce 15 address_literal ::= ADDRESS LSB address_literal_content_or_empty RSB + +State 8: + address_literal_content ::= address_literal_content COMMA * STRING_LITERAL + + STRING_LITERAL shift-reduce 12 address_literal_content ::= address_literal_content COMMA STRING_LITERAL + +State 9: + address_literal_content ::= address_literal_content * COMMA STRING_LITERAL + (13) address_literal_content_or_empty ::= address_literal_content * + + COMMA shift 8 + {default} reduce 13 address_literal_content_or_empty ::= address_literal_content + +State 10: + address_literal ::= ADDRESS * LSB address_literal_content_or_empty RSB + + LSB shift 4 + +State 11: + eq ::= id * EQ literal + + EQ shift 3 + +---------------------------------------------------- +Symbols: + 0: $: + 1: OR + 2: AND + 3: NOT + 4: INTEGER_LITERAL + 5: STRING_LITERAL + 6: ID + 7: EQ + 8: LCB + 9: RCB + 10: COMMA + 11: ADDRESS + 12: LSB + 13: RSB + 14: error: + 15: main: STRING_LITERAL ID LCB + 16: expr: STRING_LITERAL ID LCB + 17: integer: INTEGER_LITERAL + 18: literal: INTEGER_LITERAL ADDRESS + 19: string: STRING_LITERAL + 20: id: STRING_LITERAL ID + 21: eq: STRING_LITERAL ID + 22: and: STRING_LITERAL ID LCB + 23: address_literal_content: STRING_LITERAL + 24: address_literal_content_or_empty: STRING_LITERAL + 25: address_literal: ADDRESS diff --git 
a/parsers/filters/parser.y b/parsers/filters/parser.y new file mode 100644 index 0000000..407c569 --- /dev/null +++ b/parsers/filters/parser.y @@ -0,0 +1,152 @@ +%name Parser + +%token_prefix TOKEN_ + +%left OR. +%left AND. +%right NOT. + +%include { + // include something +} + +%code { + &&REPLACER{process.js}&& +} + +%syntax_error { + console.log("Syntax error"); +} + +main ::= expr(A) . { + _result.root_node = A +} + +integer(A) ::= INTEGER_LITERAL(B) . { + A = new Node({ + type: "INTEGER_LITERAL", + lexeme: B.lexeme, + start: B.start, + end: B.end + }) +} + +literal(A) ::= integer(B) . { + A = new Node({ + type: "literal", + children: [B] + }) +} + +string(A) ::= STRING_LITERAL(B) . { + A = new Node({ + type: "STRING_LITERAL", + lexeme: B.lexeme, + start: B.start, + end: B.end + }) +} + +id(A) ::= string(B) . { + A = new Node({ + type: "id", + children: [B] + }); +} + +id(A) ::= ID(B) . { + A = new Node({ + type: "ID", + lexeme: B.lexeme, + start: B.start, + end: B.end + }) +} + +eq(A) ::= id(B) EQ(C) literal(D) . { + A = new Node({ + type: "eq", + children: [ + B, + new Node({ + type: "EQ", + lexeme: C.lexeme, + start: C.start, + end: C.end + }), + D + ] + }) +} + +and(A) ::= expr(B) AND expr(D) . { + A = new Node({ + type: "and", + children: [ + B, + D + ] + }) +} + +expr(A) ::= eq(B) . { + A = new Node({ + type: "expr", + children: [B] + }) +} + +expr(A) ::= and(B) . { + A = B; +} + +expr(A) ::= LCB expr(C) RCB . { + A = C; +} + +address_literal_content(A) ::= STRING_LITERAL(B) . { + A = new Node({ + children: [ + new Node({ + type: "STRING_LITERAL", + lexeme: B.lexeme, + start: B.start, + end: B.end + }) + ] + }); +} + +address_literal_content(A) ::= address_literal_content(B) COMMA STRING_LITERAL(C) . { + B.add(new Node({ + type: "STRING_LITERAL", + lexeme: C.lexeme, + start: C.start, + end: C.end + })); + A = B; +} + +address_literal_content_or_empty(A) ::= address_literal_content(B) . { + A = B; +} + +address_literal_content_or_empty(A) ::= . 
{ + A = new Node({ + type: "address_literal_content" + }); +} + +address_literal(A) ::= ADDRESS LSB address_literal_content_or_empty(C) RSB . { + A = new Node({ + type: "address_literal", + children: C.children + }); +} + +literal(A) ::= address_literal(B) . { + A = new Node({ + type: "literal", + children: [B] + }); +} diff --git a/process.js b/process.js new file mode 100644 index 0000000..b1dea3f --- /dev/null +++ b/process.js @@ -0,0 +1,370 @@ +/** + * Created by Aleksey Chichenkov on 1/28/19. + */ + +var fs = require("fs"); +var Lexer = require('./lexer.js'); + +var tokens = (function () { + + var std = (function () { + var protos = "__protos__"; + var keys = "__keys__"; + + + /** + * Return unique data + * + * @param {Object[]} _arr - prototypes of inheritance classes + * @param {Object} _main - prototype of resulting class + * + * @return {Object} + * */ + var unique = function (_arr, _main) { + var result = Object.create(null); + var to_remove = []; + + for (var i = 0, e = _arr.length; i != e; ++i) { + var item = _arr[i]; + + for (var key in item) { + if (key in result) { + to_remove.push(key); + continue; + } + + result[key] = item[key]; + } + + if (keys in item) { + for (var ii = 0, ee = item[keys].length; ii != ee; ++ii) { + var key = item[keys][ii]; + if (key in result) { + to_remove.push(key); + continue; + } + + result[key] = item[key]; + } + } + } + + for (var i = 0; i != to_remove.length; ++i) { + delete result[to_remove[i]]; + } + + for (var key in _main) { + result[key] = _main[key]; + } + + return result; + }; + + /** + * Create OOP class + * + * @param {Function[]} _constrs - inheritance classes + * @param {Object} _proto - prototype of resulting class + * @param {Object?} _static - static data + * + * @return {Function} + * */ + var class_creator = function (_constrs, _proto, _static) { + _constrs = _constrs || []; + _proto = _proto || []; + _static = _static || []; + + var constr; + if (_proto && _proto.hasOwnProperty("constructor")) { + 
constr = _proto.constructor; + delete _proto.constructor; + } else { + constr = function () { + for (var i = 0; i != _constrs.length; ++i) { + _constrs[i].apply(this, arguments); + } + }; + } + + var proto = Object.create(null); + Object.defineProperty(proto, protos, { + "value": [] + }); + Object.defineProperty(proto, keys, { + "value": [] + }); + + /************************FOR MEMBERS*******************************/ + for (var i = 0, e = _constrs.length; i != e; ++i) { + proto[protos].push(_constrs[i].prototype); + } + + var m_un = unique(proto[protos], _proto); + for (var key in m_un) { + proto[keys].push(key); + + Object.defineProperty(proto, key, { + "value": m_un[key] + }); + } + /************************FOR MEMBERS END***************************/ + + /************************FOR STATICS*******************************/ + var s_un = unique(_constrs, _static); + for (var key in s_un) { + Object.defineProperty(constr, key, { + "value": s_un[key], + "enumerable": true + }); + } + /************************FOR STATICS END***************************/ + + + Object.defineProperties(constr, { + "pr": { + "value": proto + }, + "prototype": { + "value": proto + } + }); + + Object.freeze(proto); + Object.freeze(constr); + + return constr; + }; + + /** + * Check if target has prototype + * + * @param {Object} _target - checkable instance + * @param {Object} _proto - posible prototype + * + * */ + var check = function (_target, _proto) { + for (var i = 0; i != _target[protos].length; ++i) { + var t_proto = _target[protos][i]; + if (t_proto == _proto) { + return true; + } + + if (t_proto[protos]) { + if (check(t_proto, _proto)) + return true; + } + } + + return false; + }; + + /** + * Check if target is instance of class + * + * @param {Object} _target - checkable instance + * @param {Function} _constr - posible constructor + * + * */ + var class_check = function (_target, _constr) { + if (_target instanceof _constr) { + return true; + } + + return check(_target, 
_constr.prototype); + }; + + return { + class: class_creator, + class_check: class_check + }; + })(); + var tools = { + merge: function (_obj) { + var target = Object.create(null); + var i = 0, e = arguments.length; + for (; i != e; ++i) { + var options = arguments[i]; + + for (var key in options) { + if (options[key] === undefined && target === options[key]) + continue; + + target[key] = options[key]; + } + } + + return target; + } + }; + + var Node = std.class([], { + constructor: function Node(_options) { + var base = tools.merge({ + children: [] + }, _options); + + this.children = base.children; + }, + add: function (_n) { + this.children.push(_n); + return this; + } + }); + + var Lexeme = std.class([Node], { + constructor: function Lexeme(_options) { + var base = tools.merge({ + start: -1, + end: -1, + type: null, + value: null + }, _options); + + Node.call(this, base); + + this.start = base.start; + this.end = base.end; + this.type = base.type; + this.value = base.value; + } + }); + + var Rule = std.class([Node], { + constructor: function NonTerminal(_options) { + var base = tools.merge({}, _options); + + Node.call(this, base); + } + }); + + var string_literal = std.class([Rule], { + constructor: function string_literal(_options) { + var base = tools.merge({}, _options); + + Rule.call(this, base); + } + }); + + + var integer_literal = std.class([Rule], { + constructor: function integer_literal(_options) { + var base = tools.merge({}, _options); + + Rule.call(this, base); + } + }); + + var id = std.class([Rule], { + constructor: function id(_options) { + var base = tools.merge({}, _options); + + Rule.call(this, base); + } + }); + + var literal = std.class([Rule], { + constructor: function literal(_options) { + var base = tools.merge({}, _options); + + Rule.call(this, base); + } + }); + + var eq = std.class([Rule], { + constructor: function eq(_options) { + var base = tools.merge({ + id: null, + EQ: null, + literal: null + }, _options); + + Rule.call(this, 
base); + + this.id = base.id; + this.EQ = base.EQ; + this.literal = base.literal; + }, + set_id: function (_n) { + this._id = _n; + }, + set_EQ: function (_n) { + this._EQ = _n; + }, + set_literal: function (_n) { + this._literal = _n; + } + }); + + var and = std.class([Rule], { + constructor: function and(_options) { + var base = tools.merge({ + lexpr: null, + AND: null, + rexpr: null + }, _options); + + Rule.call(this, base); + + this.lexpr = base.lexpr; + this.AND = base.AND; + this.rexpr = base.rexpr; + }, + set_lexpr: function (_n) { + this._lexpr = _n; + }, + set_AND: function (_n) { + this._AND = _n; + }, + set_rexpr: function (_n) { + this._rexpr = _n; + } + }); + + var expr = std.class([Rule], { + constructor: function expr(_options) { + var base = tools.merge({}, _options); + + Rule.call(this, base); + } + }); + + return { + // terminal + LEXEME: Lexeme, + + // non terminal + string_literal: string_literal, + integer_literal: integer_literal, + id: id, + literal: literal, + eq: eq, + and: and, + expr: expr, + } + +})(); + + + + +var _result = {}; +var LemonJS = function (_input) { + var parser = new Parser(); + var lexer = new Lexer(_input); + var token; + while (token = lexer.next()) { + console.log("PARSE", token.lexeme); + parser.parse(parser["TOKEN_" + token.lexeme], token); + } + parser.parse(); + return _result; +}; + + +fs.mkdirSync("tests"); + +var test_and = LemonJS("abc == 1 and abc1 == 2 and (bbc == 5)"); +fs.writeFileSync("tests/out_test_and.json", JSON.stringify(test_and, true, 3)); + +var test_address = LemonJS('abc == Address ["a", "b", "c"]'); +fs.writeFileSync("tests/out_tree_address.json", JSON.stringify(test_address, true, 3)); diff --git a/tests/out_test_and.json b/tests/out_test_and.json new file mode 100644 index 0000000..31c6466 --- /dev/null +++ b/tests/out_test_and.json @@ -0,0 +1,153 @@ +{ + "root_node": { + "type": "and", + "children": [ + { + "type": "and", + "children": [ + { + "type": "expr", + "children": [ + { + "type": 
"eq", + "children": [ + { + "type": "ID", + "children": [], + "lexeme": "ID", + "start": 0, + "end": 3 + }, + { + "type": "EQ", + "children": [], + "lexeme": "EQ", + "start": 4, + "end": 6 + }, + { + "type": "literal", + "children": [ + { + "type": "INTEGER_LITERAL", + "children": [], + "lexeme": "INTEGER_LITERAL", + "start": 7, + "end": 8 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + }, + { + "type": "expr", + "children": [ + { + "type": "eq", + "children": [ + { + "type": "ID", + "children": [], + "lexeme": "ID", + "start": 13, + "end": 17 + }, + { + "type": "EQ", + "children": [], + "lexeme": "EQ", + "start": 18, + "end": 20 + }, + { + "type": "literal", + "children": [ + { + "type": "INTEGER_LITERAL", + "children": [], + "lexeme": "INTEGER_LITERAL", + "start": 21, + "end": 22 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + }, + { + "type": "expr", + "children": [ + { + "type": "eq", + "children": [ + { + "type": "ID", + "children": [], + "lexeme": "ID", + "start": 28, + "end": 31 + }, + { + "type": "EQ", + "children": [], + "lexeme": "EQ", + "start": 32, + "end": 34 + }, + { + "type": "literal", + "children": [ + { + "type": "INTEGER_LITERAL", + "children": [], + "lexeme": "INTEGER_LITERAL", + "start": 35, + "end": 36 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } +} \ No newline at end of file diff --git a/tests/out_tree_address.json b/tests/out_tree_address.json new file mode 100644 index 0000000..809b86e --- /dev/null +++ b/tests/out_tree_address.json @@ -0,0 +1,69 @@ +{ + "root_node": { + "type": "expr", + "children": [ + { + 
"type": "eq", + "children": [ + { + "type": "ID", + "children": [], + "lexeme": "ID", + "start": 0, + "end": 3 + }, + { + "type": "EQ", + "children": [], + "lexeme": "EQ", + "start": 4, + "end": 6 + }, + { + "type": "literal", + "children": [ + { + "type": "address_literal", + "children": [ + { + "type": "STRING_LITERAL", + "children": [], + "lexeme": "STRING_LITERAL", + "start": 16, + "end": 19 + }, + { + "type": "STRING_LITERAL", + "children": [], + "lexeme": "STRING_LITERAL", + "start": 21, + "end": 24 + }, + { + "type": "STRING_LITERAL", + "children": [], + "lexeme": "STRING_LITERAL", + "start": 26, + "end": 29 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } + ], + "lexeme": null, + "start": 0, + "end": 0 + } +} \ No newline at end of file