Refactor the re2c-based lexer into a stateful Lexer object.

* lexer.l: replace start_search() and its free helper functions with a Lexer
  constructor and prototype (next(), token(), search()); cursor, marker, state
  and the last found lexeme are now kept on `this`.
* main.js: post-process the re2c output for the new `this.`-based names and
  write the generated code to lexer.js instead of out.js.
* search_string.js: removed; quoted-string scanning now lives in
  Lexer.prototype._searchString.
* README.md: mention the npm/Node.js requirement and the main.js entry point.

Error codes stored in the last found lexeme follow the `errors` table:
0 = success, 1 = unknown symbol, -1 = nothing found / end of input,
-2 = closing quote not found.

diff --git a/README.md b/README.md index a1d1a47..0120c9a 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # re2-js-generator ### Install guide +You need to install npm and Node.js v10; npm install @@ -12,8 +13,10 @@ Read re2c manual: http://re2c.org/manual/manual.html or -node parse_source_lexeme.js +node main.js + +The result will be written to the lexer.js file. + -Result fill write in out.js file. Report for bugs: rolahd@yandex.ru \ No newline at end of file diff --git a/lexer.l b/lexer.l index db9b8d9..2be311b 100644 --- a/lexer.l +++ b/lexer.l @@ -1,190 +1,255 @@ -var lex = [ - "ERR", - "DELIM", - +var types = [ + "LSB", + "RSB", "LCB", "RCB", - "INTEGER", - + "COLON", + "COMMA", + "DOT", + "REM", + "GT", + "GTE", + "LT", + "LTE", + "EQ", + "NEQ", + "LIKE", + "NLIKE", + "AND", + "OR", + "NOT", + "ADDRESS", + "TIME", + "TIMEDIFF", + "INTEGER_LITERAL", "FLOAT_LITERAL", "BOOL_LITERAL", - "INTEGER_LITERAL", + "ID" ]; -var searchString = function (_str, _quote, _yylexstart, _yycursor) { - var found_back_slash = false; - _yycursor++; - while(_yycursor < _str.length){ - var char = _str[_yycursor]; - if(_quote == '"') { - switch (char) { - case "\\": - found_back_slash = true; - break; - case '"': - if(!found_back_slash) { - return { success: true, pos: _yycursor + 1 } - } - found_back_slash = false; - break; - } - } else if(_quote == "'") { - switch (char) { - case "\\": - found_back_slash = true; - break; - case "'": - if(!found_back_slash) { - return { success: true, pos: _yycursor + 1 } - } - found_back_slash = false; - break; - } - } - _yycursor++; - } - - return {success: false, pos: _yycursor + 1 } +var errors = { + "-2": "not found close quote or singleQuote", + "-1": "not found any lexemes or errors or anything else", + "0": "success", + "1": "found unknown symbol" }; -var addLexeme = function(_str, _yylexstart, _yycursor, _lexeme) { - if(_lexeme !== "ERR"){ - console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, _yylexstart, _yycursor, 
_str.substring(_yylexstart, _yycursor))); - } else { - console.log(print_f("search end\n")); - return true; - } +var Lexer = function(_string){ + this._last_found_lexeme = {error: -1}; + this._end = false; + this._error = false; + this._string = _string; + this._state = 1; + this._yy_char = null; + this._yy_lex_start = 0; + this._yy_cursor = 0; + this._yy_marker = 0; + this._yy_accept = 0; }; -var unknownSymbol = function(_str, _yylexstart, _yycursor){ - throw print_f("Found unknown symbol on position: %s", _yycursor) -}; +Lexer.prototype = { + types: types, + errors: errors, + _notFoundCloseQuote: function() { + this._error = true; + this._last_found_lexeme = { + error: -2, + start: this._yy_lex_start, + end: this._yy_cursor + }; -var notFoundCloseQuote = function(_str, _yylexstart, _yycursor) { - console.log( print_f("Not found close quote start: %s", _yycursor)); - throw print_f("Not found close quote start: %s", _yycursor); -}; - -var start_search = function(_str) { - console.log("start search", _str); - var id = 1; - var yych = null; - var YYLEXSTART = 0; - var yyaccept = 0; - var YYCURSOR = 0; - var YYMARKER = 0; - var str = _str; - - var reset = function(){ - yyaccept = 0; - id = 1; - YYLEXSTART = YYCURSOR; - YYMARKER = YYCURSOR; + console.log( print_f("Not found close quote start: %s", this._yy_cursor)); + }, + _unknownSymbol: function(){ + this._error = true; + this._last_found_lexeme = { + error: 1, + start: this._yy_lex_start, + end: this._yy_cursor }; -while(true) -{ - switch(id) /*!re2c - re2c:define:YYCTYPE = _r2c_var_; - re2c:yyfill:enable = 0; + console.log( print_f("Found unknown symbol on position: %s", this._yy_cursor)); + }, + _foundLexeme: function(_lexeme) { + console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor))); + this._last_found_lexeme = { + error: 0, + lexeme: _lexeme, + start: this._yy_lex_start, + end: 
this._yy_cursor + }; + }, + _endOfString: function(){ + console.log(print_f("search end\n")); + this._end = true; + this._last_found_lexeme = { + error: -1 + }; + }, + _searchString: function () { + var _quote = this._string[this._yy_cursor - 1]; + var found_back_slash = false; + while(this._yy_cursor < this._string.length){ + this._yy_char = this._string[this._yy_cursor]; + if(_quote == '"') { + switch (this._yy_char) { + case "\\": + found_back_slash = true; + break; + case '"': + if(!found_back_slash) { + this._yy_cursor++; + this._foundLexeme("STRING_LITERAL"); + return; + } + found_back_slash = false; + break; + } + } else if(_quote == "'") { + switch (this._yy_char) { + case "\\": + found_back_slash = true; + break; + case "'": + if(!found_back_slash) { + this._yy_cursor++; + this._foundLexeme("STRING_LITERAL"); + return; + } + found_back_slash = false; + break; + } + } + this._yy_cursor++; + } - D = [0-9]; - end = "\x00"; - L = [A-Za-z_]; - RL = [\U00000400-\U00000451]; + this._notFoundCloseQuote(); + }, + _set_next: function(){ + this._yy_accept = 0; + this._state = 1; + this._yy_lex_start = this._yy_cursor; + this._yy_marker = this._yy_cursor; + }, + next: function(){ + if(this._end || this._error) return null; + + this.search(); + return this.token(); + }, + token: function(){ + return this._last_found_lexeme; + }, + search: function(){ + if(this._end) return false; - CR = "\r"; - LF = "\n"; - CRLF = CR?LF; - INTEGER = "-"?D+; - SP = " "; - TAB = "\t"; + while(true){ + switch(id) /*!re2c + re2c:define:YYCTYPE = _r2c_var_; + re2c:define:YYCURSOR = this._yy_cursor; + re2c:define:YYMARKER = this._yy_marker; + re2c:yyfill:enable = 0; - DELIM = SP|TAB|CR|LF; + D = [0-9]; + end = "\x00"; + L = [A-Za-z_]; + RL = [\U00000400-\U00000451]; - LSB = "["; - RSB = "]"; - LCB = "("; - RCB = ")"; - COLON = ":"; - COMMA = ","; - DOT = "."; - REM = "%"; - GT = ">"; - GTE = ">="; - LT = "<"; - LTE = "<="; - EQ = "=="; - NEQ = "!="; + CR = "\r"; + LF = "\n"; + CRLF = CR?LF; 
+ INTEGER = "-"?D+; + SP = " "; + TAB = "\t"; - AND = 'AND'; - OR = 'OR'; - NOT = 'NOT'; - LIKE = 'LIKE'; - NLIKE = 'NLIKE'; + DELIM = SP|TAB|CR|LF; - ADDRESS = "Address"; - TIME = "Time"; - TIMEDIFF = "TimeDiff"; + LSB = "["; + RSB = "]"; + LCB = "("; + RCB = ")"; + COLON = ":"; + COMMA = ","; + DOT = "."; + REM = "%"; + GT = ">"; + GTE = ">="; + LT = "<"; + LTE = "<="; + EQ = "=="; + NEQ = "!="; - BOOL_LITERAL = 'true'|'false'; - FLOAT_LITERAL = "-"? D* "." D+ ("e" "-"? D+)?; - INTEGER_LITERAL = INTEGER; - ID = L(L|D)*; + AND = 'AND'; + OR = 'OR'; + NOT = 'NOT'; + LIKE = 'LIKE'; + NLIKE = 'NLIKE'; - QU = "\""; - SQU = "'"; + ADDRESS = "Address"; + TIME = "Time"; + TIMEDIFF = "TimeDiff"; + + BOOL_LITERAL = 'true'|'false'; + FLOAT_LITERAL = "-"? D* "." D+ ("e" "-"? D+)?; + INTEGER_LITERAL = INTEGER; + ID = L(L|D)*; + + QU = "\""; + SQU = "'"; - end { if(addLexeme(str, YYLEXSTART, YYCURSOR, "ERR")) return; reset(); break; } + end { this._endOfString(); return; } - LSB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LSB")) return; reset(); break; } - RSB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "RSB")) return; reset(); break; } - LCB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LCB")) return; reset(); break; } - RCB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "RCB")) return; reset(); break; } - COLON { if(addLexeme(str, YYLEXSTART, YYCURSOR, "COLON")) return; reset(); break; } - COMMA { if(addLexeme(str, YYLEXSTART, YYCURSOR, "COMMA")) return; reset(); break; } - DOT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "DOT")) return; reset(); break; } - REM { if(addLexeme(str, YYLEXSTART, YYCURSOR, "REM")) return; reset(); break; } + LSB { this._foundLexeme("LSB"); this._set_next(); return; } + RSB { this._foundLexeme("RSB"); this._set_next(); return; } + LCB { this._foundLexeme("LCB"); this._set_next(); return; } + RCB { this._foundLexeme("RCB"); this._set_next(); return; } + COLON { this._foundLexeme("COLON"); this._set_next(); return; } + COMMA { this._foundLexeme("COMMA"); 
this._set_next(); return; } + DOT { this._foundLexeme("DOT"); this._set_next(); return; } + REM { this._foundLexeme("REM"); this._set_next(); return; } - GT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "GT")) return; reset(); break; } - GTE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "GTE")) return; reset(); break; } - LT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LT")) return; reset(); break; } - LTE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LTE")) return; reset(); break; } - EQ { if(addLexeme(str, YYLEXSTART, YYCURSOR, "EQ")) return; reset(); break; } - NEQ { if(addLexeme(str, YYLEXSTART, YYCURSOR, "NEQ")) return; reset(); break; } - LIKE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LIKE")) return; reset(); break; } - NLIKE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "NLIKE")) return; reset(); break; } + GT { this._foundLexeme("GT"); this._set_next(); return; } + GTE { this._foundLexeme("GTE"); this._set_next(); return; } + LT { this._foundLexeme("LT"); this._set_next(); return; } + LTE { this._foundLexeme("LTE"); this._set_next(); return; } + EQ { this._foundLexeme("EQ"); this._set_next(); return; } + NEQ { this._foundLexeme("NEQ"); this._set_next(); return; } + LIKE { this._foundLexeme("LIKE"); this._set_next(); return; } + NLIKE { this._foundLexeme("NLIKE"); this._set_next(); return; } - AND { if(addLexeme(str, YYLEXSTART, YYCURSOR, "AND")) return; reset(); break; } - OR { if(addLexeme(str, YYLEXSTART, YYCURSOR, "OR")) return; reset(); break; } - NOT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "NOT")) return; reset(); break; } + AND { this._foundLexeme("AND"); this._set_next(); return; } + OR { this._foundLexeme("OR"); this._set_next(); return; } + NOT { this._foundLexeme("NOT"); this._set_next(); return; } - ADDRESS { if(addLexeme(str, YYLEXSTART, YYCURSOR, "ADDRESS")) return; reset(); break; } - TIME { if(addLexeme(str, YYLEXSTART, YYCURSOR, "TIME")) return; reset(); break; } - TIMEDIFF { if(addLexeme(str, YYLEXSTART, YYCURSOR, "TIMEDIFF")) return; reset(); break; } + 
ADDRESS { this._foundLexeme("ADDRESS"); this._set_next(); return; } + TIME { this._foundLexeme("TIME"); this._set_next(); return; } + TIMEDIFF { this._foundLexeme("TIMEDIFF"); this._set_next(); return; } - INTEGER_LITERAL { if(addLexeme(str, YYLEXSTART, YYCURSOR, "INTEGER_LITERAL")) return; reset(); break; } - FLOAT_LITERAL { if(addLexeme(str, YYLEXSTART, YYCURSOR, "FLOAT_LITERAL")) return; reset(); break; } - BOOL_LITERAL { if(addLexeme(str, YYLEXSTART, YYCURSOR, "BOOL_LITERAL")) return; reset(); break; } + INTEGER_LITERAL { this._foundLexeme("INTEGER_LITERAL"); this._set_next(); return; } + FLOAT_LITERAL { this._foundLexeme("FLOAT_LITERAL"); this._set_next(); return; } + BOOL_LITERAL { this._foundLexeme("BOOL_LITERAL"); this._set_next(); return; } - ID { if(addLexeme(str, YYLEXSTART, YYCURSOR, "ID")) return; reset(); break; } - DELIM { reset(); break; } + ID { this._foundLexeme("ID"); this._set_next(); return; } + DELIM { this._set_next(); break; } - QU|SQU { id = 100000000; break;} + QU|SQU { id = 100000000; break;} - [^] { if(unknownSymbol(str, YYLEXSTART, YYCURSOR)) return; reset(); break; } - */ENDER} + [^] { this._unknownSymbol(); this._set_next(); return; } + */ENDER} + } }; -var print_f = function () { + +var print_f = function() { var r_str = ""; var next = arguments[0]; var rx = /(%[a-zA-Z]{1})/; - var a = 1, match; + var a = 1, + match; while (match = rx.exec(next)) { var prev = next.substring(0, match.index); var macro = next.substring(match.index + 1, match.index + 2); @@ -193,17 +258,16 @@ var print_f = function () { var arg = arguments[a]; - if(arg !== undefined) { + if (arg !== undefined) { switch (macro) { case "s": - if(arg.to_string && !arg.toString) r_str += arg.to_string(); - if(arg.toString && !arg.to_string) r_str += arg.toString(); + r_str += arg.toString(); break; case "i": - r_str += (arg.to_number && arg.to_number()) || parseInt(arg); + r_str += parseInt(arg); break; case "f": - r_str += (arg.to_number && arg.to_number()) || 
parseFloat(arg); + r_str += parseFloat(arg); break; } } else { @@ -217,41 +281,41 @@ var print_f = function () { return r_str; }; -console.log("TEST SINGLE") -start_search("["); -start_search("]"); -start_search("("); -start_search(")"); -start_search(":"); -start_search(","); -start_search("."); -start_search("%"); -start_search(">"); -start_search(">="); -start_search("<"); -start_search("<="); -start_search("=="); -start_search("!="); -start_search("AND"); -start_search("and"); -start_search("OR"); -start_search("or"); -start_search("NOT"); -start_search("not"); -start_search("LIKE"); -start_search("like"); -start_search("NLIKE"); -start_search("nlike"); -start_search("Address"); -start_search("Time"); -start_search("TimeDiff"); +console.log("TEST SINGLE"); +(new Lexer("[")).search(); +(new Lexer("]")).search(); +(new Lexer("(")).search(); +(new Lexer(")")).search(); +(new Lexer(":")).search(); +(new Lexer(",")).search(); +(new Lexer(".")).search(); +(new Lexer("%")).search(); +(new Lexer(">")).search(); +(new Lexer(">=")).search(); +(new Lexer("<")).search(); +(new Lexer("<=")).search(); +(new Lexer("==")).search(); +(new Lexer("!=")).search(); +(new Lexer("AND")).search(); +(new Lexer("and")).search(); +(new Lexer("OR")).search(); +(new Lexer("or")).search(); +(new Lexer("NOT")).search(); +(new Lexer("not")).search(); +(new Lexer("LIKE")).search(); +(new Lexer("like")).search(); +(new Lexer("NLIKE")).search(); +(new Lexer("nlike")).search(); +(new Lexer("Address")).search(); +(new Lexer("Time")).search(); +(new Lexer("TimeDiff")).search(); -console.log("TEST ALL") -start_search("[ ] ( ) : , . % > >= < <= == != AND and OR or NOT not LIKE like NLIKE nlike Address Time TimeDiff "); +var lex_test_all = new Lexer("[ ] ( ) : , . 
% > >= < <= == != AND and OR or NOT not LIKE like NLIKE nlike Address Time TimeDiff 'sdfadfasdf' \"asdfasfd\" "); +var _lex; +while(_lex = lex_test_all.next()){ + console.log("IN while:", _lex.lexeme); +} console.log("TEST STRING LITERAL"); -start_search(' "111\\\"11\\\"1" "222222" '); -start_search(" '111\\\'11\\\'1' '222222' "); - -console.log("TEST FAILS"); -start_search(' sdfasdfasdfsdf "fasdf'); \ No newline at end of file +(new Lexer(' "111\\\"11\\\"1" "222222" ')).search(); +(new Lexer(" '111\\\'11\\\'1' '222222' ")).search(); \ No newline at end of file diff --git a/main.js b/main.js index 55e3183..472b838 100644 --- a/main.js +++ b/main.js @@ -14,13 +14,12 @@ exec("re2c -i lexer.l", function(err, stdout, stderr) { var post_process_lexer = function (_string) { - var search_string = fs.readFileSync("search_string.js", "utf8"); // insert last case for string detect - _string = _string.replace(/\}\nENDER}/gm, "yy100000000: { " + search_string + " reset(); break; }}}"); + _string = _string.replace(/\}\nENDER}/gm, "yy100000000: { this._searchString(); this._set_next(); return; }}}"); _string = _string.replace(/^.*(_r2c_var_.*;|unsigned int yyaccept = 0;)\n/gm, ""); // replace var yych; - _string = _string.replace(/(yych = \*YYCURSOR);\n/gm, "\tcase 1:\n yych = str[YYCURSOR];\n"); // insert "case 1:" before; - _string = _string.replace(/\*(.*?);/gm, "str[$1];"); // замена разыменовываний + _string = _string.replace(/(yych = \*this._yy_cursor);\n/gm, "\tcase 1:\n yych = this._string[this._yy_cursor];\n"); // insert "case 1:" before; + _string = _string.replace(/\*(.*?);/gm, "this._string[$1];"); // замена разыменовываний _string = _string.replace(/^yy(\d*?):/gm, "case $1:"); // replace goto marker onto case _string = _string.replace(/\) goto yy(\d*?);/gm, ") { id = $1; break; }"); // replace goto inside if _string = _string.replace(/goto yy(\d*?);/gm, "id = $1; break;"); // replace goto outside if @@ -29,10 +28,16 @@ var post_process_lexer = function (_string) { 
_string = _string.replace(/0x00/gm, 'undefined'); // replace 0x00 // black magic - _string = _string.replace(/(switch \(yych\) \{[\s\S]*?})/gm, "(function(){$1})(); break;"); // добавим замыкание что бы обработать свиче в свиче + _string = _string.replace(/(switch \(yych\) \{[\s\S]*?})/gm, "(function(){$1}.bind(this))(); break;"); // добавим замыкание что бы обработать свиче в свиче + + _string = _string.replace(/switch\((id)\)/gm, "switch(this._state)"); // replace id to this._state + _string = _string.replace(/id = (\d.*?);/gm, "this._state = $1;"); // replace id = n to this._state = n + + _string = _string.replace(/yyaccept/gm, "this._yy_accept"); // replace yyaccept to this._yy_accept + _string = _string.replace(/yych/gm, "this._yy_char"); // replace yych to this._yy_char _string = js_beautify(_string, {indent_size: 4, space_in_empty_paren: true}); - fs.writeFileSync("out.js", _string); + fs.writeFileSync("lexer.js", _string); }; \ No newline at end of file diff --git a/search_string.js b/search_string.js deleted file mode 100644 index 1f8545f..0000000 --- a/search_string.js +++ /dev/null @@ -1,7 +0,0 @@ -var info = searchString(str, yych, YYLEXSTART, YYCURSOR); -if(info.success) { - YYCURSOR = info.pos; - addLexeme(str, YYLEXSTART, YYCURSOR, "STRING"); -} else { - notFoundCloseQuote(str, YYLEXSTART, YYCURSOR); -} \ No newline at end of file