From 2203caddc61441673fd8432c278c2a6e30f2cfa4 Mon Sep 17 00:00:00 2001
From: Aleksey Chichenkov
Date: Thu, 24 Jan 2019 16:57:54 +0300
Subject: [PATCH] first commit

---
 lexer.l                | 232 +++++++++++++++++++++++++++++++++++++++++
 parse_source_lexeme.js |  33 ++++++
 run.sh                 |   2 +
 test_strings           |   0
 4 files changed, 267 insertions(+)
 create mode 100644 lexer.l
 create mode 100644 parse_source_lexeme.js
 create mode 100755 run.sh
 create mode 100644 test_strings

diff --git a/lexer.l b/lexer.l
new file mode 100644
index 0000000..b1d146f
--- /dev/null
+++ b/lexer.l
@@ -0,0 +1,232 @@
+var lex = [ // Lexeme names. NOTE(review): never referenced in this file and missing most names the rules below emit
+    "ERR",
+    "DELIM",
+
+    "LCB",
+    "RCB",
+    "INTEGER",
+
+    "FLOAT_LITERAL",
+    "BOOL_LITERAL",
+    "INTEGER_LITERAL",
+];
+// Logs a recognised lexeme: its name, start and end offsets and the matched substring of _str.
+// For ERR (passed by the end-of-input rule) it logs that the search ended and returns true, otherwise it returns undefined.
+var addLexeme = function(_str, _yylexstart, _yycursor, _lexeme) {
+    if(_lexeme !== "ERR"){
+        console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, _yylexstart, _yycursor, _str.substring(_yylexstart, _yycursor)));
+    } else {
+        console.log(print_f("search end\n"));
+        return true;
+    }
+};
+// Throws a formatted message (a plain string, not an Error object) that names the cursor position of the unmatched symbol.
+var unknownSymbol = function(_str, _yylexstart, _yycursor){
+    throw print_f("Found unknown symbol on position: %s", _yycursor)
+};
+// Scans _str lexeme by lexeme. The re2c block inside is a template: re2c generates the matcher and parse_source_lexeme.js rewrites it into switch cases selected by id.
+var start_search = function(_str) {
+    console.log("start search", _str);
+    var id = 1;
+    var yych = null;
+    var YYLEXSTART = 0;
+    var yyaccept = 0;
+    var YYCURSOR = 0;
+    var YYMARKER = 0;
+    var str = _str;
+    // Starts the next lexeme at the current cursor and sets the state selector id back to 1.
+    var reset = function(){
+        yyaccept = 0;
+        id = 1;
+        YYLEXSTART = YYCURSOR;
+        YYMARKER = YYCURSOR;
+    };
+    // Dispatch loop. Each rule action either returns (end of input), throws (unknown symbol) or calls reset and breaks so the loop runs again.
+while(true)
+{
+    switch(id) /*!re2c
+        re2c:define:YYCTYPE = _r2c_var_;
+        re2c:yyfill:enable = 0;
+
+        D = [0-9];
+        end = "\x00";
+        L = [A-Za-z_];
+        RL = [\U00000400-\U00000451];
+
+        CR = "\r";
+        LF = "\n";
+        CRLF = CR?LF;
+        INTEGER = "-"?D+;
+        SP = " ";
+        TAB = "\t";
+
+        DELIM = SP|TAB|CR|LF;
+
+        LSB = "[";
+        RSB = "]";
+        LCB = "(";
+        RCB = ")";
+        COLON = ":";
+        COMMA = ",";
+        DOT = ".";
+        REM = "%";
+        GT = ">";
+        GTE = ">=";
+        LT = "<";
+        LTE = "<=";
+        EQ = "==";
+        NEQ = "!=";
+
+        AND = 'AND';
+        OR = 'OR';
+        NOT = 'NOT';
+        LIKE = 'LIKE';
+        NLIKE = 'NLIKE';
+
+        ADDRESS = "Address";
+        TIME = "Time";
+        TIMEDIFF = "TimeDiff";
+
+        BOOL_LITERAL = 'true'|'false';
+        FLOAT_LITERAL = "-"? D* "." D+ ("e" "-"? D+)?;
+        INTEGER_LITERAL = INTEGER;
+        ID = L(L|D)*;
+
+        QU = "\"";
+        EQU = "\\\"";
+
+
+
+
+        end { if(addLexeme(str, YYLEXSTART, YYCURSOR, "ERR")) return; reset(); break; }
+
+        LSB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LSB")) return; reset(); break; }
+        RSB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "RSB")) return; reset(); break; }
+        LCB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LCB")) return; reset(); break; }
+        RCB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "RCB")) return; reset(); break; }
+        COLON { if(addLexeme(str, YYLEXSTART, YYCURSOR, "COLON")) return; reset(); break; }
+        COMMA { if(addLexeme(str, YYLEXSTART, YYCURSOR, "COMMA")) return; reset(); break; }
+        DOT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "DOT")) return; reset(); break; }
+        REM { if(addLexeme(str, YYLEXSTART, YYCURSOR, "REM")) return; reset(); break; }
+
+        GT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "GT")) return; reset(); break; }
+        GTE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "GTE")) return; reset(); break; }
+        LT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LT")) return; reset(); break; }
+        LTE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LTE")) return; reset(); break; }
+        EQ { if(addLexeme(str, YYLEXSTART, YYCURSOR, "EQ")) return; reset(); break; }
+        NEQ { if(addLexeme(str, YYLEXSTART, YYCURSOR, "NEQ")) return; reset(); break; }
+        LIKE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LIKE")) return; reset(); break; }
+        NLIKE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "NLIKE")) return; reset(); break; }
+
+        AND { if(addLexeme(str, YYLEXSTART, YYCURSOR, "AND")) return; reset(); break; }
+        OR { if(addLexeme(str, YYLEXSTART, YYCURSOR, "OR")) return; reset(); break; }
+        NOT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "NOT")) return; reset(); break; }
+
+        ADDRESS { if(addLexeme(str, YYLEXSTART, YYCURSOR, "ADDRESS")) return; reset(); break; }
+        TIME { if(addLexeme(str, YYLEXSTART, YYCURSOR, "TIME")) return; reset(); break; }
+        TIMEDIFF { if(addLexeme(str, YYLEXSTART, YYCURSOR, "TIMEDIFF")) return; reset(); break; }
+
+        INTEGER_LITERAL { if(addLexeme(str, YYLEXSTART, YYCURSOR, "INTEGER_LITERAL")) return; reset(); break; }
+        FLOAT_LITERAL { if(addLexeme(str, YYLEXSTART, YYCURSOR, "FLOAT_LITERAL")) return; reset(); break; }
+        BOOL_LITERAL { if(addLexeme(str, YYLEXSTART, YYCURSOR, "BOOL_LITERAL")) return; reset(); break; }
+
+        ID { if(addLexeme(str, YYLEXSTART, YYCURSOR, "ID")) return; reset(); break; }
+        DELIM { reset(); break; }
+
+        [^] { if(unknownSymbol(str, YYLEXSTART, YYCURSOR)) return; reset(); break; }
+    */}
+}
+// printf-like helper: substitutes each %<letter> macro in the first argument with the next positional argument and returns the result.
+var print_f = function () {
+    var r_str = "";
+    var next = arguments[0];
+    // rx has no g flag, so every exec call searches the remaining tail (next) from its beginning.
+    var rx = /(%[a-zA-Z]{1})/;
+    var a = 1, match;
+    while (match = rx.exec(next)) {
+        var prev = next.substring(0, match.index);
+        var macro = next.substring(match.index + 1, match.index + 2);
+        next = next.substring(match.index + 2, next.length);
+        r_str += prev;
+
+        var arg = arguments[a];
+        // NOTE(review): only undefined is guarded here, a null argument would throw on the property reads below.
+        if(arg !== undefined) {
+            switch (macro) {
+                case "s": // v2.string or string
+                    // var v_1 = arg.to_string && arg.to_string();
+
+                    // r_str += (arg.to_string && arg.to_string()) || arg.toString();
+
+                    // var has_to_string = arg.to_string !== undefined;
+                    // var has_toString = arg.toString !== undefined;
+                    // NOTE(review): nothing is appended when arg has both to_string and toString, or neither of them.
+                    if(arg.to_string && !arg.toString) r_str += arg.to_string();
+                    if(arg.toString && !arg.to_string) r_str += arg.toString();
+
+
+                    break;
+                case "T": // v2.type
+                case "M": // v2.model_type
+                case "A": // v2.address
+                case "P": // all printable
+                    r_str += v2.tools.print(arg); // NOTE(review): v2 is not defined in the visible part of this file
+                    break;
+                case "i":
+                    r_str += (arg.to_number && arg.to_number()) || parseInt(arg); // falls back to parseInt when to_number is missing or returns a falsy value such as 0
+                    break;
+                case "f":
+                    r_str += (arg.to_number && arg.to_number()) || parseFloat(arg);
+                    break;
+                case "b":
+                    r_str += arg.toString();
+                    break;
+                default:
+                    r_str += "%" + macro;
+                    break;
+            }
+        } else {
+            r_str += "%" + macro;
+        }
+        a++;
+    }
+
+    r_str += next;
+
+    return r_str;
+};
+// Smoke tests: each start_search call logs the lexemes it finds in the given string.
+console.log("TEST SINGLE")
+start_search("[");
+start_search("]");
+start_search("(");
+start_search(")");
+start_search(":");
+start_search(",");
+start_search(".");
+start_search("%");
+start_search(">");
+start_search(">=");
+start_search("<");
+start_search("<=");
+start_search("==");
+start_search("!=");
+start_search("AND");
+start_search("and");
+start_search("OR");
+start_search("or");
+start_search("NOT");
+start_search("not");
+start_search("LIKE");
+start_search("like");
+start_search("NLIKE");
+start_search("nlike");
+start_search("Address");
+start_search("Time");
+start_search("TimeDiff");
+
+console.log("TEST ALL")
+start_search("[ ] ( ) : , . % > >= < <= == != AND and OR or NOT not LIKE like NLIKE nlike Address Time TimeDiff ");
+// Expected to throw: the double quote is matched only by the catch-all rule, which calls unknownSymbol.
+console.log("TEST FAILS");
+start_search(' sdfasdfasdfsdf "fasdf');
\ No newline at end of file
diff --git a/parse_source_lexeme.js b/parse_source_lexeme.js
new file mode 100644
index 0000000..8e48bc7
--- /dev/null
+++ b/parse_source_lexeme.js
@@ -0,0 +1,33 @@
+/**
+ * Created by Aleksey Chichenkov on 1/23/19.
+ */
+var js_beautify = require("js-beautify");
+var fs = require("fs");
+var exec = require('child_process').exec;
+// Run re2c on lexer.l and post-process the code it prints to stdout; on failure log the error and exit with status 1 (stderr is not inspected).
+exec("re2c -i lexer.l", function(err, stdout, stderr) {
+    err && console.log("ERROR: ", err);
+    err && process.exit(1);
+
+    post_process_lexer(stdout);
+});
+// Rewrites the re2c output (_string) with a fixed sequence of regex substitutions, beautifies it and writes it to out.js. The order of the substitutions matters: later patterns match text produced by earlier ones.
+var post_process_lexer = function (_string) {
+    _string = _string.replace(/^.*(_r2c_var_.*;|unsigned int yyaccept = 0;)\n/gm, ""); // drop whole lines that declare a variable of the placeholder type _r2c_var_ (presumably `yych`) or the C declaration of yyaccept
+    _string = _string.replace(/(yych = \*YYCURSOR);\n/gm, "\tcase 1:\n yych = str[YYCURSOR];\n"); // replace the `yych = *YYCURSOR;` read with a `case 1:` label followed by an indexed read from str
+    _string = _string.replace(/\*(.*?);/gm, "str[$1];"); // replace remaining pointer dereferences (`*expr;`) with indexing into str (`str[expr];`)
+    _string = _string.replace(/^yy(\d*?):/gm, "case $1:"); // turn goto labels (`yyN:`) into `case N:`
+    _string = _string.replace(/\) goto yy(\d*?);/gm, ") { id = $1; break; }"); // a goto that directly follows `)` (inside an if) becomes a braced `id = N; break;`
+    _string = _string.replace(/goto yy(\d*?);/gm, "id = $1; break;"); // any other goto becomes `id = N; break;`
+    _string = _string.replace(/\{ (addLexeme.*break;) \}/gm, "$1"); // strip the braces around actions that start with addLexeme -- NOTE(review): the actions in lexer.l start with `if(addLexeme`; confirm this pattern matches what re2c emits
+    _string = _string.replace(/\{ (unknownSymbol.*break;) \}/gm, "$1"); // same for actions that start with unknownSymbol -- NOTE(review): same concern as above
+    _string = _string.replace(/0x00/gm, 'undefined'); // replace the literal 0x00 with undefined -- presumably because str[YYCURSOR] is undefined past the end of the string; verify
+
+    // black magic: add a closure to handle a switch inside a switch -- each `switch (yych) {...}` (matched lazily up to the first closing brace) is wrapped in an immediately-invoked function and followed by `break;`
+    _string = _string.replace(/(switch \(yych\) \{[\s\S]*?})/gm, "(function(){$1})(); break;");
+
+
+    _string = js_beautify(_string, {indent_size: 4, space_in_empty_paren: true});
+
+    fs.writeFileSync("out.js", _string);
+};
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..7345393
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+node parse_source_lexeme.js
\ No newline at end of file
diff --git a/test_strings b/test_strings
new file mode 100644
index 0000000..e69de29