first commit

This commit is contained in:
Aleksey Chichenkov 2019-01-24 16:57:54 +03:00
parent 6e7d547fdf
commit 2203caddc6
4 changed files with 267 additions and 0 deletions

232
lexer.l Normal file
View File

@ -0,0 +1,232 @@
// Lexeme kind names.
// NOTE(review): nothing in the visible part of this file reads `lex`, and it does not
// list most of the names actually passed to addLexeme (e.g. "LSB", "GT", "ID");
// "INTEGER" is listed here but never passed to addLexeme. Confirm whether the
// table is still needed or should be completed.
var lex = [
"ERR",
"DELIM",
"LCB",
"RCB",
"INTEGER",
"FLOAT_LITERAL",
"BOOL_LITERAL",
"INTEGER_LITERAL",
];
/**
 * Reports one recognised lexeme on the console.
 *
 * @param {string} _str         text being scanned
 * @param {number} _yylexstart  index where the lexeme starts
 * @param {number} _yycursor    index just past the lexeme
 * @param {string} _lexeme      lexeme name; "ERR" is the name the end-of-input rule passes
 * @returns {true|undefined} true when _lexeme is "ERR" (the caller stops scanning),
 *                           otherwise nothing
 */
var addLexeme = function(_str, _yylexstart, _yycursor, _lexeme) {
    // Guard clause: the end-of-input marker only logs and tells the caller to stop.
    if (_lexeme === "ERR") {
        console.log(print_f("search end\n"));
        return true;
    }

    var matched_text = _str.substring(_yylexstart, _yycursor);
    console.log(print_f(
        "found lex: %s; start: %s; end: %s; result => %s",
        _lexeme,
        _yylexstart,
        _yycursor,
        matched_text
    ));
};
/**
 * Aborts scanning when the catch-all rule hits a character no other rule accepts.
 *
 * Throws an Error object rather than a bare string so the failure carries a
 * stack trace; the message text is unchanged.
 *
 * @param {string} _str         text being scanned (not used)
 * @param {number} _yylexstart  index where the offending lexeme starts (not used)
 * @param {number} _yycursor    cursor value reported in the message
 * @throws {Error} always; this function never returns
 */
var unknownSymbol = function(_str, _yylexstart, _yycursor){
    throw new Error(print_f("Found unknown symbol on position: %s", _yycursor));
};
// Scans _str and reports every lexeme through addLexeme.
//
// This is a re2c template, not runnable JavaScript as written: `switch(id)` has no
// body until re2c expands the re2c comment block below. parse_source_lexeme.js runs
// `re2c -i lexer.l` and rewrites the generated output (labels become `case N:`,
// gotos become `id = N; break;`) before saving it as out.js.
//
// The re2c block first declares named patterns and then lists the rules. Each rule
// action calls addLexeme with the lexeme name, returns when addLexeme reports the
// end of input ("ERR"), and otherwise calls reset() and breaks to start the next
// lexeme. DELIM only calls reset(), so it is skipped without being reported, and
// the final catch-all `[^]` rule calls unknownSymbol, which throws.
//
// NOTE(review): the patterns RL, CRLF, QU and EQU are declared but no rule uses them.
//
// @param {string} _str  text to scan
var start_search = function(_str) {
console.log("start search", _str);
// State number dispatched by `switch(id)`; 1 is the initial state
// (the post-processor inserts `case 1:` in front of the first read of yych).
var id = 1;
// NOTE(review): yych, yyaccept and YYMARKER are not read by the hand-written code
// here; presumably the re2c-generated code uses them — confirm against out.js.
var yych = null;
// Index in str where the current lexeme starts.
var YYLEXSTART = 0;
var yyaccept = 0;
// Index of the current scan position in str.
var YYCURSOR = 0;
var YYMARKER = 0;
var str = _str;
// Prepares for the next lexeme: back to state 1, lexeme start and marker moved
// up to the current cursor.
var reset = function(){
yyaccept = 0;
id = 1;
YYLEXSTART = YYCURSOR;
YYMARKER = YYCURSOR;
};
// Runs until a rule action returns (end of input) or unknownSymbol throws.
while(true)
{
switch(id) /*!re2c
re2c:define:YYCTYPE = _r2c_var_;
re2c:yyfill:enable = 0;
D = [0-9];
end = "\x00";
L = [A-Za-z_];
RL = [\U00000400-\U00000451];
CR = "\r";
LF = "\n";
CRLF = CR?LF;
INTEGER = "-"?D+;
SP = " ";
TAB = "\t";
DELIM = SP|TAB|CR|LF;
LSB = "[";
RSB = "]";
LCB = "(";
RCB = ")";
COLON = ":";
COMMA = ",";
DOT = ".";
REM = "%";
GT = ">";
GTE = ">=";
LT = "<";
LTE = "<=";
EQ = "==";
NEQ = "!=";
AND = 'AND';
OR = 'OR';
NOT = 'NOT';
LIKE = 'LIKE';
NLIKE = 'NLIKE';
ADDRESS = "Address";
TIME = "Time";
TIMEDIFF = "TimeDiff";
BOOL_LITERAL = 'true'|'false';
FLOAT_LITERAL = "-"? D* "." D+ ("e" "-"? D+)?;
INTEGER_LITERAL = INTEGER;
ID = L(L|D)*;
QU = "\"";
EQU = "\\\"";
end { if(addLexeme(str, YYLEXSTART, YYCURSOR, "ERR")) return; reset(); break; }
LSB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LSB")) return; reset(); break; }
RSB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "RSB")) return; reset(); break; }
LCB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LCB")) return; reset(); break; }
RCB { if(addLexeme(str, YYLEXSTART, YYCURSOR, "RCB")) return; reset(); break; }
COLON { if(addLexeme(str, YYLEXSTART, YYCURSOR, "COLON")) return; reset(); break; }
COMMA { if(addLexeme(str, YYLEXSTART, YYCURSOR, "COMMA")) return; reset(); break; }
DOT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "DOT")) return; reset(); break; }
REM { if(addLexeme(str, YYLEXSTART, YYCURSOR, "REM")) return; reset(); break; }
GT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "GT")) return; reset(); break; }
GTE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "GTE")) return; reset(); break; }
LT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LT")) return; reset(); break; }
LTE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LTE")) return; reset(); break; }
EQ { if(addLexeme(str, YYLEXSTART, YYCURSOR, "EQ")) return; reset(); break; }
NEQ { if(addLexeme(str, YYLEXSTART, YYCURSOR, "NEQ")) return; reset(); break; }
LIKE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "LIKE")) return; reset(); break; }
NLIKE { if(addLexeme(str, YYLEXSTART, YYCURSOR, "NLIKE")) return; reset(); break; }
AND { if(addLexeme(str, YYLEXSTART, YYCURSOR, "AND")) return; reset(); break; }
OR { if(addLexeme(str, YYLEXSTART, YYCURSOR, "OR")) return; reset(); break; }
NOT { if(addLexeme(str, YYLEXSTART, YYCURSOR, "NOT")) return; reset(); break; }
ADDRESS { if(addLexeme(str, YYLEXSTART, YYCURSOR, "ADDRESS")) return; reset(); break; }
TIME { if(addLexeme(str, YYLEXSTART, YYCURSOR, "TIME")) return; reset(); break; }
TIMEDIFF { if(addLexeme(str, YYLEXSTART, YYCURSOR, "TIMEDIFF")) return; reset(); break; }
INTEGER_LITERAL { if(addLexeme(str, YYLEXSTART, YYCURSOR, "INTEGER_LITERAL")) return; reset(); break; }
FLOAT_LITERAL { if(addLexeme(str, YYLEXSTART, YYCURSOR, "FLOAT_LITERAL")) return; reset(); break; }
BOOL_LITERAL { if(addLexeme(str, YYLEXSTART, YYCURSOR, "BOOL_LITERAL")) return; reset(); break; }
ID { if(addLexeme(str, YYLEXSTART, YYCURSOR, "ID")) return; reset(); break; }
DELIM { reset(); break; }
[^] { if(unknownSymbol(str, YYLEXSTART, YYCURSOR)) return; reset(); break; }
*/}
}
/**
 * Minimal printf-style formatter.
 *
 * The first argument is the format string; every "%" followed by one ASCII
 * letter is a macro that consumes the next argument in order:
 *   %s          - string: uses arg.to_string() when the argument provides it,
 *                 otherwise arg.toString(); null prints as "null"
 *   %T %M %A %P - delegated to v2.tools.print(arg) (v2 is not defined in this
 *                 file and must be supplied by the environment)
 *   %i          - integer: arg.to_number() when available, otherwise parseInt(arg)
 *   %f          - float: arg.to_number() when available, otherwise parseFloat(arg)
 *   %b          - String(arg)
 * An unknown macro letter, or a macro whose argument is missing (undefined),
 * is copied to the output unchanged; the argument slot is consumed either way.
 *
 * Fixes compared with the previous version:
 *   - %s appended nothing for a value that has both to_string and toString,
 *     so to_string() could never actually be used;
 *   - a null argument threw a TypeError;
 *   - %i / %f discarded a legitimate 0 returned by to_number().
 *
 * @param {string} format  format string (arguments[0])
 * @param {...*} values    values substituted for the macros, in order
 * @returns {string} the formatted string
 */
var print_f = function () {
    var r_str = "";
    var next = arguments[0];
    var rx = /(%[a-zA-Z]{1})/;
    var a = 1, match;

    // True when the value exposes the project-style to_number() method.
    var has_to_number = function (_value) {
        return _value !== null && typeof _value.to_number === "function";
    };

    while ((match = rx.exec(next)) !== null) {
        var prev = next.substring(0, match.index);
        var macro = next.substring(match.index + 1, match.index + 2);
        // Keep only the text after the macro; the next iteration scans it.
        next = next.substring(match.index + 2, next.length);
        r_str += prev;

        var arg = arguments[a];
        if (arg !== undefined) {
            switch (macro) {
                case "s": // v2.string or string
                    if (arg === null) {
                        r_str += "null";
                    } else if (typeof arg.to_string === "function") {
                        // Prefer the project-specific conversion when present.
                        r_str += arg.to_string();
                    } else if (typeof arg.toString === "function") {
                        r_str += arg.toString();
                    }
                    break;
                case "T": // v2.type
                case "M": // v2.model_type
                case "A": // v2.address
                case "P": // all printable
                    r_str += v2.tools.print(arg);
                    break;
                case "i":
                    // No radix on purpose: keeps the existing parsing of "0x.." strings.
                    r_str += has_to_number(arg) ? arg.to_number() : parseInt(arg);
                    break;
                case "f":
                    r_str += has_to_number(arg) ? arg.to_number() : parseFloat(arg);
                    break;
                case "b":
                    r_str += String(arg);
                    break;
                default:
                    r_str += "%" + macro;
                    break;
            }
        } else {
            r_str += "%" + macro;
        }
        a++;
    }

    r_str += next;
    return r_str;
};
// Smoke tests, part 1: every operator and keyword scanned on its own.
console.log("TEST SINGLE");
[
    "[", "]", "(", ")", ":", ",", ".", "%",
    ">", ">=", "<", "<=", "==", "!=",
    "AND", "and", "OR", "or", "NOT", "not",
    "LIKE", "like", "NLIKE", "nlike",
    "Address", "Time", "TimeDiff"
].forEach(function (_sample) {
    start_search(_sample);
});

// Part 2: the same lexemes in a single whitespace-separated input.
console.log("TEST ALL");
start_search("[ ] ( ) : , . % > >= < <= == != AND and OR or NOT not LIKE like NLIKE nlike Address Time TimeDiff ");

// Part 3: the double quote is matched only by the catch-all rule, which calls
// unknownSymbol, so this call is expected to end with a throw.
console.log("TEST FAILS");
start_search(' sdfasdfasdfsdf "fasdf');

33
parse_source_lexeme.js Normal file
View File

@ -0,0 +1,33 @@
/**
* Created by Aleksey Chichenkov <a.chichenkov@initi.ru> on 1/23/19.
*/
var js_beautify = require("js-beautify");
var fs = require("fs");
var exec = require('child_process').exec;
// Generate the lexer: run re2c on the template and post-process what it prints
// to stdout. On failure the error is logged and the process exits with status 1.
exec("re2c -i lexer.l", function(err, stdout, stderr) {
    if (err) {
        console.log("ERROR: ", err);
        process.exit(1);
    }
    post_process_lexer(stdout);
});
/**
 * Rewrites the text printed by `re2c -i lexer.l` into JavaScript and writes the
 * result to out.js.
 *
 * The rewrite is a fixed sequence of regular-expression replacements applied
 * one after another to the whole text; later steps operate on the output of
 * earlier ones, so their order matters. The final text is formatted with
 * js-beautify before it is saved.
 *
 * @param {string} _string  stdout of the re2c run
 */
var post_process_lexer = function (_string) {
// Remove whole lines that declare variables using the _r2c_var_ placeholder
// (the YYCTYPE configured in lexer.l), and the `unsigned int yyaccept = 0;` line.
_string = _string.replace(/^.*(_r2c_var_.*;|unsigned int yyaccept = 0;)\n/gm, ""); // remove the generated declarations (e.g. of yych)
// NOTE(review): because of the g flag this rewrites every `yych = *YYCURSOR;`
// line, inserting a `case 1:` label at each one — confirm re2c emits it only once.
_string = _string.replace(/(yych = \*YYCURSOR);\n/gm, "\tcase 1:\n yych = str[YYCURSOR];\n"); // insert "case 1:" before the initial read of yych
_string = _string.replace(/\*(.*?);/gm, "str[$1];"); // replace pointer dereferences with indexing into str
_string = _string.replace(/^yy(\d*?):/gm, "case $1:"); // turn goto labels into case labels
_string = _string.replace(/\) goto yy(\d*?);/gm, ") { id = $1; break; }"); // replace goto inside an if
_string = _string.replace(/goto yy(\d*?);/gm, "id = $1; break;"); // replace goto outside an if
_string = _string.replace(/\{ (addLexeme.*break;) \}/gm, "$1"); // strip the braces around addLexeme rule actions
_string = _string.replace(/\{ (unknownSymbol.*break;) \}/gm, "$1"); // strip the braces around the unknownSymbol rule action
_string = _string.replace(/0x00/gm, 'undefined'); // replace 0x00 with undefined
// black magic
// Each `switch (yych) {` up to the first following `}` is wrapped in an
// immediately-invoked function, followed by `break;`.
_string = _string.replace(/(switch \(yych\) \{[\s\S]*?})/gm, "(function(){$1})(); break;"); // add a closure to handle a switch nested inside a switch
_string = js_beautify(_string, {indent_size: 4, space_in_empty_paren: true});
fs.writeFileSync("out.js", _string);
};

2
run.sh Executable file
View File

@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Runs the lexer generator script with Node.js.
node parse_source_lexeme.js

0
test_strings Normal file
View File