This commit is contained in:
Aleksey Chichenkov 2019-01-30 12:49:21 +03:00
parent bcb12599ac
commit 1bec222dec
3 changed files with 267 additions and 84 deletions

314
lexer.l
View File

@ -1,4 +1,5 @@
var types = [ var types = [
"DSEQ",
"SLASH", "SLASH",
"LSB", "LSB",
"RSB", "RSB",
@ -20,7 +21,7 @@ var types = [
"OR", "OR",
"NOT", "NOT",
"ADDRESS", "ADDRESS",
"OID", "OID_LITERAL",
"TIME", "TIME",
"TIMEDIFF", "TIMEDIFF",
"INTEGER_LITERAL", "INTEGER_LITERAL",
@ -72,6 +73,16 @@ Lexer.prototype = {
console.log( print_f("Found unknown symbol on position: %s", this._yy_cursor)); console.log( print_f("Found unknown symbol on position: %s", this._yy_cursor));
}, },
_oidUnexpectedSymbol: function(){
this._error = true;
this._last_found_lexeme = {
error: 3,
start: this._yy_lex_start,
end: this._yy_cursor
};
console.log( print_f("Found unknown symbol in Oid on position: %s", this._yy_cursor));
},
_foundLexeme: function(_lexeme) { _foundLexeme: function(_lexeme) {
console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor))); console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor)));
this._last_found_lexeme = { this._last_found_lexeme = {
@ -82,6 +93,18 @@ Lexer.prototype = {
end: this._yy_cursor end: this._yy_cursor
}; };
}, },
_foundOidLexeme: function(_lexeme, _lsb, _rsb) {
console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor)));
this._last_found_lexeme = {
error: 0,
lexeme: _lexeme,
value: this._string.substring(this._yy_lex_start, this._yy_cursor),
start: this._yy_lex_start,
end: this._yy_cursor,
lsb: _lsb,
rsb: _rsb
};
},
_endOfString: function(){ _endOfString: function(){
console.log(print_f("search end\n")); console.log(print_f("search end\n"));
this._end = true; this._end = true;
@ -128,6 +151,116 @@ Lexer.prototype = {
this._notFoundCloseQuote(); this._notFoundCloseQuote();
}, },
_searchOid: function (){
var lsb, rsb;
var state = 0;
while(this._yy_cursor < this._string.length){
switch(state){
case 0:
this._yy_char = this._string[this._yy_cursor];
(function(){
switch(this._yy_char){
case " ":
state = 0;
this._yy_cursor++;
break;
case "[":
lsb = {start: this._yy_cursor, end: this._yy_cursor + 1};
state = 1;
break;
default:
state = 5;
}
}.bind(this))();
break;
case 1:
this._yy_char = this._string[++this._yy_cursor];
(function(){
switch(this._yy_char){
case " ":
state = 1;
break;
case "0":
case "1":
case "2":
case "3":
case "4":
case "5":
case "6":
case "7":
case "8":
case "9":
state = 2;
break;
case "]":
state = 4;
break;
default:
state = 5;
}
}.bind(this))();
break;
case 2:
this._yy_char = this._string[++this._yy_cursor];
(function(){
switch(this._yy_char){
case " ":
case "0":
case "1":
case "2":
case "3":
case "4":
case "5":
case "6":
case "7":
case "8":
case "9":
state = 2;
break;
case ".":
state = 3;
break;
case "]":
state = 4;
break;
default:
state = 5;
}
}.bind(this))();
break;
case 3:
this._yy_char = this._string[++this._yy_cursor];
(function(){
switch(this._yy_char){
case " ":
case "0":
case "1":
case "2":
case "3":
case "4":
case "5":
case "6":
case "7":
case "8":
case "9":
state = 2;
break;
default:
state = 5;
}
}.bind(this))();
break;
case 4:
rsb = {start: this._yy_cursor, end: this._yy_cursor + 1};
++this._yy_cursor;
this._foundOidLexeme("OID_LITERAL", lsb, rsb);
return;
case 5:
this._oidUnexpectedSymbol();
return;
}
}
},
_set_next: function(){ _set_next: function(){
this._yy_accept = 0; this._yy_accept = 0;
this._state = 1; this._state = 1;
@ -147,108 +280,129 @@ Lexer.prototype = {
if(this._end) return false; if(this._end) return false;
while(true){ while(true){
switch(id) /*!re2c switch(id) {
re2c:define:YYCTYPE = _r2c_var_;
re2c:define:YYCURSOR = this._yy_cursor;
re2c:define:YYMARKER = this._yy_marker;
re2c:yyfill:enable = 0;
D = [0-9]; START/*!re2c
end = "\x00"; re2c:define:YYCTYPE = _r2c_var_;
L = [A-Za-z_]; re2c:define:YYCURSOR = this._yy_cursor;
RL = [\U00000400-\U00000451]; re2c:define:YYMARKER = this._yy_marker;
re2c:yyfill:enable = 0;
CR = "\r"; D = [0-9];
LF = "\n"; DSEQ = D+;
CRLF = CR?LF;
INTEGER = "-"?D+;
SP = " ";
TAB = "\t";
DELIM = SP|TAB|CR|LF; end = "\x00";
L = [A-Za-z_];
RL = [\U00000400-\U00000451];
LSB = "["; CR = "\r";
SLASH = "/"; LF = "\n";
RSB = "]"; CRLF = CR?LF;
LCB = "("; INTEGER = "-"?D+;
RCB = ")"; SP = " ";
COLON = ":"; TAB = "\t";
COMMA = ",";
DOT = ".";
REM = "%";
GT = ">";
GTE = ">=";
LT = "<";
LTE = "<=";
EQ = "==";
NEQ = "!=";
AND = 'AND'; DELIM = SP|TAB|CR|LF;
OR = 'OR';
NOT = 'NOT';
LIKE = 'LIKE';
NLIKE = 'NLIKE';
ADDRESS = "Address"; LSB = "[";
TIME = "Time"; SLASH = "/";
OID = "Oid"; RSB = "]";
TIMEDIFF = "TimeDiff"; LCB = "(";
RCB = ")";
COLON = ":";
COMMA = ",";
DOT = ".";
REM = "%";
GT = ">";
GTE = ">=";
LT = "<";
LTE = "<=";
EQ = "==";
NEQ = "!=";
BOOL_LITERAL = 'true'|'false'; AND = 'AND';
FLOAT_LITERAL = "-"? D* "." D+ ("e" "-"? D+)?; OR = 'OR';
INTEGER_LITERAL = INTEGER; NOT = 'NOT';
ID = L(L|D)*; LIKE = 'LIKE';
NLIKE = 'NLIKE';
QU = "\""; ADDRESS = "Address";
SQU = "'"; TIME = "Time";
OID = "Oid";
TIMEDIFF = "TimeDiff";
ID = L(L|D)*;
BOOL_LITERAL = 'true'|'false';
INTEGER_LITERAL = INTEGER;
FLOAT_LITERAL = "-"? D* "." D+ ("e" "-"? D+)?;
QU = "\"";
SQU = "'";
end { this._endOfString(); return; } end { this._endOfString(); return; }
DSEQ { this._foundLexeme("DSEQ"); this._set_next(); return; }
SLASH { this._foundLexeme("SLASH"); this._set_next(); return; } SLASH { this._foundLexeme("SLASH"); this._set_next(); return; }
LSB { this._foundLexeme("LSB"); this._set_next(); return; } LSB { this._foundLexeme("LSB"); this._set_next(); return; }
RSB { this._foundLexeme("RSB"); this._set_next(); return; } RSB { this._foundLexeme("RSB"); this._set_next(); return; }
LCB { this._foundLexeme("LCB"); this._set_next(); return; } LCB { this._foundLexeme("LCB"); this._set_next(); return; }
RCB { this._foundLexeme("RCB"); this._set_next(); return; } RCB { this._foundLexeme("RCB"); this._set_next(); return; }
COLON { this._foundLexeme("COLON"); this._set_next(); return; } COLON { this._foundLexeme("COLON"); this._set_next(); return; }
COMMA { this._foundLexeme("COMMA"); this._set_next(); return; } COMMA { this._foundLexeme("COMMA"); this._set_next(); return; }
DOT { this._foundLexeme("DOT"); this._set_next(); return; } DOT { this._foundLexeme("DOT"); this._set_next(); return; }
REM { this._foundLexeme("REM"); this._set_next(); return; } REM { this._foundLexeme("REM"); this._set_next(); return; }
GT { this._foundLexeme("GT"); this._set_next(); return; } GT { this._foundLexeme("GT"); this._set_next(); return; }
GTE { this._foundLexeme("GTE"); this._set_next(); return; } GTE { this._foundLexeme("GTE"); this._set_next(); return; }
LT { this._foundLexeme("LT"); this._set_next(); return; } LT { this._foundLexeme("LT"); this._set_next(); return; }
LTE { this._foundLexeme("LTE"); this._set_next(); return; } LTE { this._foundLexeme("LTE"); this._set_next(); return; }
EQ { this._foundLexeme("EQ"); this._set_next(); return; } EQ { this._foundLexeme("EQ"); this._set_next(); return; }
NEQ { this._foundLexeme("NEQ"); this._set_next(); return; } NEQ { this._foundLexeme("NEQ"); this._set_next(); return; }
LIKE { this._foundLexeme("LIKE"); this._set_next(); return; } LIKE { this._foundLexeme("LIKE"); this._set_next(); return; }
NLIKE { this._foundLexeme("NLIKE"); this._set_next(); return; } NLIKE { this._foundLexeme("NLIKE"); this._set_next(); return; }
AND { this._foundLexeme("AND"); this._set_next(); return; } AND { this._foundLexeme("AND"); this._set_next(); return; }
OR { this._foundLexeme("OR"); this._set_next(); return; } OR { this._foundLexeme("OR"); this._set_next(); return; }
NOT { this._foundLexeme("NOT"); this._set_next(); return; } NOT { this._foundLexeme("NOT"); this._set_next(); return; }
ADDRESS { this._foundLexeme("ADDRESS"); this._set_next(); return; } ADDRESS { this._foundLexeme("ADDRESS"); this._set_next(); return; }
TIME { this._foundLexeme("TIME"); this._set_next(); return; } TIME { this._foundLexeme("TIME"); this._set_next(); return; }
TIMEDIFF { this._foundLexeme("TIMEDIFF"); this._set_next(); return; } TIMEDIFF { this._foundLexeme("TIMEDIFF"); this._set_next(); return; }
OID { this._foundLexeme("OID"); this._set_next(); return; } OID { id = 100000001; break; }
INTEGER_LITERAL { this._foundLexeme("INTEGER_LITERAL"); this._set_next(); return; } INTEGER_LITERAL { this._foundLexeme("INTEGER_LITERAL"); this._set_next(); return; }
FLOAT_LITERAL { this._foundLexeme("FLOAT_LITERAL"); this._set_next(); return; } FLOAT_LITERAL { this._foundLexeme("FLOAT_LITERAL"); this._set_next(); return; }
BOOL_LITERAL { this._foundLexeme("BOOL_LITERAL"); this._set_next(); return; } BOOL_LITERAL { this._foundLexeme("BOOL_LITERAL"); this._set_next(); return; }
ID { this._foundLexeme("ID"); this._set_next(); return; } ID { this._foundLexeme("ID"); this._set_next(); return; }
DELIM { this._set_next(); break; } DELIM { this._set_next(); break; }
QU|SQU { id = 100000000; break;} QU|SQU { id = 100000000; break;}
[^] { this._unknownSymbol(); this._set_next(); return; } [^] { this._unknownSymbol(); this._set_next(); return; }
*/ENDER} */END &&<STRING&&
&&<OID&&
}
}
} }
}; };
&&>STRING
// Extra case of the lexer switch for quoted strings: the QU|SQU rule sets id = 100000000
// to get here. The build script (process_metatags in main.js) pastes this section in at
// the matching STRING include marker.
case 100000000:
this._searchString();
this._set_next();
return;
&&
&&>OID
// Extra case of the lexer switch for OID literals: the OID rule sets id = 100000001
// to get here. The build script (process_metatags in main.js) pastes this section in at
// the matching OID include marker.
case 100000001:
this._searchOid();
this._set_next();
return;
&&
var print_f = function() { var print_f = function() {
var r_str = ""; var r_str = "";

32
main.js
View File

@ -16,8 +16,9 @@ exec("re2c -i lexer.l", function(err, stdout, stderr) {
var post_process_lexer = function (_string) { var post_process_lexer = function (_string) {
// insert last case for string detect // replace start and end fbrackets
_string = _string.replace(/\}\nENDER}/gm, "yy100000000: { this._searchString(); this._set_next(); return; }}}"); _string = _string.replace(/START\n\{/gm, "");
_string = _string.replace(/\}\nEND/gm, "");
_string = _string.replace(/^.*(_r2c_var_.*;|unsigned int yyaccept = 0;)\n/gm, ""); // replace var yych; _string = _string.replace(/^.*(_r2c_var_.*;|unsigned int yyaccept = 0;)\n/gm, ""); // replace var yych;
_string = _string.replace(/(yych = \*this._yy_cursor);\n/gm, "\tcase 1:\n yych = this._string[this._yy_cursor];\n"); // insert "case 1:" before; _string = _string.replace(/(yych = \*this._yy_cursor);\n/gm, "\tcase 1:\n yych = this._string[this._yy_cursor];\n"); // insert "case 1:" before;
@ -52,9 +53,36 @@ var post_process_lexer = function (_string) {
var output = args["o"] || "lexer.js"; var output = args["o"] || "lexer.js";
_string = process_metatags(_string);
if( !(args["no-beautify"] || args["nb"]) ) { if( !(args["no-beautify"] || args["nb"]) ) {
_string = js_beautify(_string, {indent_size: 4, space_in_empty_paren: true}); _string = js_beautify(_string, {indent_size: 4, space_in_empty_paren: true});
} }
fs.writeFileSync(output, _string); fs.writeFileSync(output, _string);
}; };
/**
 * Expands "metatags" in the generated lexer source.
 *
 * A definition has the form  &&>NAME ...text... &&  and is removed from the
 * output; its text is remembered under NAME. An include has the form
 * &&<NAME&&  and is replaced by the concatenation of every text registered
 * for NAME, in order of appearance. All definitions are collected before any
 * include is expanded, so a definition may follow the place that includes it.
 *
 * NAME must match [A-Z_][A-Z0-9_]+ (at least two characters).
 *
 * @param {string} _string source text containing metatags
 * @returns {string} text with definitions removed and includes expanded
 * @throws {Error} when an include refers to a NAME that was never defined
 */
var process_metatags = function (_string) {
    // Prototype-less map, so a tag name can never collide with an inherited property.
    var metatags = Object.create(null);

    _string = _string.replace(/&&>([A-Z_][A-Z0-9_]+)([\s\S]*?)&&/gm, function (_full, _metatag, _text) {
        if(!metatags[_metatag]){
            metatags[_metatag] = {
                includes: []
            };
        }

        metatags[_metatag].includes.push(_text);
        return "";
    });

    _string = _string.replace(/&&<([A-Z_][A-Z0-9_]+)&&/gm, function (_full, _metatag) {
        var metatag = metatags[_metatag];
        if(!metatag){
            // Throw a real Error (stack trace) and name the tag that is missing.
            throw new Error("not found metatag for include: " + _metatag);
        }

        return metatag.includes.join("");
    });

    return _string;
};

View File

@ -42,11 +42,12 @@ var test = function() {
(new Lexer("NLIKE")).search(); (new Lexer("NLIKE")).search();
(new Lexer("nlike")).search(); (new Lexer("nlike")).search();
(new Lexer("Address")).search(); (new Lexer("Address")).search();
(new Lexer("Oid")).search(); (new Lexer("Oid[1.2.3]")).search();
(new Lexer("Oid [ 2431.2.3 ]")).search();
(new Lexer("Time")).search(); (new Lexer("Time")).search();
(new Lexer("TimeDiff")).search(); (new Lexer("TimeDiff")).search();
var lex_test_all = new Lexer("[ ] ( ) : , . % > >= < <= == != AND and OR or NOT not LIKE like NLIKE nlike Address Time TimeDiff 'sdfadfasdf' \"asdfasfd\" ") var lex_test_all = new Lexer("[ ] ( ) : , . % > >= < <= == != AND and OR or NOT not LIKE like NLIKE nlike Address Oid[1.2.3] Time TimeDiff 'sdfadfasdf' \"asdfasfd\" ")
var _lex; var _lex;
console.log("start search"); console.log("start search");