From e3de12d9ccfedbec8b5c11fbde2b04213a74fc2c Mon Sep 17 00:00:00 2001 From: Aleksey Chichenkov Date: Wed, 30 Jan 2019 14:16:22 +0300 Subject: [PATCH] example and logs off --- build_example.sh | 3 + example/lexer.l | 457 +++++++++++++++++++++++++++++++++++++++++++++++ example/lexme.js | 64 +++++++ lexer.l | 20 ++- main.js | 12 +- test.js | 2 +- 6 files changed, 552 insertions(+), 6 deletions(-) create mode 100755 build_example.sh create mode 100644 example/lexer.l create mode 100644 example/lexme.js diff --git a/build_example.sh b/build_example.sh new file mode 100755 index 0000000..7cb035c --- /dev/null +++ b/build_example.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +node main.js -in=./example/lexer.l -o=./example/lexer.js -t=node -logs \ No newline at end of file diff --git a/example/lexer.l b/example/lexer.l new file mode 100644 index 0000000..8c691e1 --- /dev/null +++ b/example/lexer.l @@ -0,0 +1,457 @@ +var types = [ + "SLASH", + "LSB", + "RSB", + "LCB", + "RCB", + "COLON", + "COMMA", + "DOT", + "REM", + "GT", + "GTE", + "LT", + "LTE", + "EQ", + "NEQ", + "LIKE", + "NLIKE", + "AND", + "OR", + "NOT", + "ADDRESS", + "OID", + "OID_LITERAL", + "TIME", + "TIMEDIFF", + "INTEGER_LITERAL", + "FLOAT_LITERAL", + "BOOL_LITERAL", + "ID" +]; + +var errors = { + "-2": "end of search", + "-1": "not found any lexemes or errors or anything else", + "0": "success", + "1": "found unknown symbol" + "2": "not found close quote or singleQuote", + "3": "Unexpected symbol in oid structure", + "4": "Not found close bracket for Oid", +}; + +var Lexer = function(_string){ + this._last_found_lexeme = {error: -1}; + this._end = false; + this._error = false; + this._string = _string; + this._state = 1; + this._yy_char = null; + this._yy_lex_start = 0; + this._yy_cursor = 0; + this._yy_marker = 0; + this._yy_accept = 0; +}; + +Lexer.prototype = { + types: types, + errors: errors, + _notFoundCloseQuote: function() { + this._error = true; + this._last_found_lexeme = { + error: 2, + start: this._yy_lex_start, + end: this._yy_cursor + }; + + console.log( print_f("Not found close quote start: %s", this._yy_cursor)); + }, + _unknownSymbol: function(){ + this._error = true; + this._last_found_lexeme = { + error: 1, + start: this._yy_lex_start, + end: this._yy_cursor + }; + + console.log( print_f("Found unknown symbol on position: %s", this._yy_cursor)); + }, + _oidUnexpectedSymbol: function(){ + this._error = true; + this._last_found_lexeme = { + error: 3, + start: this._yy_lex_start, + end: this._yy_cursor + }; + + console.log( print_f("Found unknown symbol in Oid on position: %s", this._yy_cursor)); + }, + _oidNotFoundCloseBracket: function(){ + this._error = true; + this._last_found_lexeme = { + error: 4, + start: this._yy_lex_start, + end: this._yy_cursor + }; + + console.log( print_f("Not found close bracket for Oid")); + }, + _foundLexeme: function(_lexeme) { + console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor))); + this._last_found_lexeme = { + error: 0, + lexeme: _lexeme, + value: this._string.substring(this._yy_lex_start, this._yy_cursor), + start: this._yy_lex_start, + end: this._yy_cursor + }; + }, + _foundOidLexeme: function(_lexeme, _lsb, _rsb) { + console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor))); + this._last_found_lexeme = { + error: 0, + lexeme: _lexeme, + value: this._string.substring(this._yy_lex_start, this._yy_cursor), + start: this._yy_lex_start, + end: this._yy_cursor, + lsb: _lsb, + rsb: _rsb + }; + }, + _endOfString: function(){ + console.log(print_f("search end\n")); + this._end = true; + this._last_found_lexeme = { + error: -2 + }; + }, + _searchString: function () { + var _quote = this._string[this._yy_cursor - 1]; + var found_back_slash = false; + while(this._yy_cursor < this._string.length){ + this._yy_char = this._string[this._yy_cursor]; + if(_quote == '"') { + switch (this._yy_char) { + case "\\": + found_back_slash = true; + break; + case '"': + if(!found_back_slash) { + this._yy_cursor++; + this._foundLexeme("STRING_LITERAL"); + return; + } + found_back_slash = false; + break; + } + } else if(_quote == "'") { + switch (this._yy_char) { + case "\\": + found_back_slash = true; + break; + case "'": + if(!found_back_slash) { + this._yy_cursor++; + this._foundLexeme("STRING_LITERAL"); + return; + } + found_back_slash = false; + break; + } + } + this._yy_cursor++; + } + + this._notFoundCloseQuote(); + }, + _searchOid: function (){ + var lsb, rsb; + var state = 0; + while(this._yy_cursor < this._string.length){ + switch(state){ + case 0: + this._yy_char = this._string[this._yy_cursor]; + (function(){ + switch(this._yy_char){ + case " ": + state = 0; + this._yy_cursor++; + break; + case "[": + lsb = {start: this._yy_cursor, end: this._yy_cursor + 1}; + state = 1; + break; + default: + state = 5; + } + }.bind(this))(); + break; + case 1: + this._yy_char = this._string[++this._yy_cursor]; + (function(){ + switch(this._yy_char){ + case " ": + state = 1; + break; + case "0": + case "1": + case "2": + case "3": + case "4": + case "5": + case "6": + case "7": + case "8": + case "9": + state = 2; + break; + case "]": + state = 4; + break; + default: + state = 5; + } + }.bind(this))(); + break; + case 2: + this._yy_char = this._string[++this._yy_cursor]; + (function(){ + switch(this._yy_char){ + case " ": + case "0": + case "1": + case "2": + case "3": + case "4": + case "5": + case "6": + case "7": + case "8": + case "9": + state = 2; + break; + case ".": + state = 3; + break; + case "]": + state = 4; + break; + default: + state = 5; + } + }.bind(this))(); + break; + case 3: + this._yy_char = this._string[++this._yy_cursor]; + (function(){ + switch(this._yy_char){ + case " ": + case "0": + case "1": + case "2": + case "3": + case "4": + case "5": + case "6": + case "7": + case "8": + case "9": + state = 2; + break; + default: + state = 5; + } + }.bind(this))(); + break; + case 4: + rsb = {start: this._yy_cursor, end: this._yy_cursor + 1}; + ++this._yy_cursor; + this._foundOidLexeme("OID_LITERAL", lsb, rsb); + return; + case 5: + this._oidUnexpectedSymbol(); + return; + } + } + + this._oidNotFoundCloseBracket(); + }, + _set_next: function(){ + this._yy_accept = 0; + this._state = 1; + this._yy_lex_start = this._yy_cursor; + this._yy_marker = this._yy_cursor; + }, + next: function(){ + if(this._end || this._error) return null; + + this.search(); + return this.token(); + }, + token: function(){ + return this._last_found_lexeme; + }, + search: function(){ + if(this._end) return false; + + while(true){ + switch(id) { + + START/*!re2c + re2c:define:YYCTYPE = _r2c_var_; + re2c:define:YYCURSOR = this._yy_cursor; + re2c:define:YYMARKER = this._yy_marker; + re2c:yyfill:enable = 0; + + D = [0-9]; + DSEQ = D+; + + end = "\x00"; + L = [A-Za-z_]; + RL = [\U00000400-\U00000451]; + + CR = "\r"; + LF = "\n"; + CRLF = CR?LF; + INTEGER = "-"?D+; + SP = " "; + TAB = "\t"; + + DELIM = SP|TAB|CR|LF; + + LSB = "["; + SLASH = "/"; + RSB = "]"; + LCB = "("; + RCB = ")"; + COLON = ":"; + COMMA = ","; + DOT = "."; + REM = "%"; + GT = ">"; + GTE = ">="; + LT = "<"; + LTE = "<="; + EQ = "=="; + NEQ = "!="; + + AND = 'AND'; + OR = 'OR'; + NOT = 'NOT'; + LIKE = 'LIKE'; + NLIKE = 'NLIKE'; + + ADDRESS = "Address"; + TIME = "Time"; + OID = "Oid"; + TIMEDIFF = "TimeDiff"; + + ID = L(L|D)*; + BOOL_LITERAL = 'true'|'false'; + INTEGER_LITERAL = INTEGER; + FLOAT_LITERAL = "-"? D* "." D+ ("e" "-"? D+)?; + + QU = "\""; + SQU = "'"; + + + + + end { this._endOfString(); return; } + + SLASH { this._foundLexeme("SLASH"); this._set_next(); return; } + LSB { this._foundLexeme("LSB"); this._set_next(); return; } + RSB { this._foundLexeme("RSB"); this._set_next(); return; } + LCB { this._foundLexeme("LCB"); this._set_next(); return; } + RCB { this._foundLexeme("RCB"); this._set_next(); return; } + COLON { this._foundLexeme("COLON"); this._set_next(); return; } + COMMA { this._foundLexeme("COMMA"); this._set_next(); return; } + DOT { this._foundLexeme("DOT"); this._set_next(); return; } + REM { this._foundLexeme("REM"); this._set_next(); return; } + + GT { this._foundLexeme("GT"); this._set_next(); return; } + GTE { this._foundLexeme("GTE"); this._set_next(); return; } + LT { this._foundLexeme("LT"); this._set_next(); return; } + LTE { this._foundLexeme("LTE"); this._set_next(); return; } + EQ { this._foundLexeme("EQ"); this._set_next(); return; } + NEQ { this._foundLexeme("NEQ"); this._set_next(); return; } + LIKE { this._foundLexeme("LIKE"); this._set_next(); return; } + NLIKE { this._foundLexeme("NLIKE"); this._set_next(); return; } + + AND { this._foundLexeme("AND"); this._set_next(); return; } + OR { this._foundLexeme("OR"); this._set_next(); return; } + NOT { this._foundLexeme("NOT"); this._set_next(); return; } + + ADDRESS { this._foundLexeme("ADDRESS"); this._set_next(); return; } + TIME { this._foundLexeme("TIME"); this._set_next(); return; } + TIMEDIFF { this._foundLexeme("TIMEDIFF"); this._set_next(); return; } + OID { id = 100000001; break; } + + INTEGER_LITERAL { this._foundLexeme("INTEGER_LITERAL"); this._set_next(); return; } + FLOAT_LITERAL { this._foundLexeme("FLOAT_LITERAL"); this._set_next(); return; } + BOOL_LITERAL { this._foundLexeme("BOOL_LITERAL"); this._set_next(); return; } + + ID { this._foundLexeme("ID"); this._set_next(); return; } + DELIM { this._set_next(); break; } + + QU|SQU { id = 100000000; break;} + + [^] { this._unknownSymbol(); this._set_next(); return; } + */END &&STRING +case 100000000: + this._searchString(); + this._set_next(); + return; +&& + +&&>OID +case 100000001: + this._searchOid(); + this._set_next(); + return; +&& + +var print_f = function() { + var r_str = ""; + var next = arguments[0]; + + var rx = /(%[a-zA-Z]{1})/; + var a = 1, + match; + while (match = rx.exec(next)) { + var prev = next.substring(0, match.index); + var macro = next.substring(match.index + 1, match.index + 2); + next = next.substring(match.index + 2, next.length); + r_str += prev; + + var arg = arguments[a]; + + if (arg !== undefined) { + switch (macro) { + case "s": + r_str += arg.toString(); + break; + case "i": + r_str += parseInt(arg); + break; + case "f": + r_str += parseFloat(arg); + break; + } + } else { + r_str += "%" + macro; + } + a++; + } + + r_str += next; + + return r_str; +}; \ No newline at end of file diff --git a/example/lexme.js b/example/lexme.js new file mode 100644 index 0000000..5ecf6a4 --- /dev/null +++ b/example/lexme.js @@ -0,0 +1,64 @@ +/** + * Created by Aleksey Chichenkov on 1/30/19. + */ + +var Lexer = require("./lexer.js"); + + +// See Allow Lexemes in ./lexer.js on top file + +/** + * create Lexer and set string + * + * Here you can found next lexemes: + * - ID + * - EQ + * - STRING_LITERAL + * - AND + * - ID + * - GT + * - INTEGER + * @type {Lexer} + */ +var lexer = new Lexer("id == 'string' and a > 10"); + + +var token; +while(token = lexer.next()){ + /** + * token has next fields + * - error: number, // 0 if not error + * - lexeme: string, type of token (ID|EQ|STRING_LITERAL) + * - value: string, value what found between and + * - start: number, + * - end: number + * + * and if lexeme == "OID_LITERAL" + * - lsb - position left square bracket + * - rsb - position right square bracket + */ + + switch (token.error) { + case 0: + console.log("TOKEN:", token.lexeme, token.value); + break; + case -2: + console.log("end of search"); + break; + case -1: + console.log("Not found any lexemes or errors or anything else"); + break; + case 1: + console.log("Found unknown symbol on position"); + break; + case 2: + console.log("Not found close quote"); + break; + case 3: + console.log("Unexpected symbol in oid structure"); + break; + case 4: + console.log("Not found close bracket for Oid"); + break; + } +} diff --git a/lexer.l b/lexer.l index d98ee0c..8c691e1 100644 --- a/lexer.l +++ b/lexer.l @@ -1,5 +1,4 @@ var types = [ - "DSEQ", "SLASH", "LSB", "RSB", @@ -21,6 +20,7 @@ var types = [ "OR", "NOT", "ADDRESS", + "OID", "OID_LITERAL", "TIME", "TIMEDIFF", @@ -31,10 +31,13 @@ var types = [ ]; var errors = { - "-2": "not found close quote or singleQuote", + "-2": "end of search", "-1": "not found any lexemes or errors or anything else", "0": "success", "1": "found unknown symbol" + "2": "not found close quote or singleQuote", + "3": "Unexpected symbol in oid structure", + "4": "Not found close bracket for Oid", }; var Lexer = function(_string){ @@ -83,6 +86,16 @@ Lexer.prototype = { console.log( print_f("Found unknown symbol in Oid on position: %s", this._yy_cursor)); }, + _oidNotFoundCloseBracket: function(){ + this._error = true; + this._last_found_lexeme = { + error: 4, + start: this._yy_lex_start, + end: this._yy_cursor + }; + + console.log( print_f("Not found close bracket for Oid")); + }, _foundLexeme: function(_lexeme) { console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor))); this._last_found_lexeme = { @@ -260,6 +273,8 @@ Lexer.prototype = { return; } } + + this._oidNotFoundCloseBracket(); }, _set_next: function(){ this._yy_accept = 0; @@ -343,7 +358,6 @@ Lexer.prototype = { end { this._endOfString(); return; } - DSEQ { this._foundLexeme("DSEQ"); this._set_next(); return; } SLASH { this._foundLexeme("SLASH"); this._set_next(); return; } LSB { this._foundLexeme("LSB"); this._set_next(); return; } diff --git a/main.js b/main.js index d72a3d6..7506742 100644 --- a/main.js +++ b/main.js @@ -7,7 +7,10 @@ var args = require("args-parser")(process.argv); var fs = require("fs"); var exec = require('child_process').exec; -exec("re2c -i lexer.l", function(err, stdout, stderr) { +var inp = args["in"] ? args["in"] : "lexer.l"; +var enable_logs = args["logs"] ? args["logs"] : false; + +exec("re2c -i " + inp, function(err, stdout, stderr) { err && console.log("ERROR: ", err); err && process.exit(1); @@ -43,7 +46,8 @@ var post_process_lexer = function (_string) { if(args["t"] !== undefined) { switch (args["t"]) { case "web": - _string = "(function(){\n" + _string + "return Lexer; \n})()"; + + _string = "(function () {var deps = [];define(deps, function(){\n" + _string + "return Lexer; \n});})();"; break; case "node": _string += "\n module.exports = Lexer"; @@ -59,6 +63,10 @@ var post_process_lexer = function (_string) { _string = js_beautify(_string, {indent_size: 4, space_in_empty_paren: true}); } + if(!enable_logs){ + _string = _string.replace(/^ *console\.log[\s\S]*?$\n/gm, ""); + } + fs.writeFileSync(output, _string); }; diff --git a/test.js b/test.js index ee6fbbe..b04cb0e 100644 --- a/test.js +++ b/test.js @@ -47,7 +47,7 @@ var test = function() { (new Lexer("Time")).search(); (new Lexer("TimeDiff")).search(); - var lex_test_all = new Lexer("[ ] ( ) : , . % > >= < <= == != AND and OR or NOT not LIKE like NLIKE nlike Address Oid[1.2.3] Time TimeDiff 'sdfadfasdf' \"asdfasfd\" ") + var lex_test_all = new Lexer("[ ] ( ) : , . % > >= < <= == != AND and OR or NOT not LIKE like NLIKE nlike Address Oid[1.2.3] Time TimeDiff 'sdfadfasdf' \"asdfasfd\" "); var _lex; console.log("start search");