var types = [ "LSB", "RSB", "LCB", "RCB", "COLON", "COMMA", "DOT", "REM", "GT", "GTE", "LT", "LTE", "EQ", "NEQ", "LIKE", "NLIKE", "AND", "OR", "NOT", "ADDRESS", "TIME", "TIMEDIFF", "INTEGER_LITERAL", "FLOAT_LITERAL", "BOOL_LITERAL", "ID" ]; var errors = { "-2": "not found close quote or singleQuote", "-1": "not found any lexemes or errors or anything else", "0": "success", "1": "found unknown symbol" }; var Lexer = function(_string){ this._last_found_lexeme = {error: -1}; this._end = false; this._error = false; this._string = _string; this._state = 1; this._yy_char = null; this._yy_lex_start = 0; this._yy_cursor = 0; this._yy_marker = 0; this._yy_accept = 0; }; Lexer.prototype = { types: types, errors: errors, _notFoundCloseQuote: function() { this._error = true; this._last_found_lexeme = { error: 2, start: this._yy_lex_start, end: this._yy_cursor }; console.log( print_f("Not found close quote start: %s", this._yy_cursor)); }, _unknownSymbol: function(){ this._error = true; this._last_found_lexeme = { error: 1, start: this._yy_lex_start, end: this._yy_cursor }; console.log( print_f("Found unknown symbol on position: %s", this._yy_cursor)); }, _foundLexeme: function(_lexeme) { console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor))); this._last_found_lexeme = { error: 0, lexeme: _lexeme, start: this._yy_lex_start, end: this._yy_cursor }; }, _endOfString: function(){ console.log(print_f("search end\n")); this._end = true; this._last_found_lexeme = { error: -2 }; }, _searchString: function () { var _quote = this._string[this._yy_cursor - 1]; var found_back_slash = false; while(this._yy_cursor < this._string.length){ this._yy_char = this._string[this._yy_cursor]; if(_quote == '"') { switch (this._yy_char) { case "\\": found_back_slash = true; break; case '"': if(!found_back_slash) { this._yy_cursor++; this._foundLexeme("STRING_LITERAL"); return; } found_back_slash = false; break; } } else if(_quote == "'") { switch (this._yy_char) { case "\\": found_back_slash = true; break; case "'": if(!found_back_slash) { this._yy_cursor++; this._foundLexeme("STRING_LITERAL"); return; } found_back_slash = false; break; } } this._yy_cursor++; } this._notFoundCloseQuote(); }, _set_next: function(){ this._yy_accept = 0; this._state = 1; this._yy_lex_start = this._yy_cursor; this._yy_marker = this._yy_cursor; }, next: function(){ if(this._end || this._error) return null; this.search(); return this.token(); }, token: function(){ return this._last_found_lexeme; }, search: function(){ if(this._end) return false; while(true){ switch(id) /*!re2c re2c:define:YYCTYPE = _r2c_var_; re2c:define:YYCURSOR = this._yy_cursor; re2c:define:YYMARKER = this._yy_marker; re2c:yyfill:enable = 0; D = [0-9]; end = "\x00"; L = [A-Za-z_]; RL = [\U00000400-\U00000451]; CR = "\r"; LF = "\n"; CRLF = CR?LF; INTEGER = "-"?D+; SP = " "; TAB = "\t"; DELIM = SP|TAB|CR|LF; LSB = "["; RSB = "]"; LCB = "("; RCB = ")"; COLON = ":"; COMMA = ","; DOT = "."; REM = "%"; GT = ">"; GTE = ">="; LT = "<"; LTE = "<="; EQ = "=="; NEQ = "!="; AND = 'AND'; OR = 'OR'; NOT = 'NOT'; LIKE = 'LIKE'; NLIKE = 'NLIKE'; ADDRESS = "Address"; TIME = "Time"; TIMEDIFF = "TimeDiff"; BOOL_LITERAL = 'true'|'false'; FLOAT_LITERAL = "-"? D* "." D+ ("e" "-"? D+)?; INTEGER_LITERAL = INTEGER; ID = L(L|D)*; QU = "\""; SQU = "'"; end { this._endOfString(); return; } LSB { this._foundLexeme("LSB"); this._set_next(); return; } RSB { this._foundLexeme("RSB"); this._set_next(); return; } LCB { this._foundLexeme("LCB"); this._set_next(); return; } RCB { this._foundLexeme("RCB"); this._set_next(); return; } COLON { this._foundLexeme("COLON"); this._set_next(); return; } COMMA { this._foundLexeme("COMMA"); this._set_next(); return; } DOT { this._foundLexeme("DOT"); this._set_next(); return; } REM { this._foundLexeme("REM"); this._set_next(); return; } GT { this._foundLexeme("GT"); this._set_next(); return; } GTE { this._foundLexeme("GTE"); this._set_next(); return; } LT { this._foundLexeme("LT"); this._set_next(); return; } LTE { this._foundLexeme("LTE"); this._set_next(); return; } EQ { this._foundLexeme("EQ"); this._set_next(); return; } NEQ { this._foundLexeme("NEQ"); this._set_next(); return; } LIKE { this._foundLexeme("LIKE"); this._set_next(); return; } NLIKE { this._foundLexeme("NLIKE"); this._set_next(); return; } AND { this._foundLexeme("AND"); this._set_next(); return; } OR { this._foundLexeme("OR"); this._set_next(); return; } NOT { this._foundLexeme("NOT"); this._set_next(); return; } ADDRESS { this._foundLexeme("ADDRESS"); this._set_next(); return; } TIME { this._foundLexeme("TIME"); this._set_next(); return; } TIMEDIFF { this._foundLexeme("TIMEDIFF"); this._set_next(); return; } INTEGER_LITERAL { this._foundLexeme("INTEGER_LITERAL"); this._set_next(); return; } FLOAT_LITERAL { this._foundLexeme("FLOAT_LITERAL"); this._set_next(); return; } BOOL_LITERAL { this._foundLexeme("BOOL_LITERAL"); this._set_next(); return; } ID { this._foundLexeme("ID"); this._set_next(); return; } DELIM { this._set_next(); break; } QU|SQU { id = 100000000; break;} [^] { this._unknownSymbol(); this._set_next(); return; } */ENDER} } }; var print_f = function() { var r_str = ""; var next = arguments[0]; var rx = /(%[a-zA-Z]{1})/; var a = 1, match; while (match = rx.exec(next)) { var prev = next.substring(0, match.index); var macro = next.substring(match.index + 1, match.index + 2); next = next.substring(match.index + 2, next.length); r_str += prev; var arg = arguments[a]; if (arg !== undefined) { switch (macro) { case "s": r_str += arg.toString(); break; case "i": r_str += parseInt(arg); break; case "f": r_str += parseFloat(arg); break; } } else { r_str += "%" + macro; } a++; } r_str += next; return r_str; }; console.log("TEST SINGLE"); (new Lexer("[")).search(); (new Lexer("]")).search(); (new Lexer("(")).search(); (new Lexer(")").search()); (new Lexer(":")).search(); (new Lexer(",")).search(); (new Lexer(".")).search(); (new Lexer("%")).search(); (new Lexer(">")).search(); (new Lexer(">=")).search(); (new Lexer("<")).search(); (new Lexer("<=")).search(); (new Lexer("==")).search(); (new Lexer("!=")).search(); (new Lexer("AND")).search(); (new Lexer("and")).search(); (new Lexer("OR")).search(); (new Lexer("or")).search(); (new Lexer("NOT")).search(); (new Lexer("not")).search(); (new Lexer("LIKE")).search(); (new Lexer("like")).search(); (new Lexer("NLIKE")).search(); (new Lexer("nlike")).search(); (new Lexer("Address")).search(); (new Lexer("Time")).search(); (new Lexer("TimeDiff")).search(); var lex_test_all = new Lexer("[ ] ( ) : , . % > >= < <= == != AND and OR or NOT not LIKE like NLIKE nlike Address Time TimeDiff 'sdfadfasdf' \"asdfasfd\" ") var _lex; while(_lex = lex_test_all.next()){ console.log("IN while:", _lex.lexeme); } console.log("TEST STRING LITERAL"); (new Lexer(' "111\\\"11\\\"1" "222222" ')).search(); (new Lexer(" '111\\\'11\\\'1' '222222' ")).search();