From 7f8ea12ffbf7374ba7f919e794af8840fe6b89fd Mon Sep 17 00:00:00 2001 From: Aleksey Chichenkov Date: Wed, 30 Jan 2019 13:13:05 +0300 Subject: [PATCH] fix rules and literal --- example/lexer.js | 421 +++++++++++++++++++++---------- example/rules.y | 38 +-- example/test_code_environment.js | 17 +- output/parser.y | 61 ++++- output/rules.y | 38 +-- test.js | 3 +- 6 files changed, 353 insertions(+), 225 deletions(-) diff --git a/example/lexer.js b/example/lexer.js index 790be74..612b0d0 100644 --- a/example/lexer.js +++ b/example/lexer.js @@ -1,4 +1,4 @@ -/* Generated by re2c 1.0.3 on Tue Jan 29 10:00:10 2019 */ +/* Generated by re2c 1.0.3 on Wed Jan 30 13:09:24 2019 */ var types = [ "SLASH", "LSB", @@ -22,6 +22,7 @@ var types = [ "NOT", "ADDRESS", "OID", + "OID_LITERAL", "TIME", "TIMEDIFF", "INTEGER_LITERAL", @@ -75,6 +76,26 @@ Lexer.prototype = { console.log(print_f("Found unknown symbol on position: %s", this._yy_cursor)); }, + _oidUnexpectedSymbol: function() { + this._error = true; + this._last_found_lexeme = { + error: 3, + start: this._yy_lex_start, + end: this._yy_cursor + }; + + console.log(print_f("Found unknown symbol in Oid on position: %s", this._yy_cursor)); + }, + _oidNotFoundCloseBracket: function() { + this._error = true; + this._last_found_lexeme = { + error: 4, + start: this._yy_lex_start, + end: this._yy_cursor + }; + + console.log(print_f("Not found close bracket for Oid")); + }, _foundLexeme: function(_lexeme) { console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor))); this._last_found_lexeme = { @@ -85,6 +106,18 @@ Lexer.prototype = { end: this._yy_cursor }; }, + _foundOidLexeme: function(_lexeme, _lsb, _rsb) { + console.log(print_f("found lex: %s; start: %s; end: %s; result => %s", _lexeme, this._yy_lex_start, this._yy_cursor, this._string.substring(this._yy_lex_start, this._yy_cursor))); + this._last_found_lexeme = { + 
error: 0, + lexeme: _lexeme, + value: this._string.substring(this._yy_lex_start, this._yy_cursor), + start: this._yy_lex_start, + end: this._yy_cursor, + lsb: _lsb, + rsb: _rsb + }; + }, _endOfString: function() { console.log(print_f("search end\n")); this._end = true; @@ -131,6 +164,124 @@ Lexer.prototype = { this._notFoundCloseQuote(); }, + _searchOid: function() { + var lsb, rsb; + var state = 0; + while (this._yy_cursor < this._string.length) { + switch (state) { + case 0: + this._yy_char = this._string[this._yy_cursor]; + (function() { + switch (this._yy_char) { + case " ": + state = 0; + this._yy_cursor++; + break; + case "[": + lsb = { + start: this._yy_cursor, + end: this._yy_cursor + 1 + }; + state = 1; + break; + default: + state = 5; + } + }.bind(this))(); + break; + case 1: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case " ": + state = 1; + break; + case "0": + case "1": + case "2": + case "3": + case "4": + case "5": + case "6": + case "7": + case "8": + case "9": + state = 2; + break; + case "]": + state = 4; + break; + default: + state = 5; + } + }.bind(this))(); + break; + case 2: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case " ": + case "0": + case "1": + case "2": + case "3": + case "4": + case "5": + case "6": + case "7": + case "8": + case "9": + state = 2; + break; + case ".": + state = 3; + break; + case "]": + state = 4; + break; + default: + state = 5; + } + }.bind(this))(); + break; + case 3: + this._yy_char = this._string[++this._yy_cursor]; + (function() { + switch (this._yy_char) { + case " ": + case "0": + case "1": + case "2": + case "3": + case "4": + case "5": + case "6": + case "7": + case "8": + case "9": + state = 2; + break; + default: + state = 5; + } + }.bind(this))(); + break; + case 4: + rsb = { + start: this._yy_cursor, + end: this._yy_cursor + 1 + }; + ++this._yy_cursor; + this._foundOidLexeme("OID_LITERAL", lsb, 
rsb); + return; + case 5: + this._oidUnexpectedSymbol(); + return; + } + } + + this._oidNotFoundCloseBracket(); + }, _set_next: function() { this._yy_accept = 0; this._state = 1; @@ -151,6 +302,8 @@ Lexer.prototype = { while (true) { switch (this._state) { + + case 1: this._yy_char = this._string[this._yy_cursor]; (function() { @@ -303,21 +456,21 @@ Lexer.prototype = { break; case 2: ++this._yy_cursor; { - this._endOfString(); - return; - } + this._endOfString(); + return; + } case 4: ++this._yy_cursor; case 5: - { - this._unknownSymbol();this._set_next(); - return; - } + { + this._unknownSymbol();this._set_next(); + return; + } case 6: ++this._yy_cursor; { - this._set_next(); - break; - } + this._set_next(); + break; + } case 8: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -333,33 +486,33 @@ Lexer.prototype = { break; case 9: ++this._yy_cursor; { - this._state = 100000000; - break; - } + this._state = 100000000; + break; + } case 11: ++this._yy_cursor; { - this._foundLexeme("REM"); - this._set_next(); - return; - } + this._foundLexeme("REM"); + this._set_next(); + return; + } case 13: ++this._yy_cursor; { - this._foundLexeme("LCB"); - this._set_next(); - return; - } + this._foundLexeme("LCB"); + this._set_next(); + return; + } case 15: ++this._yy_cursor; { - this._foundLexeme("RCB"); - this._set_next(); - return; - } + this._foundLexeme("RCB"); + this._set_next(); + return; + } case 17: ++this._yy_cursor; { - this._foundLexeme("COMMA"); - this._set_next(); - return; - } + this._foundLexeme("COMMA"); + this._set_next(); + return; + } case 19: this._yy_accept = 0; this._yy_char = this._string[(this._yy_marker = ++this._yy_cursor)]; @@ -409,16 +562,16 @@ Lexer.prototype = { }.bind(this))(); break; case 21: - { - this._foundLexeme("DOT");this._set_next(); - return; - } + { + this._foundLexeme("DOT");this._set_next(); + return; + } case 22: ++this._yy_cursor; { - this._foundLexeme("SLASH"); - this._set_next(); - return; - } + 
this._foundLexeme("SLASH"); + this._set_next(); + return; + } case 24: this._yy_accept = 1; this._yy_char = this._string[(this._yy_marker = ++this._yy_cursor)]; @@ -446,16 +599,16 @@ Lexer.prototype = { }.bind(this))(); break; case 26: - { - this._foundLexeme("INTEGER_LITERAL");this._set_next(); - return; - } + { + this._foundLexeme("INTEGER_LITERAL");this._set_next(); + return; + } case 27: ++this._yy_cursor; { - this._foundLexeme("COLON"); - this._set_next(); - return; - } + this._foundLexeme("COLON"); + this._set_next(); + return; + } case 29: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -470,10 +623,10 @@ Lexer.prototype = { }.bind(this))(); break; case 30: - { - this._foundLexeme("LT");this._set_next(); - return; - } + { + this._foundLexeme("LT");this._set_next(); + return; + } case 31: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -501,10 +654,10 @@ Lexer.prototype = { }.bind(this))(); break; case 33: - { - this._foundLexeme("GT");this._set_next(); - return; - } + { + this._foundLexeme("GT");this._set_next(); + return; + } case 34: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -523,10 +676,10 @@ Lexer.prototype = { }.bind(this))(); break; case 35: - { - this._foundLexeme("ID");this._set_next(); - return; - } + { + this._foundLexeme("ID");this._set_next(); + return; + } case 36: this._yy_char = this._string[++this._yy_cursor]; case 37: @@ -685,16 +838,16 @@ Lexer.prototype = { break; case 43: ++this._yy_cursor; { - this._foundLexeme("LSB"); - this._set_next(); - return; - } + this._foundLexeme("LSB"); + this._set_next(); + return; + } case 45: ++this._yy_cursor; { - this._foundLexeme("RSB"); - this._set_next(); - return; - } + this._foundLexeme("RSB"); + this._set_next(); + return; + } case 47: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -739,10 +892,10 @@ Lexer.prototype = { break; case 50: ++this._yy_cursor; { - this._foundLexeme("NEQ"); - this._set_next(); - return; - } + 
this._foundLexeme("NEQ"); + this._set_next(); + return; + } case 52: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -805,28 +958,28 @@ Lexer.prototype = { }.bind(this))(); break; case 56: - { - this._foundLexeme("FLOAT_LITERAL");this._set_next(); - return; - } + { + this._foundLexeme("FLOAT_LITERAL");this._set_next(); + return; + } case 57: ++this._yy_cursor; { - this._foundLexeme("LTE"); - this._set_next(); - return; - } + this._foundLexeme("LTE"); + this._set_next(); + return; + } case 59: ++this._yy_cursor; { - this._foundLexeme("EQ"); - this._set_next(); - return; - } + this._foundLexeme("EQ"); + this._set_next(); + return; + } case 61: ++this._yy_cursor; { - this._foundLexeme("GTE"); - this._set_next(); - return; - } + this._foundLexeme("GTE"); + this._set_next(); + return; + } case 63: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -986,10 +1139,10 @@ Lexer.prototype = { }.bind(this))(); break; case 70: - { - this._foundLexeme("OR");this._set_next(); - return; - } + { + this._foundLexeme("OR");this._set_next(); + return; + } case 71: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -1131,10 +1284,10 @@ Lexer.prototype = { }.bind(this))(); break; case 76: - { - this._foundLexeme("AND");this._set_next(); - return; - } + { + this._foundLexeme("AND");this._set_next(); + return; + } case 77: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -1266,10 +1419,10 @@ Lexer.prototype = { }.bind(this))(); break; case 82: - { - this._foundLexeme("NOT");this._set_next(); - return; - } + { + this._foundLexeme("NOT");this._set_next(); + return; + } case 83: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -1346,10 +1499,10 @@ Lexer.prototype = { }.bind(this))(); break; case 84: - { - this._foundLexeme("OID");this._set_next(); - return; - } + { + this._state = 100000001; + break; + } case 85: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -1510,10 +1663,10 @@ 
Lexer.prototype = { }.bind(this))(); break; case 92: - { - this._foundLexeme("LIKE");this._set_next(); - return; - } + { + this._foundLexeme("LIKE");this._set_next(); + return; + } case 93: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -1604,10 +1757,10 @@ Lexer.prototype = { }.bind(this))(); break; case 95: - { - this._foundLexeme("BOOL_LITERAL");this._set_next(); - return; - } + { + this._foundLexeme("BOOL_LITERAL");this._set_next(); + return; + } case 96: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -1686,10 +1839,10 @@ Lexer.prototype = { }.bind(this))(); break; case 97: - { - this._foundLexeme("TIME");this._set_next(); - return; - } + { + this._foundLexeme("TIME");this._set_next(); + return; + } case 98: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -1779,10 +1932,10 @@ Lexer.prototype = { }.bind(this))(); break; case 100: - { - this._foundLexeme("NLIKE");this._set_next(); - return; - } + { + this._foundLexeme("NLIKE");this._set_next(); + return; + } case 101: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -1898,10 +2051,10 @@ Lexer.prototype = { }.bind(this))(); break; case 105: - { - this._foundLexeme("ADDRESS");this._set_next(); - return; - } + { + this._foundLexeme("ADDRESS");this._set_next(); + return; + } case 106: this._yy_char = this._string[++this._yy_cursor]; (function() { @@ -1991,21 +2144,31 @@ Lexer.prototype = { }.bind(this))(); break; case 108: - { - this._foundLexeme("TIMEDIFF");this._set_next(); - return; - } + { + this._foundLexeme("TIMEDIFF");this._set_next(); + return; + } + case 100000000: - { - this._searchString();this._set_next(); + this._searchString(); + this._set_next(); return; - } + + + case 100000001: + this._searchOid(); + this._set_next(); + return; + } } } }; + + + var print_f = function() { var r_str = ""; var next = arguments[0]; diff --git a/example/rules.y b/example/rules.y index 88f1adf..1450fbe 100644 --- a/example/rules.y +++ 
b/example/rules.y @@ -346,48 +346,16 @@ literal(A) ::= address_literal(B) . { A = B; } -oid_literal_content(A) ::= id(B) . { - A = new tokens.oid_literal_content({ - children: [B] - }); -} - -oid_literal_content(A) ::= oid_literal_content(B) DOT id(C) . { - B.add(C); - A = B; -} - -oid_literal_content_or_empty(A) ::= oid_literal_content(B) . { - A = B; -} - -oid_literal_content_or_empty(A) ::= . { - A = new tokens.oid_literal_content({ - children: [] - }); -} - -oid_literal(A) ::= OID(B) LSB(C) oid_literal_content_or_empty(D) RSB(E) . { +oid_literal(A) ::= OID_LITERAL(B) . { A = new tokens.oid_literal({ - children: D.children, keyword: new tokens.LEXEME({ type: B.lexeme, value: B.value, start: B.start, end: B.end }), - LSB: new tokens.LEXEME({ - type: C.lexeme, - value: C.value, - start: C.start, - end: C.end - }), - RSB: new tokens.LEXEME({ - type: E.lexeme, - value: E.value, - start: E.start, - end: E.end - }) + LSB: B.lsb, + RSB: B.rsb }); } diff --git a/example/test_code_environment.js b/example/test_code_environment.js index 922998b..f86cba0 100644 --- a/example/test_code_environment.js +++ b/example/test_code_environment.js @@ -501,14 +501,6 @@ var tokens = (function () { } }); - var oid_literal_content = std.class([Rule], { - constructor: function oid_literal_content(_options) { - var base = tools.merge({}, _options); - - Rule.call(this, base); - } - }); - var oid_literal = std.class([Rule], { constructor: function oid_literal(_options) { var base = tools.merge({ @@ -524,9 +516,11 @@ var tokens = (function () { this.RSB = base.RSB; }, position: function () { + var first_child = this.children[0]; + return { - start: this.keyword.start, - end: this.RSB.end + start: first_child.start, + end: first_child.end } } }); @@ -629,8 +623,7 @@ var tokens = (function () { // expr: expr, sub_expr: sub_expr, - address_literal_content: address_literal_content, - oid_literal_content: oid_literal_content, + address_literal_content: address_literal_content } })(); \ No newline 
at end of file diff --git a/output/parser.y b/output/parser.y index 870fcb1..5d3940a 100644 --- a/output/parser.y +++ b/output/parser.y @@ -21,23 +21,60 @@ var token; var lexemes = []; while (token = lexer.next()) { - if(_result.error) { - return { success: false } + switch(token.error){ + case 0: + console.log("PARSE", token.lexeme); + parser.parse(parser["TOKEN_" + token.lexeme], token); + lexemes.push(token); + break; + case 1: + return { + success: false, + message: "Found unknown symbol on position", + error: 1, + token: token + }; + case 2: + return { + success: false, + message: "Not found close quote", + error: 2, + token: token + }; + case 3: + return { + success: false, + message: "Unexpected symbol in oid structure", + error: 3, + token: token + }; + case 4: + return { + success: false, + message: "Not found close bracket for Oid", + error: 4, + token: token + }; } - if (token.error === 0) { - console.log("PARSE", token.lexeme); - parser.parse(parser["TOKEN_" + token.lexeme], token); - lexemes.push(token); + if(_result.error) { + return { + success: false, + message: "Syntax error", + error: 0 + } } } parser.parse(); - - return { - success: true, - tree: _result.root_node, - lexemes: lexemes - }; + if (_result.root_node !== undefined) { + return { + success: true, + tree: _result.root_node, + lexemes: lexemes + }; + } else { + return { success: false } + } }; } diff --git a/output/rules.y b/output/rules.y index 88f1adf..1450fbe 100644 --- a/output/rules.y +++ b/output/rules.y @@ -346,48 +346,16 @@ literal(A) ::= address_literal(B) . { A = B; } -oid_literal_content(A) ::= id(B) . { - A = new tokens.oid_literal_content({ - children: [B] - }); -} - -oid_literal_content(A) ::= oid_literal_content(B) DOT id(C) . { - B.add(C); - A = B; -} - -oid_literal_content_or_empty(A) ::= oid_literal_content(B) . { - A = B; -} - -oid_literal_content_or_empty(A) ::= . 
{ - A = new tokens.oid_literal_content({ - children: [] - }); -} - -oid_literal(A) ::= OID(B) LSB(C) oid_literal_content_or_empty(D) RSB(E) . { +oid_literal(A) ::= OID_LITERAL(B) . { A = new tokens.oid_literal({ - children: D.children, keyword: new tokens.LEXEME({ type: B.lexeme, value: B.value, start: B.start, end: B.end }), - LSB: new tokens.LEXEME({ - type: C.lexeme, - value: C.value, - start: C.start, - end: C.end - }), - RSB: new tokens.LEXEME({ - type: E.lexeme, - value: E.value, - start: E.start, - end: E.end - }) + LSB: B.lsb, + RSB: B.rsb }); } diff --git a/test.js b/test.js index 0ab8164..f034eab 100644 --- a/test.js +++ b/test.js @@ -47,7 +47,7 @@ var test = function() { var test_not = LemonJS('not cab == false').tree; fs.writeFileSync("./tests/test_not.json", JSON.stringify(test_not, true, 3)); - var test_oid = LemonJS('abc == Oid [a.b.d]').tree; + var test_oid = LemonJS('abc == Oid[1.2.3] and abd == Oid [ 1.2.3 ]').tree; fs.writeFileSync("./tests/test_oid.json", JSON.stringify(test_oid, true, 3)); var test_timediff = LemonJS('add == TimeDiff [17924 15:01:24 441000]').tree; @@ -56,7 +56,6 @@ var test = function() { var test_time = LemonJS('add == Time [29/12/2019 15:01:24 441000]').tree; fs.writeFileSync("./tests/test_time.json", JSON.stringify(test_time, true, 3)); - var test_exp_1 = LemonJS('(add == Time [29/12/2019 15:01:24 441000]) and ddds == "sdfasdf" or a == 123 and v == 155').tree; fs.writeFileSync("./tests/test_exp_1.json", JSON.stringify(test_exp_1, true, 3));