TestATNLexerInterpreter + TestCharSupport done.

- Bug fixes in CharSupport and escape sequence parsing.
- Bug fix in the parse-tree-to-AST converter for EBNF suffixes.
- TestATNLexerInterpreter + TestCharSupport done.
- Upgraded the antlr4ng and stringtemplate4ts dependencies.
mike-lischke · Dec 1, 2024 · 0d44ae4 · 0d44ae4
1 parent 9a995f8
commit 0d44ae4
Show file tree

Hide file tree

Showing 20 changed files with 835 additions and 871 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -15,10 +15,10 @@
     ],
     "license": "MIT",
     "dependencies": {
-        "antlr4ng": "3.0.9",
+        "antlr4ng": "3.0.10",
         "commander": "12.1.0",
         "fast-printf": "1.6.9",
-        "stringtemplate4ts": "1.0.3",
+        "stringtemplate4ts": "1.0.4",
         "unicode-properties": "1.4.1"
     },
     "devDependencies": {

diff --git a/src/antlr3/tree/TreeParser.ts b/src/antlr3/tree/TreeParser.ts
@@ -213,8 +213,6 @@ export class TreeParser extends BaseRecognizer {
      *  from tree parser errors inline...
      */
     protected override recoverFromMismatchedToken(input: IntStream, ttype: number, follow: BitSet): CommonTree | null {
-        //throw new InputMismatchException(ttype, input as TreeNodeStream);
-
         throw new Error("recoverFromMismatchedToken");
     }
 

diff --git a/src/codegen/CodeGenerator.ts b/src/codegen/CodeGenerator.ts
@@ -90,9 +90,7 @@ export class CodeGenerator {
     }
 
     public generateLexer(header?: boolean): IST {
-        if (!header) {
-            return this.generateLexer(false);
-        }
+        header ??= false;
 
         return this.walk(this.createController().buildLexerOutputModel(header), header);
 
@@ -105,9 +103,7 @@ export class CodeGenerator {
     }
 
     public generateListener(header?: boolean): IST {
-        if (!header) {
-            return this.generateListener(false);
-        }
+        header ??= false;
 
         return this.walk(this.createController().buildListenerOutputModel(header), header);
 
@@ -122,17 +118,13 @@ export class CodeGenerator {
     }
 
     public generateVisitor(header?: boolean): IST {
-        if (!header) {
-            return this.generateVisitor(false);
-        }
+        header ??= false;
 
         return this.walk(this.createController().buildVisitorOutputModel(header), header);
     }
 
     public generateBaseVisitor(header?: boolean): IST {
-        if (!header) {
-            return this.generateBaseVisitor(false);
-        }
+        header ??= false;
 
         return this.walk(this.createController().buildBaseVisitorOutputModel(header), header);
     }

diff --git a/src/codegen/model/Lexer.ts b/src/codegen/model/Lexer.ts
@@ -16,7 +16,7 @@ export class Lexer extends Recognizer {
     public readonly escapedChannels = new Map<string, number>();
     public readonly file: LexerFile;
     public readonly modes: string[];
-    public readonly escapedModeNames: string[];
+    public readonly escapedModeNames: string[] = [];
 
     public actionFuncs = new Map<Rule, RuleActionFunction>();
 

diff --git a/src/misc/CharSupport.ts b/src/misc/CharSupport.ts
@@ -9,13 +9,26 @@ import { IntervalSet } from "antlr4ng";
 import { Character } from "../support/Character.js";
 
 export class CharSupport {
-    /**
-     * When converting ANTLR char and string literals, here is the
-     *  value set of escape chars.
-     */
-    public static readonly ANTLRLiteralEscapedCharValue: Record<string, number> = {};
+    /** When converting ANTLR char and string literals, here is the value set of escape chars. */
+    public static readonly ANTLRLiteralEscapedCharValue = new Map<string, number>([
+        ["n", "\n".codePointAt(0)!],
+        ["r", "\r".codePointAt(0)!],
+        ["t", "\t".codePointAt(0)!],
+        ["b", "\b".codePointAt(0)!],
+        ["f", "\f".codePointAt(0)!],
+        ["\\", "\\".codePointAt(0)!],
+    ]);
+
+    /** Given a char, we need to be able to show as an ANTLR literal. */
+    public static readonly ANTLRLiteralCharValueEscape = new Map<number, string>([
+        ["\n".codePointAt(0)!, "\\n"],
+        ["\r".codePointAt(0)!, "\\r"],
+        ["\t".codePointAt(0)!, "\\t"],
+        ["\b".codePointAt(0)!, "\\b"],
+        ["\f".codePointAt(0)!, "\\f"],
+        ["\\".codePointAt(0)!, "\\\\"],
+    ]);
 
-    private static readonly validEscapeCharacters: string = `btn\\fr"'`;
     /**
      * @param c The code point to convert to an ANTLR char literal.
      *
@@ -29,9 +42,9 @@ export class CharSupport {
         if (c < 0) {
             result = "<INVALID>";
         } else {
-            const char = String.fromCodePoint(c);
-            if (this.validEscapeCharacters.includes(char)) {
-                result = char;
+            const charValueEscape = this.ANTLRLiteralCharValueEscape.get(c);
+            if (charValueEscape) {
+                result = charValueEscape;
             } else {
                 if (Character.UnicodeBlock.of(c) === Character.UnicodeBlock.BASIC_LATIN &&
                     !Character.isISOControl(c)) {
@@ -41,7 +54,7 @@ export class CharSupport {
                         if (c === 0x27) { // escape single quote
                             result = "\\'";
                         } else {
-                            result = char;
+                            result = String.fromCodePoint(c);
                         }
                     }
                 } else {
@@ -145,23 +158,23 @@ export class CharSupport {
     public static getCharValueFromCharInGrammarLiteral(cstr: string): number {
         switch (cstr.length) {
             case 1: { // 'x'
+                // no escape char
                 return cstr.codePointAt(0)!;
             }
 
-            // no escape char
             case 2: {
                 if (!cstr.startsWith("\\")) {
                     return -1;
                 }
 
                 // '\x'  (antlr lexer will catch invalid char)
-                const escChar = cstr.codePointAt(0)!;
-                if (escChar === 0x27) { // '
-                    return escChar;
+                const escapedChar = cstr[1];
+                if (escapedChar === "'") {
+                    return escapedChar.codePointAt(0)!;
                 }
 
                 // escape quote only in string literals.
-                return this.validEscapeCharacters.includes(cstr[0]) ? escChar : -1;
+                return this.ANTLRLiteralEscapedCharValue.get(escapedChar) ?? -1;
             }
 
             case 6: {
@@ -202,7 +215,7 @@ export class CharSupport {
         const unicodeChars = cstr.substring(startOff, endOff);
         const result = parseInt(unicodeChars, 16);
 
-        return result;
+        return isNaN(result) ? -1 : result;
     }
 
     public static capitalize(s: string): string {
@@ -224,14 +237,4 @@ export class CharSupport {
             CharSupport.getANTLRCharLiteralForChar(codePointEnd)
             : CharSupport.getANTLRCharLiteralForChar(codePointStart);
     }
-
-    static {
-        CharSupport.ANTLRLiteralEscapedCharValue.n = 0x0A; // '\n'
-        CharSupport.ANTLRLiteralEscapedCharValue.r = 0x0D; // '\r'
-        CharSupport.ANTLRLiteralEscapedCharValue.t = 0x09; // '\t'
-        CharSupport.ANTLRLiteralEscapedCharValue.b = 0x08; // '\b'
-        CharSupport.ANTLRLiteralEscapedCharValue.f = 0x0C; // '\f'
-        CharSupport.ANTLRLiteralEscapedCharValue["\\"] = 0x5C; // '\\'
-        CharSupport.ANTLRLiteralEscapedCharValue[0x5C] = 0x5C;
-    }
 };
diff --git a/src/misc/EscapeSequenceParsing.ts b/src/misc/EscapeSequenceParsing.ts
@@ -129,26 +129,22 @@ export abstract class EscapeSequenceParsing {
                     parseLength: offset - startOff,
                 };
             } else {
-                if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) {
-                    let codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped];
-                    if (codePoint === 0) {
-                        if (escaped !== 0x5D && escaped !== 0x2D) { // escape ']' and '-' only in char sets.
-                            return EscapeSequenceParsing.invalid(startOff, startOff + 1);
-                        } else {
-                            codePoint = escaped;
-                        }
+                let codePoint = CharSupport.ANTLRLiteralEscapedCharValue.get(s[offset - 1]);
+                if (codePoint === undefined) {
+                    if (escaped !== 0x5D && escaped !== 0x2D) { // escape ']' and '-' only in char sets.
+                        return EscapeSequenceParsing.invalid(startOff, startOff + 1);
+                    } else {
+                        codePoint = escaped;
                     }
-
-                    return {
-                        type: ResultType.CodePoint,
-                        codePoint,
-                        propertyIntervalSet: EscapeSequenceParsing.#emptySet,
-                        startOffset: startOff,
-                        parseLength: offset - startOff,
-                    };
-                } else {
-                    return EscapeSequenceParsing.invalid(startOff, s.length - 1);
                 }
+
+                return {
+                    type: ResultType.CodePoint,
+                    codePoint,
+                    propertyIntervalSet: EscapeSequenceParsing.#emptySet,
+                    startOffset: startOff,
+                    parseLength: offset - startOff,
+                };
             }
         }
     }

diff --git a/src/support/ParseTreeToASTConverter.ts b/src/support/ParseTreeToASTConverter.ts
@@ -345,15 +345,27 @@ export class ParseTreeToASTConverter {
 
     public static convertEbnfSuffixToAST(ebnfSuffix: EbnfSuffixContext, ast: GrammarAST): GrammarAST | undefined {
         let blockAST;
-        if (ebnfSuffix.QUESTION().length > 0) {
-            blockAST = new OptionalBlockAST(ANTLRv4Lexer.OPTIONAL, this.createToken(ANTLRv4Lexer.OPTIONAL, ebnfSuffix),
-                ebnfSuffix.QUESTION().length === 1);
-        } else if (ebnfSuffix.STAR()) {
-            blockAST = new StarBlockAST(ANTLRv4Parser.CLOSURE, this.createToken(ANTLRv4Parser.STAR, ebnfSuffix),
-                ebnfSuffix.QUESTION().length === 0);
-        } else if (ebnfSuffix.PLUS()) {
-            blockAST = new PlusBlockAST(ANTLRv4Parser.POSITIVE_CLOSURE,
-                this.createToken(ANTLRv4Parser.PLUS, ebnfSuffix), ebnfSuffix.QUESTION().length === 0);
+        const first = ebnfSuffix.getChild(0) as TerminalNode;
+        switch (first.symbol.type) {
+            case ANTLRv4Parser.QUESTION: {
+                blockAST = new OptionalBlockAST(ANTLRv4Lexer.OPTIONAL, this.createToken(ANTLRv4Lexer.OPTIONAL,
+                    ebnfSuffix), ebnfSuffix.QUESTION().length === 1);
+                break;
+            }
+
+            case ANTLRv4Parser.STAR: {
+                blockAST = new StarBlockAST(ANTLRv4Lexer.CLOSURE, this.createToken(ANTLRv4Lexer.CLOSURE, ebnfSuffix),
+                    ebnfSuffix.QUESTION().length === 0);
+                break;
+            }
+
+            case ANTLRv4Parser.PLUS: {
+                blockAST = new PlusBlockAST(ANTLRv4Lexer.POSITIVE_CLOSURE,
+                    this.createToken(ANTLRv4Lexer.POSITIVE_CLOSURE, ebnfSuffix), ebnfSuffix.QUESTION().length === 0);
+                break;
+            }
+
+            default:
         }
 
         ast.addChild(blockAST);
@@ -882,6 +894,10 @@ export class ParseTreeToASTConverter {
             token.column = context.start!.column;
         } else {
             token = CommonToken.fromToken(context.symbol);
+            token.type = type;
+            token.tokenIndex = context.symbol.tokenIndex;
+            token.line = context.symbol.line;
+            token.column = context.symbol.column;
         }
 
         if (text) {

diff --git a/src/tree-walkers/GrammarTreeVisitor.ts b/src/tree-walkers/GrammarTreeVisitor.ts
@@ -980,7 +980,7 @@ export class GrammarTreeVisitor extends TreeParser {
             // org/antlr/v4/parse/GrammarTreeVisitor.g:401:5: ( ^(a= ASSIGN ID v= optionValue ) )
             // org/antlr/v4/parse/GrammarTreeVisitor.g:401:9: ^(a= ASSIGN ID v= optionValue )
             {
-                //const a = this.match(this.input!, ANTLRv4Parser.ASSIGN, null) as GrammarAST | null;
+                this.match(this.input, ANTLRv4Parser.ASSIGN, null) as GrammarAST | null;
                 this.match(this.input, Constants.DOWN, null);
                 ID4 = this.match(this.input, ANTLRv4Parser.ID, null) as GrammarAST | null;
                 v = this.optionValue();

diff --git a/tests/TestASTStructure.spec.ts b/tests/TestASTStructure.spec.ts
@@ -1,7 +1,6 @@
 /*
- * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
+ * Copyright (c) Mike Lischke. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
  */
 
 /* eslint-disable max-len */

diff --git a/tests/TestATNConstruction.spec.ts b/tests/TestATNConstruction.spec.ts
@@ -1,7 +1,6 @@
 /*
- * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
+ * Copyright (c) Mike Lischke. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
  */
 
 import { describe, expect, it } from "vitest";

diff --git a/tests/TestATNDeserialization.spec.ts b/tests/TestATNDeserialization.spec.ts
@@ -1,7 +1,6 @@
 /*
- * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
+ * Copyright (c) Mike Lischke. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
  */
 
 import { describe, expect, it } from "vitest";

diff --git a/tests/TestATNInterpreter.spec.ts b/tests/TestATNInterpreter.spec.ts
@@ -1,7 +1,6 @@
 /*
- * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
- * Use of this file is governed by the BSD 3-clause license that
- * can be found in the LICENSE.txt file in the project root.
+ * Copyright (c) Mike Lischke. All rights reserved.
+ * Licensed under the MIT License. See License.txt in the project root for license information.
  */
 
 // cspell: ignore abdc aaaaaab aabd