Skip to content

Commit

Permalink
TestATNLexerInterpreter + TestCharSupport done.
Browse files Browse the repository at this point in the history
- Bug fixes in CharSupport and escape sequence parsing.
- Bug fix in parse tree to AST converter for EBNF suffixes.
- TestATNLexerInterpreter + TestCharSupport done.
- Upgraded antlr4ng and stringtemplate4ts dependencies.
  • Loading branch information
mike-lischke committed Dec 1, 2024
1 parent 9a995f8 commit 0d44ae4
Show file tree
Hide file tree
Showing 20 changed files with 835 additions and 871 deletions.
16 changes: 8 additions & 8 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
],
"license": "MIT",
"dependencies": {
"antlr4ng": "3.0.9",
"antlr4ng": "3.0.10",
"commander": "12.1.0",
"fast-printf": "1.6.9",
"stringtemplate4ts": "1.0.3",
"stringtemplate4ts": "1.0.4",
"unicode-properties": "1.4.1"
},
"devDependencies": {
Expand Down
2 changes: 0 additions & 2 deletions src/antlr3/tree/TreeParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,6 @@ export class TreeParser extends BaseRecognizer {
* from tree parser errors inline...
*/
protected override recoverFromMismatchedToken(input: IntStream, ttype: number, follow: BitSet): CommonTree | null {
//throw new InputMismatchException(ttype, input as TreeNodeStream);

throw new Error("recoverFromMismatchedToken");
}

Expand Down
16 changes: 4 additions & 12 deletions src/codegen/CodeGenerator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,7 @@ export class CodeGenerator {
}

public generateLexer(header?: boolean): IST {
if (!header) {
return this.generateLexer(false);
}
header ??= false;

return this.walk(this.createController().buildLexerOutputModel(header), header);

Expand All @@ -105,9 +103,7 @@ export class CodeGenerator {
}

public generateListener(header?: boolean): IST {
if (!header) {
return this.generateListener(false);
}
header ??= false;

return this.walk(this.createController().buildListenerOutputModel(header), header);

Expand All @@ -122,17 +118,13 @@ export class CodeGenerator {
}

public generateVisitor(header?: boolean): IST {
if (!header) {
return this.generateVisitor(false);
}
header ??= false;

return this.walk(this.createController().buildVisitorOutputModel(header), header);
}

public generateBaseVisitor(header?: boolean): IST {
if (!header) {
return this.generateBaseVisitor(false);
}
header ??= false;

return this.walk(this.createController().buildBaseVisitorOutputModel(header), header);
}
Expand Down
2 changes: 1 addition & 1 deletion src/codegen/model/Lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export class Lexer extends Recognizer {
public readonly escapedChannels = new Map<string, number>();
public readonly file: LexerFile;
public readonly modes: string[];
public readonly escapedModeNames: string[];
public readonly escapedModeNames: string[] = [];

public actionFuncs = new Map<Rule, RuleActionFunction>();

Expand Down
55 changes: 29 additions & 26 deletions src/misc/CharSupport.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,26 @@ import { IntervalSet } from "antlr4ng";
import { Character } from "../support/Character.js";

export class CharSupport {
/**
* When converting ANTLR char and string literals, here is the
* value set of escape chars.
*/
public static readonly ANTLRLiteralEscapedCharValue: Record<string, number> = {};
/** When converting ANTLR char and string literals, here is the value set of escape chars. */
public static readonly ANTLRLiteralEscapedCharValue = new Map<string, number>([
["n", "\n".codePointAt(0)!],
["r", "\r".codePointAt(0)!],
["t", "\t".codePointAt(0)!],
["b", "\b".codePointAt(0)!],
["f", "\f".codePointAt(0)!],
["\\", "\\".codePointAt(0)!],
]);

/** Given a char, we need to be able to show as an ANTLR literal. */
public static readonly ANTLRLiteralCharValueEscape = new Map<number, string>([
["\n".codePointAt(0)!, "\\n"],
["\r".codePointAt(0)!, "\\r"],
["\t".codePointAt(0)!, "\\t"],
["\b".codePointAt(0)!, "\\b"],
["\f".codePointAt(0)!, "\\f"],
["\\".codePointAt(0)!, "\\\\"],
]);

private static readonly validEscapeCharacters: string = `btn\\fr"'`;
/**
* @param c The code point to convert to an ANTLR char literal.
*
Expand All @@ -29,9 +42,9 @@ export class CharSupport {
if (c < 0) {
result = "<INVALID>";
} else {
const char = String.fromCodePoint(c);
if (this.validEscapeCharacters.includes(char)) {
result = char;
const charValueEscape = this.ANTLRLiteralCharValueEscape.get(c);
if (charValueEscape) {
result = charValueEscape;
} else {
if (Character.UnicodeBlock.of(c) === Character.UnicodeBlock.BASIC_LATIN &&
!Character.isISOControl(c)) {
Expand All @@ -41,7 +54,7 @@ export class CharSupport {
if (c === 0x27) { // escape single quote
result = "\\'";
} else {
result = char;
result = String.fromCodePoint(c);
}
}
} else {
Expand Down Expand Up @@ -145,23 +158,23 @@ export class CharSupport {
public static getCharValueFromCharInGrammarLiteral(cstr: string): number {
switch (cstr.length) {
case 1: { // 'x'
// no escape char
return cstr.codePointAt(0)!;
}

// no escape char
case 2: {
if (!cstr.startsWith("\\")) {
return -1;
}

// '\x' (antlr lexer will catch invalid char)
const escChar = cstr.codePointAt(0)!;
if (escChar === 0x27) { // '
return escChar;
const escapedChar = cstr[1];
if (escapedChar === "'") {
return escapedChar.codePointAt(0)!;
}

// escape quote only in string literals.
return this.validEscapeCharacters.includes(cstr[0]) ? escChar : -1;
return this.ANTLRLiteralEscapedCharValue.get(escapedChar) ?? -1;
}

case 6: {
Expand Down Expand Up @@ -202,7 +215,7 @@ export class CharSupport {
const unicodeChars = cstr.substring(startOff, endOff);
const result = parseInt(unicodeChars, 16);

return result;
return isNaN(result) ? -1 : result;
}

public static capitalize(s: string): string {
Expand All @@ -224,14 +237,4 @@ export class CharSupport {
CharSupport.getANTLRCharLiteralForChar(codePointEnd)
: CharSupport.getANTLRCharLiteralForChar(codePointStart);
}

static {
CharSupport.ANTLRLiteralEscapedCharValue.n = 0x0A; // '\n'
CharSupport.ANTLRLiteralEscapedCharValue.r = 0x0D; // '\r'
CharSupport.ANTLRLiteralEscapedCharValue.t = 0x09; // '\t'
CharSupport.ANTLRLiteralEscapedCharValue.b = 0x08; // '\b'
CharSupport.ANTLRLiteralEscapedCharValue.f = 0x0C; // '\f'
CharSupport.ANTLRLiteralEscapedCharValue["\\"] = 0x5C; // '\\'
CharSupport.ANTLRLiteralEscapedCharValue[0x5C] = 0x5C;
}
};
32 changes: 14 additions & 18 deletions src/misc/EscapeSequenceParsing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,26 +129,22 @@ export abstract class EscapeSequenceParsing {
parseLength: offset - startOff,
};
} else {
if (escaped < CharSupport.ANTLRLiteralEscapedCharValue.length) {
let codePoint = CharSupport.ANTLRLiteralEscapedCharValue[escaped];
if (codePoint === 0) {
if (escaped !== 0x5D && escaped !== 0x2D) { // escape ']' and '-' only in char sets.
return EscapeSequenceParsing.invalid(startOff, startOff + 1);
} else {
codePoint = escaped;
}
let codePoint = CharSupport.ANTLRLiteralEscapedCharValue.get(s[offset - 1]);
if (codePoint === undefined) {
if (escaped !== 0x5D && escaped !== 0x2D) { // escape ']' and '-' only in char sets.
return EscapeSequenceParsing.invalid(startOff, startOff + 1);
} else {
codePoint = escaped;
}

return {
type: ResultType.CodePoint,
codePoint,
propertyIntervalSet: EscapeSequenceParsing.#emptySet,
startOffset: startOff,
parseLength: offset - startOff,
};
} else {
return EscapeSequenceParsing.invalid(startOff, s.length - 1);
}

return {
type: ResultType.CodePoint,
codePoint,
propertyIntervalSet: EscapeSequenceParsing.#emptySet,
startOffset: startOff,
parseLength: offset - startOff,
};
}
}
}
Expand Down
34 changes: 25 additions & 9 deletions src/support/ParseTreeToASTConverter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -345,15 +345,27 @@ export class ParseTreeToASTConverter {

public static convertEbnfSuffixToAST(ebnfSuffix: EbnfSuffixContext, ast: GrammarAST): GrammarAST | undefined {
let blockAST;
if (ebnfSuffix.QUESTION().length > 0) {
blockAST = new OptionalBlockAST(ANTLRv4Lexer.OPTIONAL, this.createToken(ANTLRv4Lexer.OPTIONAL, ebnfSuffix),
ebnfSuffix.QUESTION().length === 1);
} else if (ebnfSuffix.STAR()) {
blockAST = new StarBlockAST(ANTLRv4Parser.CLOSURE, this.createToken(ANTLRv4Parser.STAR, ebnfSuffix),
ebnfSuffix.QUESTION().length === 0);
} else if (ebnfSuffix.PLUS()) {
blockAST = new PlusBlockAST(ANTLRv4Parser.POSITIVE_CLOSURE,
this.createToken(ANTLRv4Parser.PLUS, ebnfSuffix), ebnfSuffix.QUESTION().length === 0);
const first = ebnfSuffix.getChild(0) as TerminalNode;
switch (first.symbol.type) {
case ANTLRv4Parser.QUESTION: {
blockAST = new OptionalBlockAST(ANTLRv4Lexer.OPTIONAL, this.createToken(ANTLRv4Lexer.OPTIONAL,
ebnfSuffix), ebnfSuffix.QUESTION().length === 1);
break;
}

case ANTLRv4Parser.STAR: {
blockAST = new StarBlockAST(ANTLRv4Lexer.CLOSURE, this.createToken(ANTLRv4Lexer.CLOSURE, ebnfSuffix),
ebnfSuffix.QUESTION().length === 0);
break;
}

case ANTLRv4Parser.PLUS: {
blockAST = new PlusBlockAST(ANTLRv4Lexer.POSITIVE_CLOSURE,
this.createToken(ANTLRv4Lexer.POSITIVE_CLOSURE, ebnfSuffix), ebnfSuffix.QUESTION().length === 0);
break;
}

default:
}

ast.addChild(blockAST);
Expand Down Expand Up @@ -882,6 +894,10 @@ export class ParseTreeToASTConverter {
token.column = context.start!.column;
} else {
token = CommonToken.fromToken(context.symbol);
token.type = type;
token.tokenIndex = context.symbol.tokenIndex;
token.line = context.symbol.line;
token.column = context.symbol.column;
}

if (text) {
Expand Down
2 changes: 1 addition & 1 deletion src/tree-walkers/GrammarTreeVisitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -980,7 +980,7 @@ export class GrammarTreeVisitor extends TreeParser {
// org/antlr/v4/parse/GrammarTreeVisitor.g:401:5: ( ^(a= ASSIGN ID v= optionValue ) )
// org/antlr/v4/parse/GrammarTreeVisitor.g:401:9: ^(a= ASSIGN ID v= optionValue )
{
//const a = this.match(this.input!, ANTLRv4Parser.ASSIGN, null) as GrammarAST | null;
this.match(this.input, ANTLRv4Parser.ASSIGN, null) as GrammarAST | null;
this.match(this.input, Constants.DOWN, null);
ID4 = this.match(this.input, ANTLRv4Parser.ID, null) as GrammarAST | null;
v = this.optionValue();
Expand Down
5 changes: 2 additions & 3 deletions tests/TestASTStructure.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
* Copyright (c) Mike Lischke. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*/

/* eslint-disable max-len */
Expand Down
5 changes: 2 additions & 3 deletions tests/TestATNConstruction.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
* Copyright (c) Mike Lischke. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*/

import { describe, expect, it } from "vitest";
Expand Down
5 changes: 2 additions & 3 deletions tests/TestATNDeserialization.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
* Copyright (c) Mike Lischke. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*/

import { describe, expect, it } from "vitest";
Expand Down
5 changes: 2 additions & 3 deletions tests/TestATNInterpreter.spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
* Copyright (c) Mike Lischke. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*/

// cspell: ignore abdc aaaaaab aabd
Expand Down
Loading

0 comments on commit 0d44ae4

Please sign in to comment.