Skip to content

Commit

Permalink
Big steps forward
Browse files Browse the repository at this point in the history
- Grammar parsing and tree walkers working.
- Semantic pipeline working (including left recursion removal).
  • Loading branch information
mike-lischke committed Nov 13, 2024
1 parent 7a94e99 commit bae1768
Show file tree
Hide file tree
Showing 11 changed files with 477 additions and 375 deletions.
8 changes: 5 additions & 3 deletions src/analysis/LeftRecursiveRuleAnalyzer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,10 @@ export class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
// but do not support the ELEMENT_OPTIONS syntax. Make sure to not try
// and add the tokenIndex option when writing these tokens.
const noOptions = new IntervalSet();
const labeledSubTrees = t.getNodesWithType(IntervalSet.of(ANTLRv4Parser.PLUS_ASSIGN,
ANTLRv4Parser.PLUS_ASSIGN));
const typeSet = new IntervalSet();
typeSet.addOne(ANTLRv4Parser.ASSIGN);
typeSet.addOne(ANTLRv4Parser.PLUS_ASSIGN);
const labeledSubTrees = t.getNodesWithType(typeSet);
for (const sub of labeledSubTrees) {
noOptions.addOne(sub.getChild(0)!.getTokenStartIndex());
}
Expand All @@ -357,7 +359,7 @@ export class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
(tok.type === ANTLRv4Parser.TOKEN_REF ||
tok.type === ANTLRv4Parser.STRING_LITERAL ||
tok.type === ANTLRv4Parser.RULE_REF)) {
elementOptions += "tokenIndex=" + tok.tokenIndex;
// TODO elementOptions += "tokenIndex=" + tok.tokenIndex; unsupported 4 syntax
}

if (node instanceof GrammarASTWithOptions) {
Expand Down
23 changes: 12 additions & 11 deletions src/analysis/LeftRecursiveRuleTransformer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ import { Tool } from "../Tool.js";
import type { SupportedLanguage } from "../codegen/CodeGenerator.js";
import { ANTLRv4Lexer } from "../generated/ANTLRv4Lexer.js";
import { ANTLRv4Parser } from "../generated/ANTLRv4Parser.js";
import { OrderedHashMap } from "../misc/OrderedHashMap.js";
import { GrammarASTAdaptor } from "../parse/GrammarASTAdaptor.js";
import { ScopeParser } from "../parse/ScopeParser.js";
import { ToolANTLRParser } from "../parse/ToolANTLRParser.js";
import { BasicSemanticChecks } from "../semantics/BasicSemanticChecks.js";
import { RuleCollector } from "../semantics/RuleCollector.js";
import { ParseTreeToASTConverter } from "../support/ParseTreeToASTConverter.js";
import { isTokenName } from "../support/helpers.js";
import { DictType } from "../tool/DictType.js";
import { ErrorManager } from "../tool/ErrorManager.js";
Expand All @@ -38,7 +40,6 @@ import { GrammarRootAST } from "../tool/ast/GrammarRootAST.js";
import { RuleAST } from "../tool/ast/RuleAST.js";
import { LeftRecursiveRuleAltInfo } from "./LeftRecursiveRuleAltInfo.js";
import { LeftRecursiveRuleAnalyzer } from "./LeftRecursiveRuleAnalyzer.js";
import { ParseTreeToASTConverter } from "../support/ParseTreeToASTConverter.js";

/**
* Remove left-recursive rule refs, add precedence args to recursive rule refs.
Expand Down Expand Up @@ -115,6 +116,7 @@ export class LeftRecursiveRuleTransformer {
throw re;
}
}

if (!isLeftRec) {
return false;
}
Expand Down Expand Up @@ -162,7 +164,7 @@ export class LeftRecursiveRuleTransformer {
(r.ast.getChild(0) as GrammarAST).token!, r.name);
}

r.recOpAlts = new Map<number, LeftRecursiveRuleAltInfo>();
r.recOpAlts = new OrderedHashMap<number, LeftRecursiveRuleAltInfo>();
leftRecursiveRuleWalker.binaryAlts.forEach((value, key) => {
r.recOpAlts.set(key, value);
});
Expand Down Expand Up @@ -207,15 +209,14 @@ export class LeftRecursiveRuleTransformer {
public parseArtificialRule(g: Grammar, ruleText: string): RuleAST | undefined {
const stream = CharStream.fromString(ruleText);
const lexer = new ANTLRv4Lexer(stream);
const adaptor = new GrammarASTAdaptor(lexer.inputStream);
const tokens = new CommonTokenStream(lexer);
const p = new ToolANTLRParser(tokens, this.tool);
const ruleStart = null;

try {
const r = p.ruleSpec();
const root = new GrammarAST();
ParseTreeToASTConverter.convertRuleSpecToAST(r, root, adaptor);
ParseTreeToASTConverter.convertRuleSpecToAST(r, root);
const ruleAST = root.getChild(0) as RuleAST;

GrammarTransformPipeline.setGrammarPtr(g, ruleAST);
Expand All @@ -231,7 +232,7 @@ export class LeftRecursiveRuleTransformer {
}

/**
* <pre>
* ```
* (RULE e int _p (returns int v)
* (BLOCK
* (ALT
Expand All @@ -241,13 +242,13 @@ export class LeftRecursiveRuleTransformer {
* (ALT ID))
* (* (BLOCK
* (OPTIONS ...)
* (ALT {7 &gt;= $_p}? '*' (= b e) {$v = $a.v * $b.v;})
* (ALT {6 &gt;= $_p}? '+' (= b e) {$v = $a.v + $b.v;})
* (ALT {3 &gt;= $_p}? '++') (ALT {2 &gt;= $_p}? '--'))))))
* </pre>
* (ALT {7 >= $_p}? '*' (= b e) {$v = $a.v * $b.v;})
* (ALT {6 >= $_p}? '+' (= b e) {$v = $a.v + $b.v;})
* (ALT {3 >= $_p}? '++') (ALT {2 >= $_p}? '--'))))))
* ```
*/
public setAltASTPointers(r: LeftRecursiveRule, t: RuleAST): void {
const ruleBlk = t.getFirstChildWithType(ANTLRv4Parser.LPAREN) as BlockAST;
const ruleBlk = t.getFirstChildWithType(ANTLRv4Parser.BLOCK) as BlockAST;
const mainAlt = ruleBlk.getChild(0) as AltAST;
const primaryBlk = mainAlt.getChild(0) as BlockAST;
const opsBlk = mainAlt.getChild(1)!.getChild(0) as BlockAST; // (* BLOCK ...)
Expand All @@ -259,7 +260,7 @@ export class LeftRecursiveRuleTransformer {
}

for (let i = 0; i < r.recOpAlts.size; i++) {
const altInfo = r.recOpAlts.get(i)!;
const altInfo = r.recOpAlts.getElement(i)!;
altInfo.altAST = opsBlk.getChild(i) as AltAST;
altInfo.altAST.leftRecursiveAltInfo = altInfo;
altInfo.originalAltAST!.leftRecursiveAltInfo = altInfo;
Expand Down
2 changes: 1 addition & 1 deletion src/codegen/OutputModelController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ export class OutputModelController {
// Insert code in front of each op alt to create specialized ctx if there was an alt label
for (let i = 0; i < opAltsCode.length; i++) {
let altActionST: IST;
const altInfo = r.recOpAlts.get(i)!;
const altInfo = r.recOpAlts.getElement(i)!;
let templateName: string;
if (altInfo.altLabel !== undefined) {
templateName = "recRuleLabeledAltStartAction";
Expand Down
195 changes: 98 additions & 97 deletions src/grammars/predefined.tokens
Original file line number Diff line number Diff line change
@@ -1,97 +1,98 @@
ACTION = 4
ACTION_CHAR_LITERAL = 5
ACTION_ESC = 6
ACTION_STRING_LITERAL = 7
ARG_ACTION = 8
ARG_OR_CHARSET = 9
ASSIGN = 10
AT = 11
CATCH = 12
CHANNELS = 13
COLON = 14
COLONCOLON = 15
COMMA = 16
DOC_COMMENT = 17
DOLLAR = 18
DOT = 19
ERRCHAR = 20
ESC_SEQ = 21
FINALLY = 22
FRAGMENT = 23
GRAMMAR = 24
GT = 25
HEX_DIGIT = 26
ID = 27
IMPORT = 28
INT = 29
LEXER = 30
LEXER_CHAR_SET = 31
LOCALS = 32
LPAREN = 33
LT = 34
MODE = 35
NESTED_ACTION = 36
NLCHARS = 37
NOT = 38
NameChar = 39
NameStartChar = 40
OPTIONS = 41
OR = 42
PARSER = 43
PLUS = 44
PLUS_ASSIGN = 45
POUND = 46
QUESTION = 47
RANGE = 48
RARROW = 49
RBRACE = 50
RETURNS = 51
RPAREN = 52
RULE_REF = 53
SEMI = 54
SEMPRED = 55
SRC = 56
STAR = 57
STRING_LITERAL = 58
THROWS = 59
TOKENS_SPEC = 60
TOKEN_REF = 61
UNICODE_ESC = 62
UNICODE_EXTENDED_ESC = 63
UnicodeBOM = 64
WS = 65
WSCHARS = 66
WSNLCHARS = 67
ALT = 68
BLOCK = 69
CLOSURE = 70
COMBINED = 71
ELEMENT_OPTIONS = 72
EPSILON = 73
LEXER_ACTION_CALL = 74
LEXER_ALT_ACTION = 75
OPTIONAL = 76
POSITIVE_CLOSURE = 77
RULE = 78
RULEMODIFIERS = 79
RULES = 80
SET = 81
WILDCARD = 82
BLOCK_COMMENT = 83
LINE_COMMENT = 84
UNTERMINATED_STRING_LITERAL = 85
BEGIN_ARGUMENT = 86
BEGIN_ACTION = 87
TOKENS = 88
LBRACE = 89
END_ARGUMENT = 90
UNTERMINATED_ARGUMENT = 91
ARGUMENT_CONTENT = 92
END_ACTION = 93
UNTERMINATED_ACTION = 94
ACTION_CONTENT = 95
UNTERMINATED_CHAR_SET = 96
PRIVATE = 97
PROTECTED = 98
PUBLIC = 99
PREDICATE_OPTIONS = 100
ACTION = 4
ACTION_CHAR_LITERAL = 5
ACTION_ESC = 6
ACTION_STRING_LITERAL = 7
ARG_ACTION = 8
ARG_OR_CHARSET = 9
ASSIGN = 10
AT = 11
CATCH = 12
CHANNELS = 13
COLON = 14
COLONCOLON = 15
COMMA = 16
COMMENT1 = 17
DOC_COMMENT = 18
DOLLAR = 19
DOT = 20
ERRCHAR = 21
ESC_SEQ = 22
FINALLY = 23
FRAGMENT = 24
GRAMMAR = 25
GT = 26
HEX_DIGIT = 27
ID = 28
IMPORT = 29
INT = 30
LEXER = 31
LEXER_CHAR_SET = 32
LOCALS = 33
LPAREN = 34
LT = 35
MODE = 36
NESTED_ACTION = 37
NLCHARS = 38
NOT = 39
NameChar = 40
NameStartChar = 41
OPTIONS = 42
OR = 43
PARSER = 44
PLUS = 45
PLUS_ASSIGN = 46
POUND = 47
QUESTION = 48
RANGE = 49
RARROW = 50
RBRACE = 51
RETURNS = 52
RPAREN = 53
RULE_REF = 54
SEMI = 55
SEMPRED = 56
SRC = 57
STAR = 58
STRING_LITERAL = 59
THROWS = 60
TOKENS_SPEC = 61
TOKEN_REF = 62
UNICODE_ESC = 63
UNICODE_EXTENDED_ESC = 64
UnicodeBOM = 65
WS = 66
WSCHARS = 67
WSNLCHARS = 68
ALT = 69
BLOCK = 70
CLOSURE = 71
COMBINED = 72
ELEMENT_OPTIONS = 73
EPSILON = 74
LEXER_ACTION_CALL = 75
LEXER_ALT_ACTION = 76
OPTIONAL = 77
POSITIVE_CLOSURE = 78
RULE = 79
RULEMODIFIERS = 80
RULES = 81
SET = 82
WILDCARD = 83
BLOCK_COMMENT = 84
LINE_COMMENT = 85
UNTERMINATED_STRING_LITERAL = 86
BEGIN_ARGUMENT = 87
BEGIN_ACTION = 88
TOKENS = 89
LBRACE = 90
END_ARGUMENT = 91
UNTERMINATED_ARGUMENT = 92
ARGUMENT_CONTENT = 93
END_ACTION = 94
UNTERMINATED_ACTION = 95
ACTION_CONTENT = 96
UNTERMINATED_CHAR_SET = 97
PRIVATE = 98
PROTECTED = 99
PUBLIC = 100
PREDICATE_OPTIONS = 101
7 changes: 3 additions & 4 deletions src/misc/LexerAdaptor.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
/*
* This file is released under the MIT license.
* Copyright (c) 2016, 2020, Mike Lischke
*
* See LICENSE file for more info.
* Copyright (c) The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/

import { CharStream, Lexer, Token } from "antlr4ng";
Expand Down
28 changes: 28 additions & 0 deletions src/misc/OrderedHashMap.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Copyright (c) The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/

/**
 * A map that additionally tracks the order in which keys were first inserted, so that both keys and
 * values can be addressed by their insertion index (see {@link getKey} and {@link getElement}).
 *
 * Keyed lookup via {@link get} is deliberately disabled: it always throws. All other `Map` members
 * (`has`, `size`, iteration, ...) behave as usual.
 */
export class OrderedHashMap<K, V> extends Map<K, V> {
    /** The keys in the order they were first added to the map. Always holds exactly `size` entries. */
    private elements: K[] = [];

    /**
     * Creates a new map, optionally filled from the given entries.
     *
     * @param entries Optional key/value pairs to add, in iteration order.
     */
    public constructor(entries?: Iterable<readonly [K, V]> | null) {
        // Do not forward the entries to the Map constructor: it would call the overridden `set`
        // before the `elements` field has been initialized.
        super();

        if (entries) {
            for (const [key, value] of entries) {
                this.set(key, value);
            }
        }
    }

    /**
     * Keyed lookup is not supported by this class.
     *
     * @param key Ignored.
     *
     * @throws Error on every call.
     */
    public override get(key: K): never {
        throw new Error("Use getKey and getElement instead.");
    }

    /**
     * @param i The zero-based insertion index.
     *
     * @returns The key that was inserted at position `i`. The result is `undefined` at runtime if `i` is
     *          out of range.
     */
    public getKey(i: number): K {
        return this.elements[i];
    }

    /**
     * @param i The zero-based insertion index.
     *
     * @returns The value currently stored for the key inserted at position `i`, or `undefined` if `i` is
     *          out of range.
     */
    public getElement(i: number): V | undefined {
        return super.get(this.elements[i]);
    }

    /**
     * Stores `value` under `key`. A new key is appended to the insertion order. Setting an existing key
     * only replaces its value and keeps its original position.
     *
     * @param key The key to store the value under.
     * @param value The value to store.
     *
     * @returns This map.
     */
    public override set(key: K, value: V): this {
        // Record a key only once, otherwise the index based accessors get out of sync with `size`.
        if (!super.has(key)) {
            this.elements.push(key);
        }

        return super.set(key, value);
    }

    /**
     * Removes `key` and its value. The insertion indices of all keys added after it shift down by one.
     *
     * @param key The key to remove.
     *
     * @returns `true` if the key existed and was removed, otherwise `false`.
     */
    public override delete(key: K): boolean {
        const removed = super.delete(key);
        if (removed) {
            // Map compares keys with SameValueZero, so NaN has to be matched explicitly here.
            const index = this.elements.findIndex((candidate) => {
                return candidate === key || (Number.isNaN(candidate) && Number.isNaN(key));
            });

            if (index >= 0) {
                this.elements.splice(index, 1);
            }
        }

        return removed;
    }

    /** Removes all entries, including the recorded insertion order. */
    public override clear(): void {
        this.elements = [];
        super.clear();
    }
}
Loading

0 comments on commit bae1768

Please sign in to comment.