Skip to content

Commit

Permalink
New minor release
Browse files Browse the repository at this point in the history
First public release
  • Loading branch information
mike-lischke committed Jan 6, 2025
1 parent a34abeb commit 8f722ad
Show file tree
Hide file tree
Showing 18 changed files with 239 additions and 54 deletions.
21 changes: 21 additions & 0 deletions .npmignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
@@ -0,0 +1,10 @@
.github
.vscode
tests/
coverage/
dist/*.map
cli/*.ts
!cli/*.d.ts
src/
!dist/src
build/
package/
templates/
!dist/templates/

eslint.config.mjs
.project
tsconfig.json
cspell.json
vitest.config.ts
*.tgz
39 changes: 39 additions & 0 deletions build/build.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Copyright (c) Mike Lischke. All rights reserved.
* Licensed under the BSD 3-clause License. See License.txt in the project root for license information.
*/

import * as esbuild from "esbuild";

/**
 * Runs esbuild over the tool sources (`src/`) and the CLI sources (`cli/`), without bundling,
 * and writes the results as ES modules to `dist/src` and `dist/cli` respectively.
 *
 * Both esbuild runs are started together and awaited as a group. If any of them fails, the error is
 * printed and the process exits with code 1.
 */
const build = async () => {
    // Options for the tool sources.
    const toolOptions: esbuild.BuildOptions = {
        entryPoints: ["src/**/*.ts"],
        bundle: false,
        outdir: "dist/src",
        format: "esm",
        target: "es2022",
        platform: "node",
        keepNames: true,
        packages: "external",
    };

    // Options for the command line scripts.
    const cliOptions: esbuild.BuildOptions = {
        entryPoints: ["cli/**/*.ts"],
        bundle: false,
        outdir: "dist/cli",
        format: "esm",
        platform: "node",
        target: "es2022",
        //outExtension: { ".js": ".cjs" },
    };

    try {
        const pendingBuilds = [toolOptions, cliOptions].map((options) => {
            return esbuild.build(options);
        });
        await Promise.all(pendingBuilds);

        console.log("Build completed successfully");
    } catch (error) {
        console.error("Build failed:", error);
        process.exit(1);
    }
};

await build();
12 changes: 12 additions & 0 deletions build/copy-templates.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/*
* Copyright (c) Mike Lischke. All rights reserved.
* Licensed under the BSD 3-clause License. See License.txt in the project root for license information.
*/

import { cp } from "node:fs/promises";

/**
* This script is meant to be run after the build process. It copies the templates/ folder to dist/templates.
*/

await cp("templates", "dist/templates", { force: true, recursive: true });
10 changes: 5 additions & 5 deletions build/generate-unicode-data.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) Mike Lischke. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*/
* Licensed under the BSD 3-clause License. See License.txt in the project root for license information.
*/

// cspell: ignore inpc, insc

Expand All @@ -10,9 +10,9 @@
* with that data. The file is then used by the ANTLR tool to support Unicode properties and categories.
*/

import { createWriteStream } from "fs";
import { readdir, readFile, stat } from "fs/promises";
import { dirname, join } from "path";
import { createWriteStream } from "node:fs";
import { readdir, readFile, stat } from "node:fs/promises";
import { dirname, join } from "node:path";

import { IntervalSet } from "antlr4ng";

Expand Down
4 changes: 2 additions & 2 deletions cli/Interpreter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*/

import { Option, program } from "commander";
import { createWriteStream } from "fs";
import { createWriteStream } from "node:fs";
import { readFile } from "fs/promises";

import { CharStream, CommonToken, CommonTokenStream, DecisionInfo, ParseInfo } from "antlr4ng";
Expand Down Expand Up @@ -135,7 +135,7 @@ export class Interpreter {
if (tok instanceof CommonToken) {
console.log(tok.toString(lexEngine));
} else {
// eslint-disable-next-line @typescript-eslint/no-base-to-string

console.log(tok.toString());
}
}
Expand Down
9 changes: 3 additions & 6 deletions cli/TestRig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,8 @@
* Licensed under the BSD 3-clause License. See License.txt in the project root for license information.
*/

/* eslint-disable jsdoc/require-param, jsdoc/require-returns */


/*
eslint-disable @typescript-eslint/no-base-to-string , @typescript-eslint/no-unsafe-function-type,
eslint-disable @typescript-eslint/no-unsafe-function-type,
@typescript-eslint/no-unsafe-return
*/

Expand Down Expand Up @@ -184,5 +181,5 @@ export class TestRig {
}
}

const testRig = new TestRig();
await testRig.run();
//const testRig = new TestRig();
//await testRig.run();
20 changes: 11 additions & 9 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
"description": "Next generation ANTLR Tool",
"type": "module",
"author": "Mike Lischke",
"repository": "https://github.com/mike-lischke/antlr-ng",
"repository": {
"type": "git",
"url": "git+https://github.com/mike-lischke/antlr-ng.git"
},
"bugs": {
"url": "https://github.com/mike-lischke/antlr-ng/issues"
},
"bin": {
"antlr-ng": "./cli/runner.js"
"antlr-ng": "dist/cli/runner.js"
},
"keywords": [
"lexer",
Expand Down Expand Up @@ -49,22 +52,21 @@
"vitest": "2.1.8"
},
"scripts": {
"build": "npm run generate-tool-parsers && npm run generate-test-parsers && npm run generate-unicode-data && npm run build-mjs && tsc -p tsconfig.json",
"build-bundle": "esbuild ./src/index.js --main-fields=module,main --bundle --target=esnext --keep-names --platform=node --external:antlr4ng --external:commander --external:fast-printf --external:stringtemplate4ts --external:unicode-properties",
"build-mjs": "npm run build-bundle -- --outfile=dist/index.mjs --format=esm",
"prepublishOnly": "npm run build && npm run copy-templates && npm run test",
"build": "npm run generate-tool-parsers && npm run generate-test-parsers && npm run generate-unicode-data && npm run esbuild && tsc -p tsconfig.json",
"esbuild": "tsx build/build.ts",
"copy-templates": "tsx build/copy-templates.ts",
"run": "tsx cli/runner.ts --version",
"lint": "eslint \"./src/**/*.ts\"",
"lint:fix": "eslint \"./src/**/*.ts\" --fix",
"test": "NODE_NO_WARNINGS=1 vitest --no-watch --no-coverage",
"generate-tool-parsers": "./build/generate-tool-parsers.cmd",
"generate-test-parsers": "./build/generate-test-parsers.cmd",
"generate-unicode-data": "tsx ./build/generate-unicode-data.ts",
"antlr-ng": "antlr-ng"
"generate-unicode-data": "tsx ./build/generate-unicode-data.ts"
},
"exports": {
"types": "./dist/src/index.d.ts",
"require": "./dist/index.cjs",
"import": "./dist/index.mjs"
"import": "./dist/src/index.js"
},
"browserslist": [
"defaults and fully supports es6-module",
Expand Down
92 changes: 80 additions & 12 deletions readme.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,91 @@
[![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/mike-lischke/ANTLRng/nodejs.yml?style=for-the-badge&logo=github)](https://github.com/mike-lischke/ANTLRng/actions/workflows/nodejs.yml)
[![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/mike-lischke/ANTLRng/nodejs.yml?style=for-the-badge&logo=github)](https://github.com/mike-lischke/ANTLRng/actions/workflows/nodejs.yml)[![Weekly Downloads](https://img.shields.io/npm/dw/antlr-ng?style=for-the-badge&color=blue)](https://www.npmjs.com/package/antlr-ng)
[![npm version](https://img.shields.io/npm/v/antlr-ng?style=for-the-badge&color=yellow)](https://www.npmjs.com/package/antlr-ng)

<img src="https://raw.githubusercontent.com/mike-lischke/mike-lischke/master/images/ANTLRng2.svg" title="ANTLR Next Generation" alt="ANTLRng" width="96" height="96"/><label style="font-size: 70%">Part of the Next Generation ANTLR Project</label>


# ANTLRng
# antlr-ng - Next Generation ANTLR

This project is a TypeScript port of the ANTLR tool (originally written in Java) and is still work-in-progress. It implements own mechanisms to work with ANTLR grammars, works in browsers and generally moves away from its Java centric roots.
**Another Tool for Language Recognition**

A tool/package that takes a defined language (provided in a grammar file) and generates parser and lexer classes in one of the supported target languages. These classes can be used in your project to parse input specified by the grammar file. Supported target languages are:

- TypeScript/JavaScript
- Java
- C++ (language identifier: Cpp)
- C# (language identifier: CSharp)
- Go
- Python3
- Dart
- Swift
- PHP

This project started as a TypeScript port of the old ANTLR4 tool (originally written in Java). It includes the entire feature set of the Java version and is constantly enhanced.

## Status

Even though the tool is already pretty solid and generates exactly the same output as the old ANTLR4 jar, it is still not considered production ready. All (relevant) original unit tests have been ported and run successfully. Additionally, the tool was tested with all grammars in the [grammars-v4](https://github.com/mike-lischke/grammars-v4) repository.

See the [milestone 3](https://github.com/mike-lischke/ANTLRng/issues/10) for the current status and the plan.

## Getting Started

The first thing needed is a grammar, which defines the language you want to parse. Don't confuse that with the target language, which is the programming language for which you want to generate the parser and lexer files.

Here's a super simple grammar:

```antlr
grammar HelloWorld;
greeting: hello world EOF;
hello: 'hello';
world: 'world';
WS: [ \n\t]+ -> skip;
```

This defines a set of rules that comprise a very simple language (one that can parse only the input `hello world`, but with any amount of whitespace around each word).

Save this text as `HelloWorld.g4` (in your project folder, where you have installed the antlr-ng node package). You can now let antlr-ng generate a parser and a lexer from it. Open a terminal in the project root and execute:

```bash
npx antlr-ng -Dlanguage=TypeScript -o generated/ HelloWorld.g4
```

> The tool `npx` should be installed along with your NPM binary.

This will create a number of files you can ignore for now, except `HelloWorldLexer.ts` and `HelloWorldParser.ts`, which are the two classes for parsing input. We got TypeScript output because `TypeScript` was defined as target language. By using `-Dlanguage=Python3` it will instead generate .py files.

> Language identifiers are case-sensitive! You have to use exactly the same string as given in the list in the first paragraph. Watch out for the special identifiers for C++ and C#!

You can now import the generated classes and write a full parser application. This is, however, target language dependent. For TypeScript it looks like this:

```typescript
import { CharStream, CommonTokenStream } from 'antlr4ng';
import HelloWorldLexer from './generated/HelloWorldLexer';
import HelloWorldParser from './generated/HelloWorldParser';

const text = "hello \n \t world\n"
const input = CharStream.fromString(text);
const lexer = new HelloWorldLexer(input);
const tokens = new CommonTokenStream(lexer);
const parser = new HelloWorldParser(tokens);
const tree = parser.greeting();
```

Note the use of the `greeting()` method, which was auto generated from the `greeting` parser rule.

More information about target specific topics will follow as this project evolves. You can also use the docs from the old ANTLR4 tool, but keep in mind that there might be differences (especially how to invoke the tool).

# Advanced Topics

The sections below are meant for developers who work on antlr-ng or are interested in the internals of this project.

## Design Goals

- Make the tool work in browsers too, which requires an abstraction of file system access used in the tool.
- Strict separation of the tool and its runtimes, which simplifies the maintenance and releases of the tool a lot.
- Runtimes are completely handled by their owners, using a plugin system as used by many other tools, and are no longer part of the tool.
- The new tool is supposed to run in web browsers, as well as in Node.js environments. No further dependency is required, beyond that (especially no Java).
Expand All @@ -22,30 +95,25 @@ See the [milestone 3](https://github.com/mike-lischke/ANTLRng/issues/10) for the

## Feature Ideas

A loose collection of thoughts.

### Grammars

- Rework the import feature. Allow paths for the imports and allow to override imported rules. Make diamond imports working properly.
- Allow specifying a config file for tool runs, instead of only individual command line parameters.
- Allow generating files for multiple grammars in their own target folders (good for mass production like needed in the runtime tests).
- Allow specifying user defined prefixes/postfixes for generated methods (from parser/lexer rules) or allow a complete own pattern.

### Optimizations

- Save/load state to lower cold start time.
- Code optimizations (like converting recursions to iterations in the prediction code path).
- Remove token classes/interfaces (Token, CommonToken, WritableToken) and introduce a compact representation as a series of uint32 numbers, which saves space and can be shared more easily (e.g. in web workers or WebAssembly). Put custom token text in a string pool. Introduce helper methods which create the expected string representation of a token.
- Convert all pure data holder classes to interfaces (e.g. SimState).
- Make classes that are often used in hash sets/maps immutable, so we can cache hash codes for them (examples: Interval(Set), ATNConfig, ATNConfigSet).

### Target Specific Ideas

This is a tricky field and not easy to re-design. The original decision to allow target (language) specific code in a grammar made (and makes) sharing/reusing grammars very difficult. Below are some ideas:

- Find a better solution for target specific code, e.g. by extending the ANTLR language with target specific named action blocks.
- Even better: disallow any target specific code:
- Simple (unnamed) actions can be implemented in a base class as alt enter and exit listener methods (requires to label alts).
- Simple (unnamed) actions can be implemented in a base class as alt enter and exit listener methods (requires to use label alts).
- For predicates introduce a small and simple expression syntax, which uses mappings defined in the language template. This is not as flexible as the current solution, but sometimes less is more.
- No longer support rule parameters, init values and return values. They are rarely used and create a too tight connection to the generated code. Additionally, they prevent further development of the code generator (maybe at some point it is no longer meaningful to generate plain methods?).
- Requires a different solution for left-recursion removal which uses precedence values as rule parameters.
- Allow target authors to define new named actions, to avoid situations like for the current C++ target, with its ugly action names.
- Even better: avoid named actions altogether, but they are very useful for including copyrights, headers and class specific code. This is probably the most difficult feature to re-design. Possible solutions are:
- Support a very simple macro syntax in the grammar to allow replacing text blocks which are read from an external file (which then can contain target specific code etc.). This would also lower duplication (like the same copyright in different generated files).
Expand Down
8 changes: 8 additions & 0 deletions release-notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

# ANTLRng Release Notes

## 0.5.0

First public release, for public testing. Still some way to go.

## 0.4.0

The tool went through intensive testing by Ken Domino, who ran it against the entire grammars-v4 repository. Quite a few bugs that came out of that have been fixed.

## 0.3.0

All tool tests have been ported to TypeScript and are now running fine. The tool is now fully functional and can be used to generate parsers and lexers in TypeScript. However, the tests don't cover all features yet (listeners, visitors etc.), so there might still be some issues.
Expand Down
16 changes: 8 additions & 8 deletions src/Tool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
/* eslint-disable jsdoc/require-param, jsdoc/require-returns */

import { ATNSerializer, CharStream, CommonTokenStream } from "antlr4ng";
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
import path, { basename, dirname } from "path";
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { basename, dirname, isAbsolute, join } from "node:path";

import { ANTLRv4Parser } from "./generated/ANTLRv4Parser.js";

Expand Down Expand Up @@ -495,7 +495,7 @@ export class Tool implements ITool {
*/
public getOutputFile(g: Grammar, fileName: string): string {
const outputDir = this.getOutputDirectory(g.fileName);
const outputFile = path.join(outputDir, fileName);
const outputFile = join(outputDir, fileName);

if (!existsSync(outputDir)) {
mkdirSync(outputDir, { recursive: true });
Expand All @@ -508,11 +508,11 @@ export class Tool implements ITool {
let candidate = fileName;
if (!existsSync(candidate)) {
const parentDir = dirname(g.fileName); // Check the parent dir of input directory.
candidate = path.join(parentDir, fileName);
candidate = join(parentDir, fileName);
if (!existsSync(candidate)) { // try in lib dir
const libDirectory = this.toolParameters.libDirectory;
if (libDirectory) {
candidate = path.join(libDirectory, fileName);
candidate = join(libDirectory, fileName);
if (!existsSync(candidate)) {
return undefined;
}
Expand All @@ -534,19 +534,19 @@ export class Tool implements ITool {
* @param fileNameWithPath path to input source
*/
public getOutputDirectory(fileNameWithPath: string): string {
const dirName = path.dirname(fileNameWithPath);
const dirName = dirname(fileNameWithPath);
if (this.toolParameters.exactOutputDir && this.toolParameters.outputDirectory) {
if (this.toolParameters.outputDirectory) {
return this.toolParameters.outputDirectory;
}
}

if (this.toolParameters.outputDirectory) {
if (path.isAbsolute(this.toolParameters.outputDirectory)) {
if (isAbsolute(this.toolParameters.outputDirectory)) {
return this.toolParameters.outputDirectory;
}

return path.join(dirName, this.toolParameters.outputDirectory);
return join(dirName, this.toolParameters.outputDirectory);
}

return dirName;
Expand Down
3 changes: 2 additions & 1 deletion src/codegen/CodeGenerator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@

/* eslint-disable jsdoc/require-returns */

import { writeFileSync } from "node:fs";

import { Token } from "antlr4ng";
import { writeFileSync } from "fs";
import { AutoIndentWriter, ST, StringWriter, type IST, type STGroup } from "stringtemplate4ts";

import { Constants } from "../Constants.js";
Expand Down
Loading

0 comments on commit 8f722ad

Please sign in to comment.