From 8f722ad7a1f9aea9c62f3c1dbcfd1ceb7fa44950 Mon Sep 17 00:00:00 2001 From: Mike Lischke Date: Mon, 6 Jan 2025 17:22:59 +0100 Subject: [PATCH] New minor release First public release --- .npmignore | 21 +++++++ build/build.ts | 39 ++++++++++++ build/copy-templates.ts | 12 ++++ build/generate-unicode-data.ts | 10 ++-- cli/Interpreter.ts | 4 +- cli/TestRig.ts | 9 +-- package.json | 20 ++++--- readme.md | 92 +++++++++++++++++++++++++---- release-notes.md | 8 +++ src/Tool.ts | 16 ++--- src/codegen/CodeGenerator.ts | 3 +- src/codegen/target/GoTarget.ts | 2 +- src/parse/TokenVocabParser.ts | 4 +- src/tool-parameters.ts | 39 +++++++++++- src/tool/Grammar.ts | 6 +- tests/TestCompositeGrammars.spec.ts | 3 +- tests/helpers/Test.ts.stg | 2 +- tsconfig.json | 3 +- 18 files changed, 239 insertions(+), 54 deletions(-) create mode 100644 .npmignore create mode 100644 build/build.ts create mode 100644 build/copy-templates.ts diff --git a/.npmignore b/.npmignore new file mode 100644 index 0000000..ac0748f --- /dev/null +++ b/.npmignore @@ -0,0 +1,21 @@ +@@ -0,0 +1,10 @@ +.github +.vscode +tests/ +coverage/ +dist/*.map +cli/*.ts +!cli/*.d.ts +src/ +!dist/src +build/ +package/ +templates/ +!dist/templates/ + +eslint.config.mjs +.project +tsconfig.json +cspell.json +vitest.config.ts +*.tgz diff --git a/build/build.ts b/build/build.ts new file mode 100644 index 0000000..a468e8f --- /dev/null +++ b/build/build.ts @@ -0,0 +1,39 @@ +/* + * Copyright (c) Mike Lischke. All rights reserved. + * Licensed under the BSD 3-clause License. See License.txt in the project root for license information. 
+ */ + +import * as esbuild from "esbuild"; + +const build = async () => { + try { + await Promise.all([ + esbuild.build({ + entryPoints: ["src/**/*.ts"], + bundle: false, + outdir: "dist/src", + format: "esm", + target: "es2022", + platform: "node", + keepNames: true, + packages: "external", + }), + esbuild.build({ + entryPoints: ["cli/**/*.ts"], + bundle: false, + outdir: "dist/cli", + format: "esm", + platform: "node", + target: "es2022", + //outExtension: { ".js": ".cjs" }, + }), + ]); + + console.log("Build completed successfully"); + } catch (error) { + console.error("Build failed:", error); + process.exit(1); + } +}; + +await build(); diff --git a/build/copy-templates.ts b/build/copy-templates.ts new file mode 100644 index 0000000..312ac30 --- /dev/null +++ b/build/copy-templates.ts @@ -0,0 +1,12 @@ +/* + * Copyright (c) Mike Lischke. All rights reserved. + * Licensed under the BSD 3-clause License. See License.txt in the project root for license information. + */ + +import { cp } from "node:fs/promises"; + +/** + * This script is to be used after the build process to copy the templates/ folder to the build folder. + */ + +await cp("templates", "dist/templates", { force: true, recursive: true }); diff --git a/build/generate-unicode-data.ts b/build/generate-unicode-data.ts index 59b923f..9166a8f 100644 --- a/build/generate-unicode-data.ts +++ b/build/generate-unicode-data.ts @@ -1,7 +1,7 @@ /* * Copyright (c) Mike Lischke. All rights reserved. - * Licensed under the MIT License. See License.txt in the project root for license information. -*/ + * Licensed under the BSD 3-clause License. See License.txt in the project root for license information. + */ // cspell: ignore inpc, insc @@ -10,9 +10,9 @@ * with that data. The file is then used by the ANTLR tool to support Unicode properties and categories. 
*/ -import { createWriteStream } from "fs"; -import { readdir, readFile, stat } from "fs/promises"; -import { dirname, join } from "path"; +import { createWriteStream } from "node:fs"; +import { readdir, readFile, stat } from "node:fs/promises"; +import { dirname, join } from "node:path"; import { IntervalSet } from "antlr4ng"; diff --git a/cli/Interpreter.ts b/cli/Interpreter.ts index cbe9cf9..76b6228 100644 --- a/cli/Interpreter.ts +++ b/cli/Interpreter.ts @@ -4,7 +4,7 @@ */ import { Option, program } from "commander"; -import { createWriteStream } from "fs"; +import { createWriteStream } from "node:fs"; import { readFile } from "fs/promises"; import { CharStream, CommonToken, CommonTokenStream, DecisionInfo, ParseInfo } from "antlr4ng"; @@ -135,7 +135,7 @@ export class Interpreter { if (tok instanceof CommonToken) { console.log(tok.toString(lexEngine)); } else { - // eslint-disable-next-line @typescript-eslint/no-base-to-string + console.log(tok.toString()); } } diff --git a/cli/TestRig.ts b/cli/TestRig.ts index 4590e5c..64ee414 100644 --- a/cli/TestRig.ts +++ b/cli/TestRig.ts @@ -3,11 +3,8 @@ * Licensed under the BSD 3-clause License. See License.txt in the project root for license information. 
*/ -/* eslint-disable jsdoc/require-param, jsdoc/require-returns */ - - /* - eslint-disable @typescript-eslint/no-base-to-string , @typescript-eslint/no-unsafe-function-type, + eslint-disable @typescript-eslint/no-unsafe-function-type, @typescript-eslint/no-unsafe-return */ @@ -184,5 +181,5 @@ export class TestRig { } } -const testRig = new TestRig(); -await testRig.run(); +//const testRig = new TestRig(); +//await testRig.run(); diff --git a/package.json b/package.json index 2f6880a..c78d575 100644 --- a/package.json +++ b/package.json @@ -4,12 +4,15 @@ "description": "Next generation ANTLR Tool", "type": "module", "author": "Mike Lischke", - "repository": "https://github.com/mike-lischke/antlr-ng", + "repository": { + "type": "git", + "url": "git+https://github.com/mike-lischke/antlr-ng.git" + }, "bugs": { "url": "https://github.com/mike-lischke/antlr-ng/issues" }, "bin": { - "antlr-ng": "./cli/runner.js" + "antlr-ng": "dist/cli/runner.js" }, "keywords": [ "lexer", @@ -49,22 +52,21 @@ "vitest": "2.1.8" }, "scripts": { - "build": "npm run generate-tool-parsers && npm run generate-test-parsers && npm run generate-unicode-data && npm run build-mjs && tsc -p tsconfig.json", - "build-bundle": "esbuild ./src/index.js --main-fields=module,main --bundle --target=esnext --keep-names --platform=node --external:antlr4ng --external:commander --external:fast-printf --external:stringtemplate4ts --external:unicode-properties", - "build-mjs": "npm run build-bundle -- --outfile=dist/index.mjs --format=esm", + "prepublishOnly": "npm run build && npm run copy-templates && npm run test", + "build": "npm run generate-tool-parsers && npm run generate-test-parsers && npm run generate-unicode-data && npm run esbuild && tsc -p tsconfig.json", + "esbuild": "tsx build/build.ts", + "copy-templates": "tsx build/copy-templates.ts", "run": "tsx cli/runner.ts --version", "lint": "eslint \"./src/**/*.ts\"", "lint:fix": "eslint \"./src/**/*.ts\" --fix", "test": "NODE_NO_WARNINGS=1 vitest 
--no-watch --no-coverage", "generate-tool-parsers": "./build/generate-tool-parsers.cmd", "generate-test-parsers": "./build/generate-test-parsers.cmd", - "generate-unicode-data": "tsx ./build/generate-unicode-data.ts", - "antlr-ng": "antlr-ng" + "generate-unicode-data": "tsx ./build/generate-unicode-data.ts" }, "exports": { "types": "./dist/src/index.d.ts", - "require": "./dist/index.cjs", - "import": "./dist/index.mjs" + "import": "./dist/src/index.js" }, "browserslist": [ "defaults and fully supports es6-module", diff --git a/readme.md b/readme.md index f865d15..6f2ecec 100644 --- a/readme.md +++ b/readme.md @@ -1,18 +1,91 @@ -[![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/mike-lischke/ANTLRng/nodejs.yml?style=for-the-badge&logo=github)](https://github.com/mike-lischke/ANTLRng/actions/workflows/nodejs.yml) +[![GitHub Workflow Status (with event)](https://img.shields.io/github/actions/workflow/status/mike-lischke/ANTLRng/nodejs.yml?style=for-the-badge&logo=github)](https://github.com/mike-lischke/ANTLRng/actions/workflows/nodejs.yml)[![Weekly Downloads](https://img.shields.io/npm/dw/antlr-ng?style=for-the-badge&color=blue)](https://www.npmjs.com/package/antlr-ng) +[![npm version](https://img.shields.io/npm/v/antlr-ng?style=for-the-badge&color=yellow)](https://www.npmjs.com/package/antlr-ng) ANTLRng -# ANTLRng +# antlr-ng - Next Generation ANTLR -This project is a TypeScript port of the ANTLR tool (originally written in Java) and is still work-in-progress. It implements own mechanisms to work with ANTLR grammars, works in browsers and generally moves away from its Java centric roots. +**Another Tool for Language Recognition** + +A tool/package that takes a defined language (provided in a grammar file) and generates parser and lexer classes in one of the supported target languages. These classes can be used in your project to parse input specified by the grammar file.
Supported target languages are: + +- TypeScript/JavaScript +- Java +- C++ (language identifier: Cpp) +- C# (language identifier: CSharp) +- Go +- Python3 +- Dart +- Swift +- PHP + +This project started as a TypeScript port of the old ANTLR4 tool (originally written in Java) and includes the entire feature set of the Java version and is constantly enhanced. ## Status +Even though the tool is already pretty solid and generates exactly the same output as the old ANTLR4 jar, it is still not considered production ready. All (relevant) original unit tests have been ported and run successfully. Additionally, the tool was tested with all grammars in the [grammars-v4](https://github.com/mike-lischke/grammars-v4) repository. + See the [milestone 3](https://github.com/mike-lischke/ANTLRng/issues/10) for the current status and the plan. +## Getting Started + +The first thing needed is a grammar, which defines the language you want to parse. Don't confuse that with the target language, which is the programming language for which you want to generate the parser and lexer files. + +Here's a super simple grammar: + +```antlr +grammar HelloWorld; + +greeting: hello world EOF; + +hello: 'hello'; +world: 'world'; + +WS: [ \n\t]+ -> skip; + +``` + +This defines a set of rules that comprise a very simple language (one that can parse the input `hello world` only, but with any number of whitespace characters around each word). + +Save this text as `HelloWorld.g4` file (in your project folder, where you have installed the antlr-ng node package), which you can use now to let antlr-ng generate a parser and lexer for. Open a terminal in the project root and execute: + +```bash +npx antlr-ng -Dlanguage=TypeScript -o generated/ HelloWorld.g4 +``` + +> The tool `npx` should be installed along with your NPM binary. + +This will create a number of files you can ignore for now, except `HelloWorldLexer.ts` and `HelloWorldParser.ts`, which are the two classes for parsing input.
We got TypeScript output because `TypeScript` was defined as target language. By using `-Dlanguage=Python3` it will instead generate .py files. + +> Language identifiers are case-sensitive! You have to use exactly the same string as given in the list in the first paragraph. Watch out for the special identifiers for C++ and C#! + +You now can import the generated classes and write a full parser application. This is however target language dependent. For TypeScript it looks like this: + +```typescript +import { CharStream, CommonTokenStream } from 'antlr4ng'; +import HelloWorldLexer from './generated/HelloWorldLexer'; +import HelloWorldParser from './generated/HelloWorldParser'; + +const text = "hello \n \t world\n" +const input = CharStream.fromString(text); +const lexer = new HelloWorldLexer(input); +const tokens = new CommonTokenStream(lexer); +const parser = new HelloWorldParser(tokens); +const tree = parser.greeting(); +``` + +Note the use of the `greeting()` method, which was auto generated from the `greeting` parser rule. + +More information about target specific topics will follow as this project evolves. You can also use the docs from the old ANTLR4 tool, but keep in mind that there might be differences (especially how to invoke the tool). + +# Advanced Topics + +The sections below are meant for developers working on antlr-ng or are interested in the internals of this project. + ## Design Goals +- Make the tool work in browsers too, which requires an abstraction of file system access used in the tool. - Strict separation of the tool and its runtimes, which simplifies the maintenance and releases of the tool a lot. - Runtimes are completely handled by their owners, using a plugin system as used by many other tools, and are no longer part of the tool. - The new tool is supposed to run in web browsers, as well as in Node.js environments. No further dependency is required, beyond that (especially no Java). 
@@ -22,6 +95,8 @@ See the [milestone 3](https://github.com/mike-lischke/ANTLRng/issues/10) for the ## Feature Ideas +A loose collection of thoughts. + ### Grammars - Rework the import feature. Allow paths for the imports and allow to override imported rules. Make diamond imports working properly. @@ -29,23 +104,16 @@ See the [milestone 3](https://github.com/mike-lischke/ANTLRng/issues/10) for the - Allow generating files for multiple grammars in their own target folders (good for mass production like needed in the runtime tests). - Allow specifying user defined prefixes/postfixes for generated methods (from parser/lexer rules) or allow a complete own pattern. -### Optimizations - -- Save/load state to lower cold start time. -- Code optimizations (like converting recursions to iterations in the prediction code path). -- Remove token classes/interfaces (Token, CommonToken, WritableToken) and introduce a compact representation as a series of uin32 numbers, that save space and can be shared more easily (e.g. in web workers or WebAssembly). Put custom token text in a string pool. Introduce helper methods which create the expected string representation of a token. -- Convert all pure data holder classes to interfaces (e.g. SimState). -- Make classes that are often used in hash sets/maps immutable, so we can cache hash codes for them (examples: Interval(Set), ATNConfig, ATNConfigSet). - ### Target Specific Ideas This is a tricky field and not easy to re-design. The original decision to allow target (language) specific code in a grammar made (and makes) sharing/reusing grammars very difficult. Below are some ideas: - Find a better solution for target specific code, e.g. by extending the ANTLR language with target specific named action blocks. - Even better: disallow any target specific code: - - Simple (unnamed) actions can be implemented in a base class as alt enter and exit listener methods (requires to label alts). 
+ - Simple (unnamed) actions can be implemented in a base class as alt enter and exit listener methods (requires the use of labeled alts). - For predicates introduce a small and simple expression syntax, which uses mappings defined in the language template. This is not as flexible as the current solution, but sometimes less is more. - No longer support rule parameters, init values and return values. They are rarely used and create a too tight connection to the generated code. Additionally, they prevent further development of the code generator (maybe at some point it is no longer meaningful to generate plain methods?). + - Requires a different solution for left-recursion removal which uses precedence values as rule parameters. - Allow target authors to define new named actions, to avoid situations like for the current C++ target, with its ugly action names. - Even better: avoid named actions altogether, but they are very useful for including copyrights, headers and class specific code. This is probably the most difficult feature to re-design. Possible solutions are: - Support a very simple macro syntax in the grammar to allow replacing text blocks which are read from an external file (which then can contain target specific code etc.). This would also lower duplication (like the same copyright in different generated files). diff --git a/release-notes.md b/release-notes.md index 2481a73..dd2301e 100644 --- a/release-notes.md +++ b/release-notes.md @@ -2,6 +2,14 @@ # ANTLRng Release Notes +## 0.5.0 + +First public release, for public testing. Still some way to go. + +## 0.4.0 + +The tool went through intensive testing by Ken Domino, who sent it through the entire grammars-v4 repository. Fixed quite a few bugs that came out of that. + ## 0.3.0 All tool tests have been ported to TypeScript and are now running fine. The tool is now fully functional and can be used to generate parsers and lexers in TypeScript.
However, the tests don't cover all features yet (listeners, visitors etc.), so there might still be some issues. diff --git a/src/Tool.ts b/src/Tool.ts index 6e3cc9c..4d31949 100644 --- a/src/Tool.ts +++ b/src/Tool.ts @@ -6,8 +6,8 @@ /* eslint-disable jsdoc/require-param, jsdoc/require-returns */ import { ATNSerializer, CharStream, CommonTokenStream } from "antlr4ng"; -import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"; -import path, { basename, dirname } from "path"; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { basename, dirname, isAbsolute, join } from "node:path"; import { ANTLRv4Parser } from "./generated/ANTLRv4Parser.js"; @@ -495,7 +495,7 @@ export class Tool implements ITool { */ public getOutputFile(g: Grammar, fileName: string): string { const outputDir = this.getOutputDirectory(g.fileName); - const outputFile = path.join(outputDir, fileName); + const outputFile = join(outputDir, fileName); if (!existsSync(outputDir)) { mkdirSync(outputDir, { recursive: true }); @@ -508,11 +508,11 @@ export class Tool implements ITool { let candidate = fileName; if (!existsSync(candidate)) { const parentDir = dirname(g.fileName); // Check the parent dir of input directory. 
- candidate = path.join(parentDir, fileName); + candidate = join(parentDir, fileName); if (!existsSync(candidate)) { // try in lib dir const libDirectory = this.toolParameters.libDirectory; if (libDirectory) { - candidate = path.join(libDirectory, fileName); + candidate = join(libDirectory, fileName); if (!existsSync(candidate)) { return undefined; } @@ -534,7 +534,7 @@ export class Tool implements ITool { * @param fileNameWithPath path to input source */ public getOutputDirectory(fileNameWithPath: string): string { - const dirName = path.dirname(fileNameWithPath); + const dirName = dirname(fileNameWithPath); if (this.toolParameters.exactOutputDir && this.toolParameters.outputDirectory) { if (this.toolParameters.outputDirectory) { return this.toolParameters.outputDirectory; @@ -542,11 +542,11 @@ export class Tool implements ITool { } if (this.toolParameters.outputDirectory) { - if (path.isAbsolute(this.toolParameters.outputDirectory)) { + if (isAbsolute(this.toolParameters.outputDirectory)) { return this.toolParameters.outputDirectory; } - return path.join(dirName, this.toolParameters.outputDirectory); + return join(dirName, this.toolParameters.outputDirectory); } return dirName; diff --git a/src/codegen/CodeGenerator.ts b/src/codegen/CodeGenerator.ts index c050fa7..ef780ad 100644 --- a/src/codegen/CodeGenerator.ts +++ b/src/codegen/CodeGenerator.ts @@ -5,8 +5,9 @@ /* eslint-disable jsdoc/require-returns */ +import { writeFileSync } from "node:fs"; + import { Token } from "antlr4ng"; -import { writeFileSync } from "fs"; import { AutoIndentWriter, ST, StringWriter, type IST, type STGroup } from "stringtemplate4ts"; import { Constants } from "../Constants.js"; diff --git a/src/codegen/target/GoTarget.ts b/src/codegen/target/GoTarget.ts index bc2ea5c..6906710 100644 --- a/src/codegen/target/GoTarget.ts +++ b/src/codegen/target/GoTarget.ts @@ -7,7 +7,7 @@ // cspell: ignore gofmt ioutil wjkohnen -import path from "path"; +import * as path from "path"; import type { ST } 
from "stringtemplate4ts"; import { GrammarType } from "../../support/GrammarType.js"; import { Grammar } from "../../tool/Grammar.js"; diff --git a/src/parse/TokenVocabParser.ts b/src/parse/TokenVocabParser.ts index 877daac..79d9b25 100644 --- a/src/parse/TokenVocabParser.ts +++ b/src/parse/TokenVocabParser.ts @@ -5,8 +5,8 @@ /* eslint-disable jsdoc/require-returns */ -import { existsSync, readFileSync } from "fs"; -import { dirname, join } from "path"; +import { existsSync, readFileSync } from "node:fs"; +import { dirname, join } from "node:path"; import { Token } from "antlr4ng"; diff --git a/src/tool-parameters.ts b/src/tool-parameters.ts index d744170..0cdb33d 100644 --- a/src/tool-parameters.ts +++ b/src/tool-parameters.ts @@ -3,7 +3,9 @@ * Licensed under the BSD 3-clause License. See License.txt in the project root for license information. */ -const packageJson = await import("../package.json", { assert: { type: "json" } }); +import { readFile } from "node:fs/promises"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; import { Command, Option } from "commander"; @@ -29,7 +31,40 @@ export interface IToolParameters { exactOutputDir?: boolean, } -export const antlrVersion = packageJson.default.version; +/** + * Searches the package.json file in the same folder as this script or any parent folder (up to + * a node_modules folder). + * + * @returns The version of the package. 
+ */ +const getPackageVersion = async (): Promise => { + const findPackageJson = async (path: string): Promise => { + const packageFile = resolve(path, "package.json"); + + try { + await readFile(packageFile); + + return packageFile; + } catch { + const parent = dirname(path); + + if (parent.endsWith("node_modules")) { + throw new Error("No package.json found."); + } + + return findPackageJson(parent); + } + }; + + const fileName = fileURLToPath(import.meta.url); + const dirName = dirname(fileName); + const packageFile = await findPackageJson(dirName); + const packageJson = JSON.parse(await readFile(packageFile, "utf-8")) as { version: string; }; + + return packageJson.version; +}; + +export const antlrVersion = await getPackageVersion(); /** * Used to parse tool parameters given as string list. Usually, this is used for tests. diff --git a/src/tool/Grammar.ts b/src/tool/Grammar.ts index e0ae732..1c0da42 100644 --- a/src/tool/Grammar.ts +++ b/src/tool/Grammar.ts @@ -785,12 +785,12 @@ export class Grammar implements IGrammar, AttributeResolver { return Grammar.INVALID_TOKEN_NAME; } - if (ttype >= 0 && ttype < this.typeToStringLiteralList.length && this.typeToStringLiteralList[ttype]) { - return this.typeToStringLiteralList[ttype]; + if (ttype >= 0 && ttype < this.typeToStringLiteralList.length && this.typeToStringLiteralList[ttype] != null) { + return this.typeToStringLiteralList[ttype] ?? String(ttype); } if (ttype >= 0 && ttype < this.typeToTokenList.length && this.typeToTokenList[ttype] != null) { - return this.typeToTokenList[ttype]; + return this.typeToTokenList[ttype] ?? 
String(ttype); } return String(ttype); diff --git a/tests/TestCompositeGrammars.spec.ts b/tests/TestCompositeGrammars.spec.ts index c6f7214..8db9749 100644 --- a/tests/TestCompositeGrammars.spec.ts +++ b/tests/TestCompositeGrammars.spec.ts @@ -5,9 +5,10 @@ import { describe, expect, it } from "vitest"; -import { mkdirSync, mkdtempSync, readFileSync, rmdirSync, writeFileSync } from "fs"; +import { mkdirSync, mkdtempSync, readFileSync, rmdirSync, writeFileSync } from "node:fs"; import { tmpdir } from "node:os"; import { basename, dirname, join } from "node:path"; + import { ErrorType } from "../src/tool/ErrorType.js"; import { GrammarSemanticsMessage } from "../src/tool/GrammarSemanticsMessage.js"; import { Grammar } from "../src/tool/index.js"; diff --git a/tests/helpers/Test.ts.stg b/tests/helpers/Test.ts.stg index aef7e33..77c5eb5 100644 --- a/tests/helpers/Test.ts.stg +++ b/tests/helpers/Test.ts.stg @@ -1,4 +1,4 @@ -import fs from "fs"; +import fs from "node:fs"; import { CharStream, CommonTokenStream, diff --git a/tsconfig.json b/tsconfig.json index 4baa31d..3115df2 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -31,6 +31,7 @@ "node_modules", "dist", "tests", - "vitest.config.ts" + "vitest.config.ts", + "templates" ], }