From 798a1648b2441dea95abb806e0ad21a9238610bb Mon Sep 17 00:00:00 2001 From: Alexandre Plateau Date: Fri, 13 Dec 2024 18:23:12 +0100 Subject: [PATCH] feat(ast optimizer): adding AST optimization tests and fixing the json compiler to accept (if cond then) and not just (if cond then else) --- CHANGELOG.md | 2 + include/Ark/Compiler/AST/Optimizer.hpp | 23 +-- include/CLI/JsonCompiler.hpp | 3 +- src/arkreactor/Compiler/AST/Node.cpp | 1 - src/arkreactor/Compiler/AST/Optimizer.cpp | 137 ++++++++++-------- src/arkreactor/Compiler/Compiler.cpp | 4 + src/arkscript/JsonCompiler.cpp | 33 +++-- tests/unittests/OptimizerSuite.cpp | 33 +++++ tests/unittests/ValidAstSuite.cpp | 2 +- .../OptimizerSuite/dead_code_elimination.ark | 4 + .../OptimizerSuite/dead_code_elimination.json | 1 + .../OptimizerSuite/unused_symbols.ark | 4 + .../OptimizerSuite/unused_symbols.json | 1 + 13 files changed, 151 insertions(+), 97 deletions(-) create mode 100644 tests/unittests/OptimizerSuite.cpp create mode 100644 tests/unittests/resources/OptimizerSuite/dead_code_elimination.ark create mode 100644 tests/unittests/resources/OptimizerSuite/dead_code_elimination.json create mode 100644 tests/unittests/resources/OptimizerSuite/unused_symbols.ark create mode 100644 tests/unittests/resources/OptimizerSuite/unused_symbols.json diff --git a/CHANGELOG.md b/CHANGELOG.md index cc3776aab..157a7216b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ - modify list and return a copy `(string:setAt string index char)` (bound checked) - added in place list mutation: `(@= list|string index new_value)`, `(@@= list|list index1 index2 new_value|char)` (bound checked) - compile time argument count check for `and` and `or` +- basic dead code elimination in the AST optimizer ### Changed - instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument @@ -97,6 +98,7 @@ - renamed `str:xyz` builtins to `string:xyz` for uniformity with the standard library - `string:find` takes an optional third argument, startIndex (where to start the lookup from, default 0 - `list:setAt` can work with negative indexes, and is now bound checked +- re-enabled the AST optimizer, only used for the main `arkscript` executable (not enabled when embedding arkscript, so that one can grab variables from the VM) ### Removed - removed unused `NodeType::Closure` diff --git a/include/Ark/Compiler/AST/Optimizer.hpp b/include/Ark/Compiler/AST/Optimizer.hpp index f37c194e7..16e4bccc8 100644 --- a/include/Ark/Compiler/AST/Optimizer.hpp +++ b/include/Ark/Compiler/AST/Optimizer.hpp @@ -54,33 +54,18 @@ namespace Ark::internal std::unordered_map m_sym_appearances; /** - * @brief Generate a fancy error message + * @brief Count the occurrences of each symbol in the AST, recursively, and prune if false/true, while false/true * - * @param message * @param node */ - [[noreturn]] static void throwOptimizerError(const std::string& message, const Node& node); + void countAndPruneDeadCode(Node& node); /** - * @brief Iterate over the AST and remove unused top level functions and constants - * - */ - void removeUnused(); - - /** - * @brief Run a given functor on the global scope symbols - * - * @param node - * @param func - */ - void runOnGlobalScopeVars(Node& node, const std::function& func); - - /** - * @brief Count the occurrences of each symbol in the AST, recursively + * @brief Remove unused global variables from the AST * * @param node */ - void countOccurences(const Node& node); + void pruneUnusedGlobalVariables(Node& node); }; } diff --git a/include/CLI/JsonCompiler.hpp b/include/CLI/JsonCompiler.hpp index fb1a15e38..bbbe7f4a8 100644 --- a/include/CLI/JsonCompiler.hpp +++ b/include/CLI/JsonCompiler.hpp @@ -16,8 +16,9 @@ class JsonCompiler final * * @param debug the debug level * @param lib_env list of path to the directories of the std lib + * @param features compiler features to enable. By default, none are active to be able to dump a file AST without any processing */ - JsonCompiler(unsigned debug, const std::vector& lib_env); + JsonCompiler(unsigned debug, const std::vector& lib_env, uint16_t features = 0); /** * @brief Feed the different variables with information taken from the given source code file diff --git a/src/arkreactor/Compiler/AST/Node.cpp b/src/arkreactor/Compiler/AST/Node.cpp index d1bd88426..644b5f005 100644 --- a/src/arkreactor/Compiler/AST/Node.cpp +++ b/src/arkreactor/Compiler/AST/Node.cpp @@ -300,7 +300,6 @@ namespace Ark::internal } case NodeType::Unused: - os << "Unused:" << string(); break; } return os; diff --git a/src/arkreactor/Compiler/AST/Optimizer.cpp b/src/arkreactor/Compiler/AST/Optimizer.cpp index 692a04075..15b7a0579 100644 --- a/src/arkreactor/Compiler/AST/Optimizer.cpp +++ b/src/arkreactor/Compiler/AST/Optimizer.cpp @@ -1,7 +1,5 @@ #include -#include - namespace Ark::internal { Optimizer::Optimizer(const unsigned debug) noexcept : @@ -10,9 +8,16 @@ namespace Ark::internal void Optimizer::process(const Node& ast) { - m_logger.traceStart("process"); + // do not handle non-list nodes + if (ast.nodeType() != NodeType::List) + return; m_ast = ast; - removeUnused(); + + m_logger.traceStart("process"); + countAndPruneDeadCode(m_ast); + + // logic: remove piece of code with only 1 reference, if they aren't function calls + pruneUnusedGlobalVariables(m_ast); m_logger.traceEnd(); m_logger.trace("AST after name pruning nodes"); @@ -25,82 +30,90 @@ namespace Ark::internal return m_ast; } - void Optimizer::throwOptimizerError(const std::string& message, const Node& node) - { - throw CodeError(message, node.filename(), node.line(), node.col(), node.repr()); - } - - void Optimizer::removeUnused() + void Optimizer::countAndPruneDeadCode(Node& node) { - // do not handle non-list nodes - if (m_ast.nodeType() != NodeType::List) - return; - - countOccurences(m_ast); + if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::Capture) + { + auto [element, inserted] = m_sym_appearances.try_emplace(node.string(), 0); + if (!inserted) + element->second++; + } + else if (node.nodeType() == NodeType::Field) + { + for (auto& child : node.list()) + countAndPruneDeadCode(child); + } + else if (node.nodeType() == NodeType::List) + { + // FIXME: very primitive removal of (if true/false ...) and (while false ...) + if (node.constList().size() > 1 && node.constList().front().nodeType() == NodeType::Keyword && + (node.constList().front().keyword() == Keyword::If || node.constList().front().keyword() == Keyword::While)) + { + const auto keyword = node.constList().front().keyword(); + const auto condition = node.constList()[1]; + const auto body = node.constList()[2]; - for (const auto& [name, uses] : m_sym_appearances) - m_logger.debug("{} -> {}", name, uses); + if (condition.nodeType() == NodeType::Symbol && condition.string() == "false") + { + // replace the node by an Unused, it is either a (while cond block) or (if cond then) + if (node.constList().size() == 3) + node = Node(NodeType::Unused); + else // it is a (if cond then else) + { + const auto back = node.constList().back(); + node = back; + } + } + // only update '(if true then [else])' to 'then' + else if (keyword == Keyword::If && condition.nodeType() == NodeType::Symbol && condition.string() == "true") + node = body; - // logic: remove piece of code with only 1 reference, if they aren't function calls - runOnGlobalScopeVars(m_ast, [this](const Node& node, Node& parent, const std::size_t idx) { - const std::string name = node.constList()[1].string(); - // a variable was only declared and never used - if (m_sym_appearances.contains(name) && m_sym_appearances[name] < 1) - { - m_logger.debug("Removing unused variable '{}'", name); - // erase the node from the list - parent.list().erase(parent.list().begin() + static_cast::difference_type>(idx)); + // do not try to iterate on the child nodes as they do not exist anymore, + // we performed some optimization that squashed them. + if (!node.isListLike()) + return; } - }); + + // iterate over children + for (auto& child : node.list()) + countAndPruneDeadCode(child); + } + else if (node.nodeType() == NodeType::Namespace) + countAndPruneDeadCode(*node.arkNamespace().ast); } - void Optimizer::runOnGlobalScopeVars(Node& node, const std::function& func) + void Optimizer::pruneUnusedGlobalVariables(Node& node) { - auto i = node.constList().size(); - // iterate only on the first level, using reverse iterators to avoid copy-delete-move to nowhere - for (auto it = node.list().rbegin(); it != node.list().rend(); ++it) + for (auto& child : node.list()) { - i--; - - if (it->nodeType() == NodeType::List && !it->constList().empty() && - it->constList()[0].nodeType() == NodeType::Keyword) + if (child.nodeType() == NodeType::List && !child.constList().empty() && + child.constList()[0].nodeType() == NodeType::Keyword) { - Keyword kw = it->constList()[0].keyword(); + const Keyword kw = child.constList()[0].keyword(); // eliminate nested begin blocks if (kw == Keyword::Begin) { - runOnGlobalScopeVars(*it, func); + pruneUnusedGlobalVariables(child); // skip let/ mut detection continue; } - // check if it's a let/mut declaration + + // check if it's a let/mut declaration and perform removal if (kw == Keyword::Let || kw == Keyword::Mut) - func(*it, node, i); - } - else if (it->nodeType() == NodeType::Namespace) - { - m_logger.debug("Traversing namespace {}", it->arkNamespace().name); - runOnGlobalScopeVars(*it->arkNamespace().ast, func); + { + const std::string name = child.constList()[1].string(); + // a variable was only declared and never used + if (m_sym_appearances.contains(name) && m_sym_appearances[name] < 1) + { + m_logger.debug("Removing unused variable '{}'", name); + // erase the node by turning it to an Unused node + child = Node(NodeType::Unused); + } + } } + else if (child.nodeType() == NodeType::Namespace) + pruneUnusedGlobalVariables(*child.arkNamespace().ast); } } - - void Optimizer::countOccurences(const Node& node) - { - if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::Capture) - { - auto [element, inserted] = m_sym_appearances.try_emplace(node.string(), 0); - if (!inserted) - element->second++; - } - else if (node.nodeType() == NodeType::List || node.nodeType() == NodeType::Field) - { - // iterate over children - for (const auto& child : node.constList()) - countOccurences(child); - } - else if (node.nodeType() == NodeType::Namespace) - countOccurences(*node.constArkNamespace().ast); - } } diff --git a/src/arkreactor/Compiler/Compiler.cpp b/src/arkreactor/Compiler/Compiler.cpp index a72e0a050..badf73bbf 100644 --- a/src/arkreactor/Compiler/Compiler.cpp +++ b/src/arkreactor/Compiler/Compiler.cpp @@ -143,6 +143,10 @@ namespace Ark::internal // namespace nodes else if (x.nodeType() == NodeType::Namespace) compileExpression(*x.constArkNamespace().ast, p, is_result_unused, is_terminal, var_name); + else if (x.nodeType() == NodeType::Unused) + { + // do nothing, explicitly + } // empty code block should be nil else if (x.constList().empty()) { diff --git a/src/arkscript/JsonCompiler.cpp b/src/arkscript/JsonCompiler.cpp index 9dc9a2f66..7e51c957a 100644 --- a/src/arkscript/JsonCompiler.cpp +++ b/src/arkscript/JsonCompiler.cpp @@ -5,12 +5,13 @@ #include #include -#include +#include +#include using namespace Ark::internal; -JsonCompiler::JsonCompiler(const unsigned debug, const std::vector& lib_env) : - m_welder(debug, lib_env, 0) +JsonCompiler::JsonCompiler(const unsigned debug, const std::vector& lib_env, const uint16_t features) : + m_welder(debug, lib_env, features) {} void JsonCompiler::feed(const std::string& filename) @@ -138,9 +139,14 @@ std::string JsonCompiler::_compile(const Node& node) case Keyword::If: { // (if condition then else) - json += fmt::format( - R"({{"type": "If", "condition": {}, "then": {}, "else": {}}})", - _compile(node.constList()[1]), _compile(node.constList()[2]), _compile(node.constList()[3])); + if (node.constList().size() == 4) + json += fmt::format( + R"({{"type": "If", "condition": {}, "then": {}, "else": {}}})", + _compile(node.constList()[1]), _compile(node.constList()[2]), _compile(node.constList()[3])); + else + json += fmt::format( + R"({{"type": "If", "condition": {}, "then": {}}})", + _compile(node.constList()[1]), _compile(node.constList()[2])); break; } @@ -236,6 +242,9 @@ std::string JsonCompiler::_compile(const Node& node) break; } + case NodeType::Unused: + break; + default: throw Ark::Error(fmt::format( "Not handled NodeType::{} ({} at {}:{}), please report this error on GitHub", @@ -247,15 +256,13 @@ std::string JsonCompiler::_compile(const Node& node) return json; } -std::string JsonCompiler::toJsonList(const Node& node, std::size_t start) +std::string JsonCompiler::toJsonList(const Node& node, const std::size_t start) { - std::string json = "["; + std::vector json; for (std::size_t i = start, end = node.constList().size(); i < end; ++i) { - json += _compile(node.constList()[i]); - if (i != end - 1) - json += ", "; + if (node.constList()[i].nodeType() != NodeType::Unused) + json.push_back(_compile(node.constList()[i])); } - json += "]"; - return json; + return fmt::format("[{}]", fmt::join(json, ", ")); } diff --git a/tests/unittests/OptimizerSuite.cpp b/tests/unittests/OptimizerSuite.cpp new file mode 100644 index 000000000..2193db39f --- /dev/null +++ b/tests/unittests/OptimizerSuite.cpp @@ -0,0 +1,33 @@ +#include + +#include +#include + +#include "TestsHelper.hpp" + +using namespace boost; + +ut::suite<"Optimizer"> optimizer_suite = [] { + using namespace ut; + + "[generate optimized ast]"_test = [] { + iter_test_files( + "OptimizerSuite", + [](TestData&& data) { + JsonCompiler compiler(false, { ARK_TESTS_ROOT "lib/" }, Ark::FeatureASTOptimizer); + + std::string json; + should("parse " + data.stem) = [&] { + expect(nothrow([&] { + mut(compiler).feed(data.path); + json = mut(compiler).compile(); + })); + }; + + should("output the expected AST for " + data.stem) = [&] { + expect(that % json == data.expected); + }; + }, + /* expected_ext= */ "json"); + }; +}; diff --git a/tests/unittests/ValidAstSuite.cpp b/tests/unittests/ValidAstSuite.cpp index 57019d271..121577bcb 100644 --- a/tests/unittests/ValidAstSuite.cpp +++ b/tests/unittests/ValidAstSuite.cpp @@ -30,4 +30,4 @@ ut::suite<"AST"> ast_suite = [] { }, /* expected_ext= */ "json"); }; -}; \ No newline at end of file +}; diff --git a/tests/unittests/resources/OptimizerSuite/dead_code_elimination.ark b/tests/unittests/resources/OptimizerSuite/dead_code_elimination.ark new file mode 100644 index 000000000..c4bf0c506 --- /dev/null +++ b/tests/unittests/resources/OptimizerSuite/dead_code_elimination.ark @@ -0,0 +1,4 @@ +(if false (print "false")) +(if true (print "true")) +(if false () (print "false2")) +(while false (print "false3")) diff --git a/tests/unittests/resources/OptimizerSuite/dead_code_elimination.json b/tests/unittests/resources/OptimizerSuite/dead_code_elimination.json new file mode 100644 index 000000000..0fe0e4e33 --- /dev/null +++ b/tests/unittests/resources/OptimizerSuite/dead_code_elimination.json @@ -0,0 +1 @@ +{"type": "Begin", "children": [{"type": "FunctionCall", "name": {"type": "Symbol", "name": "print"}, "args": [{"type": "String", "value": "true"}]}, {"type": "FunctionCall", "name": {"type": "Symbol", "name": "print"}, "args": [{"type": "String", "value": "false2"}]}]} diff --git a/tests/unittests/resources/OptimizerSuite/unused_symbols.ark b/tests/unittests/resources/OptimizerSuite/unused_symbols.ark new file mode 100644 index 000000000..eceb05e8c --- /dev/null +++ b/tests/unittests/resources/OptimizerSuite/unused_symbols.ark @@ -0,0 +1,4 @@ +(let a 1) +(let b 2) +(let c 3) +(print b) diff --git a/tests/unittests/resources/OptimizerSuite/unused_symbols.json b/tests/unittests/resources/OptimizerSuite/unused_symbols.json new file mode 100644 index 000000000..05f7fb031 --- /dev/null +++ b/tests/unittests/resources/OptimizerSuite/unused_symbols.json @@ -0,0 +1 @@ +{"type": "Begin", "children": [{"type": "Let", "name": {"type": "Symbol", "name": "b"}, "value": {"type": "Number", "value": 2}}, {"type": "FunctionCall", "name": {"type": "Symbol", "name": "print"}, "args": [{"type": "Symbol", "name": "b"}]}]}