Skip to content

Commit

Permalink
feat(ast optimizer): adding AST optimization tests and fixing the jso…
Browse files Browse the repository at this point in the history
…n compiler to accept (if cond then) and not just (if cond then else)
  • Loading branch information
SuperFola committed Dec 13, 2024
1 parent 075f328 commit 798a164
Show file tree
Hide file tree
Showing 13 changed files with 151 additions and 97 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
- modify list and return a copy `(string:setAt string index char)` (bound checked)
- added in place list mutation: `(@= list|string index new_value)`, `(@@= list|list<string> index1 index2 new_value|char)` (bound checked)
- compile time argument count check for `and` and `or`
- basic dead code elimination in the AST optimizer

### Changed
- instructions are on 4 bytes: 1 byte for the instruction, 1 byte of padding, 2 bytes for an immediate argument
Expand Down Expand Up @@ -97,6 +98,7 @@
- renamed `str:xyz` builtins to `string:xyz` for uniformity with the standard library
- `string:find` takes an optional third argument, startIndex (where to start the lookup from, default 0
- `list:setAt` can work with negative indexes, and is now bound checked
- re-enabled the AST optimizer, only used for the main `arkscript` executable (not enabled when embedding arkscript, so that one can grab variables from the VM)

### Removed
- removed unused `NodeType::Closure`
Expand Down
23 changes: 4 additions & 19 deletions include/Ark/Compiler/AST/Optimizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,33 +54,18 @@ namespace Ark::internal
std::unordered_map<std::string, unsigned> m_sym_appearances;

/**
* @brief Generate a fancy error message
* @brief Count the occurrences of each symbol in the AST, recursively, and prune if false/true, while false/true
*
* @param message
* @param node
*/
[[noreturn]] static void throwOptimizerError(const std::string& message, const Node& node);
void countAndPruneDeadCode(Node& node);

/**
* @brief Iterate over the AST and remove unused top level functions and constants
*
*/
void removeUnused();

/**
* @brief Run a given functor on the global scope symbols
*
* @param node
* @param func
*/
void runOnGlobalScopeVars(Node& node, const std::function<void(Node&, Node&, std::size_t)>& func);

/**
* @brief Count the occurrences of each symbol in the AST, recursively
* @brief Remove unused global variables from the AST
*
* @param node
*/
void countOccurences(const Node& node);
void pruneUnusedGlobalVariables(Node& node);
};
}

Expand Down
3 changes: 2 additions & 1 deletion include/CLI/JsonCompiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ class JsonCompiler final
*
* @param debug the debug level
* @param lib_env list of path to the directories of the std lib
* @param features compiler features to enable. By default, none are active to be able to dump a file AST without any processing
*/
JsonCompiler(unsigned debug, const std::vector<std::filesystem::path>& lib_env);
JsonCompiler(unsigned debug, const std::vector<std::filesystem::path>& lib_env, uint16_t features = 0);

/**
* @brief Feed the different variables with information taken from the given source code file
Expand Down
1 change: 0 additions & 1 deletion src/arkreactor/Compiler/AST/Node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,6 @@ namespace Ark::internal
}

case NodeType::Unused:
os << "Unused:" << string();
break;
}
return os;
Expand Down
137 changes: 75 additions & 62 deletions src/arkreactor/Compiler/AST/Optimizer.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#include <Ark/Compiler/AST/Optimizer.hpp>

#include <Ark/Exceptions.hpp>

namespace Ark::internal
{
Optimizer::Optimizer(const unsigned debug) noexcept :
Expand All @@ -10,9 +8,16 @@ namespace Ark::internal

void Optimizer::process(const Node& ast)
{
m_logger.traceStart("process");
// do not handle non-list nodes
if (ast.nodeType() != NodeType::List)
return;
m_ast = ast;
removeUnused();

m_logger.traceStart("process");
countAndPruneDeadCode(m_ast);

// logic: remove piece of code with only 1 reference, if they aren't function calls
pruneUnusedGlobalVariables(m_ast);
m_logger.traceEnd();

m_logger.trace("AST after name pruning nodes");
Expand All @@ -25,82 +30,90 @@ namespace Ark::internal
return m_ast;
}

void Optimizer::throwOptimizerError(const std::string& message, const Node& node)
{
throw CodeError(message, node.filename(), node.line(), node.col(), node.repr());
}

void Optimizer::removeUnused()
void Optimizer::countAndPruneDeadCode(Node& node)
{
// do not handle non-list nodes
if (m_ast.nodeType() != NodeType::List)
return;

countOccurences(m_ast);
if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::Capture)
{
auto [element, inserted] = m_sym_appearances.try_emplace(node.string(), 0);
if (!inserted)
element->second++;
}
else if (node.nodeType() == NodeType::Field)
{
for (auto& child : node.list())
countAndPruneDeadCode(child);
}
else if (node.nodeType() == NodeType::List)
{
// FIXME: very primitive removal of (if true/false ...) and (while false ...)
if (node.constList().size() > 1 && node.constList().front().nodeType() == NodeType::Keyword &&
(node.constList().front().keyword() == Keyword::If || node.constList().front().keyword() == Keyword::While))
{
const auto keyword = node.constList().front().keyword();
const auto condition = node.constList()[1];
const auto body = node.constList()[2];

for (const auto& [name, uses] : m_sym_appearances)
m_logger.debug("{} -> {}", name, uses);
if (condition.nodeType() == NodeType::Symbol && condition.string() == "false")
{
// replace the node by an Unused, it is either a (while cond block) or (if cond then)
if (node.constList().size() == 3)
node = Node(NodeType::Unused);
else // it is a (if cond then else)
{
const auto back = node.constList().back();
node = back;
}
}
// only update '(if true then [else])' to 'then'
else if (keyword == Keyword::If && condition.nodeType() == NodeType::Symbol && condition.string() == "true")
node = body;

// logic: remove piece of code with only 1 reference, if they aren't function calls
runOnGlobalScopeVars(m_ast, [this](const Node& node, Node& parent, const std::size_t idx) {
const std::string name = node.constList()[1].string();
// a variable was only declared and never used
if (m_sym_appearances.contains(name) && m_sym_appearances[name] < 1)
{
m_logger.debug("Removing unused variable '{}'", name);
// erase the node from the list
parent.list().erase(parent.list().begin() + static_cast<std::vector<Node>::difference_type>(idx));
// do not try to iterate on the child nodes as they do not exist anymore,
// we performed some optimization that squashed them.
if (!node.isListLike())
return;
}
});

// iterate over children
for (auto& child : node.list())
countAndPruneDeadCode(child);
}
else if (node.nodeType() == NodeType::Namespace)
countAndPruneDeadCode(*node.arkNamespace().ast);
}

void Optimizer::runOnGlobalScopeVars(Node& node, const std::function<void(Node&, Node&, std::size_t)>& func)
void Optimizer::pruneUnusedGlobalVariables(Node& node)
{
auto i = node.constList().size();
// iterate only on the first level, using reverse iterators to avoid copy-delete-move to nowhere
for (auto it = node.list().rbegin(); it != node.list().rend(); ++it)
for (auto& child : node.list())
{
i--;

if (it->nodeType() == NodeType::List && !it->constList().empty() &&
it->constList()[0].nodeType() == NodeType::Keyword)
if (child.nodeType() == NodeType::List && !child.constList().empty() &&
child.constList()[0].nodeType() == NodeType::Keyword)
{
Keyword kw = it->constList()[0].keyword();
const Keyword kw = child.constList()[0].keyword();

// eliminate nested begin blocks
if (kw == Keyword::Begin)
{
runOnGlobalScopeVars(*it, func);
pruneUnusedGlobalVariables(child);
// skip let/ mut detection
continue;
}
// check if it's a let/mut declaration

// check if it's a let/mut declaration and perform removal
if (kw == Keyword::Let || kw == Keyword::Mut)
func(*it, node, i);
}
else if (it->nodeType() == NodeType::Namespace)
{
m_logger.debug("Traversing namespace {}", it->arkNamespace().name);
runOnGlobalScopeVars(*it->arkNamespace().ast, func);
{
const std::string name = child.constList()[1].string();
// a variable was only declared and never used
if (m_sym_appearances.contains(name) && m_sym_appearances[name] < 1)
{
m_logger.debug("Removing unused variable '{}'", name);
// erase the node by turning it to an Unused node
child = Node(NodeType::Unused);
}
}
}
else if (child.nodeType() == NodeType::Namespace)
pruneUnusedGlobalVariables(*child.arkNamespace().ast);
}
}

void Optimizer::countOccurences(const Node& node)
{
if (node.nodeType() == NodeType::Symbol || node.nodeType() == NodeType::Capture)
{
auto [element, inserted] = m_sym_appearances.try_emplace(node.string(), 0);
if (!inserted)
element->second++;
}
else if (node.nodeType() == NodeType::List || node.nodeType() == NodeType::Field)
{
// iterate over children
for (const auto& child : node.constList())
countOccurences(child);
}
else if (node.nodeType() == NodeType::Namespace)
countOccurences(*node.constArkNamespace().ast);
}
}
4 changes: 4 additions & 0 deletions src/arkreactor/Compiler/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ namespace Ark::internal
// namespace nodes
else if (x.nodeType() == NodeType::Namespace)
compileExpression(*x.constArkNamespace().ast, p, is_result_unused, is_terminal, var_name);
else if (x.nodeType() == NodeType::Unused)
{
// do nothing, explicitly
}
// empty code block should be nil
else if (x.constList().empty())
{
Expand Down
33 changes: 20 additions & 13 deletions src/arkscript/JsonCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@
#include <ranges>
#include <Ark/Exceptions.hpp>

#include <fmt/core.h>
#include <fmt/ranges.h>
#include <fmt/format.h>

using namespace Ark::internal;

JsonCompiler::JsonCompiler(const unsigned debug, const std::vector<std::filesystem::path>& lib_env) :
m_welder(debug, lib_env, 0)
JsonCompiler::JsonCompiler(const unsigned debug, const std::vector<std::filesystem::path>& lib_env, const uint16_t features) :
m_welder(debug, lib_env, features)
{}

void JsonCompiler::feed(const std::string& filename)
Expand Down Expand Up @@ -138,9 +139,14 @@ std::string JsonCompiler::_compile(const Node& node)
case Keyword::If:
{
// (if condition then else)
json += fmt::format(
R"({{"type": "If", "condition": {}, "then": {}, "else": {}}})",
_compile(node.constList()[1]), _compile(node.constList()[2]), _compile(node.constList()[3]));
if (node.constList().size() == 4)
json += fmt::format(
R"({{"type": "If", "condition": {}, "then": {}, "else": {}}})",
_compile(node.constList()[1]), _compile(node.constList()[2]), _compile(node.constList()[3]));
else
json += fmt::format(
R"({{"type": "If", "condition": {}, "then": {}}})",
_compile(node.constList()[1]), _compile(node.constList()[2]));
break;
}

Expand Down Expand Up @@ -236,6 +242,9 @@ std::string JsonCompiler::_compile(const Node& node)
break;
}

case NodeType::Unused:
break;

default:
throw Ark::Error(fmt::format(
"Not handled NodeType::{} ({} at {}:{}), please report this error on GitHub",
Expand All @@ -247,15 +256,13 @@ std::string JsonCompiler::_compile(const Node& node)
return json;
}

std::string JsonCompiler::toJsonList(const Node& node, std::size_t start)
std::string JsonCompiler::toJsonList(const Node& node, const std::size_t start)
{
std::string json = "[";
std::vector<std::string> json;
for (std::size_t i = start, end = node.constList().size(); i < end; ++i)
{
json += _compile(node.constList()[i]);
if (i != end - 1)
json += ", ";
if (node.constList()[i].nodeType() != NodeType::Unused)
json.push_back(_compile(node.constList()[i]));
}
json += "]";
return json;
return fmt::format("[{}]", fmt::join(json, ", "));
}
33 changes: 33 additions & 0 deletions tests/unittests/OptimizerSuite.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include <boost/ut.hpp>

#include <CLI/JsonCompiler.hpp>
#include <string>

#include "TestsHelper.hpp"

using namespace boost;

ut::suite<"Optimizer"> optimizer_suite = [] {
using namespace ut;

"[generate optimized ast]"_test = [] {
iter_test_files(
"OptimizerSuite",
[](TestData&& data) {
JsonCompiler compiler(false, { ARK_TESTS_ROOT "lib/" }, Ark::FeatureASTOptimizer);

std::string json;
should("parse " + data.stem) = [&] {
expect(nothrow([&] {
mut(compiler).feed(data.path);
json = mut(compiler).compile();
}));
};

should("output the expected AST for " + data.stem) = [&] {
expect(that % json == data.expected);
};
},
/* expected_ext= */ "json");
};
};
2 changes: 1 addition & 1 deletion tests/unittests/ValidAstSuite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ ut::suite<"AST"> ast_suite = [] {
},
/* expected_ext= */ "json");
};
};
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(if false (print "false"))
(if true (print "true"))
(if false () (print "false2"))
(while false (print "false3"))
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type": "Begin", "children": [{"type": "FunctionCall", "name": {"type": "Symbol", "name": "print"}, "args": [{"type": "String", "value": "true"}]}, {"type": "FunctionCall", "name": {"type": "Symbol", "name": "print"}, "args": [{"type": "String", "value": "false2"}]}]}
4 changes: 4 additions & 0 deletions tests/unittests/resources/OptimizerSuite/unused_symbols.ark
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
(let a 1)
(let b 2)
(let c 3)
(print b)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type": "Begin", "children": [{"type": "Let", "name": {"type": "Symbol", "name": "b"}, "value": {"type": "Number", "value": 2}}, {"type": "FunctionCall", "name": {"type": "Symbol", "name": "print"}, "args": [{"type": "Symbol", "name": "b"}]}]}

0 comments on commit 798a164

Please sign in to comment.