Skip to content

Commit

Permalink
Merge pull request #476 from ArkScript-lang/bug-fix-july-2024
Browse files Browse the repository at this point in the history
Bug fix july 2024
  • Loading branch information
SuperFola authored Jul 7, 2024
2 parents b2dfeaf + c1c39c6 commit 3c95d1e
Show file tree
Hide file tree
Showing 49 changed files with 375 additions and 52 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@
- fixed a bug when passing the wrong number of arguments to a function inside an async call was crashing the VM because the function couldn't be named
- fixed a bug in the compiler generating invalid `fun` nodes
- fixed a bug when generating `let`, `mut` or `set` nodes inside macros with an invalid node type
- fixed a bug when reading invalid UTF8 codepoints in the parser caused out of bounds reads
- fixed a bug with recursive macro, exhausting the stack space due to recursive evaluation
- futures can be awaited again, they will return nil on all the tries
- checking for reused argument name in macros during parsing
- enhanced comment after node handling in macros
- adding a hard limit on package names length (255 characters, to comply with posix limits)
- disallow passing invalid nodes as arguments to functions and operators
- checking for unevaluated spread inside macros
- checking for invalid symbols when defining a function through a macro
- added a max macro unification depth

### Removed
- removed unused `NodeType::Closure`
Expand Down
11 changes: 10 additions & 1 deletion include/Ark/Compiler/AST/utf8_char.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@ namespace Ark::internal
m_codepoint(cp), m_length(len), m_repr(repr) {}

// https://github.com/sheredom/utf8.h/blob/4e4d828174c35e4564c31a9e35580c299c69a063/utf8.h#L1178
static std::pair<std::string::iterator, utf8_char_t> at(std::string::iterator it)
static std::pair<std::string::iterator, utf8_char_t> at(std::string::iterator it, const std::string::iterator end)
{
codepoint_t codepoint;
length_t length;
repr_t repr = {};

if (0xf0 == (0xf8 & *it)) // 4 byte utf8 codepoint
{
if (it + 3 == end || it + 2 == end || it + 1 == end)
return std::make_pair(end, utf8_char_t {});

codepoint = (static_cast<codepoint_t>(0x07 & *it) << 18) |
(static_cast<codepoint_t>(0x3f & *(it + 1)) << 12) |
(static_cast<codepoint_t>(0x3f & *(it + 2)) << 6) |
Expand All @@ -39,13 +42,19 @@ namespace Ark::internal
}
else if (0xe0 == (0xf0 & *it)) // 3 byte utf8 codepoint
{
if (it + 2 == end || it + 1 == end)
return std::make_pair(end, utf8_char_t {});

codepoint = (static_cast<codepoint_t>(0x0f & *it) << 12) |
(static_cast<codepoint_t>(0x3f & *(it + 1)) << 6) |
static_cast<codepoint_t>(0x3f & *(it + 2));
length = 3;
}
else if (0xc0 == (0xe0 & *it)) // 2 byte utf8 codepoint
{
if (it + 1 == end)
return std::make_pair(end, utf8_char_t {});

codepoint = (static_cast<codepoint_t>(0x1f & *it) << 6) |
static_cast<codepoint_t>(0x3f & *(it + 1));
length = 2;
Expand Down
8 changes: 5 additions & 3 deletions include/Ark/Compiler/Macros/Executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ namespace Ark::internal
unsigned int m_debug;
MacroProcessor* m_processor; ///< This is a non-owned pointer.

void setWithFileAttributes(const Node origin, Node& output, const Node& macro);

/**
* @brief Find the nearest macro matching a giving name
*
Expand All @@ -69,7 +71,7 @@ namespace Ark::internal
* @param name
* @return const Node* nullptr if no macro was found
*/
const Node* findNearestMacro(const std::string& name) const;
[[nodiscard]] const Node* findNearestMacro(const std::string& name) const;

/**
* @brief Registers macros based on their type
Expand All @@ -88,7 +90,7 @@ namespace Ark::internal
* @return true
* @return false
*/
bool isTruthy(const Node& node) const;
[[nodiscard]] bool isTruthy(const Node& node) const;

/**
* @brief Evaluate only the macros
Expand Down Expand Up @@ -133,7 +135,7 @@ namespace Ark::internal
* @return true
* @return false
*/
bool isPredefined(const std::string& symbol) const;
[[nodiscard]] bool isPredefined(const std::string& symbol) const;
};

}
Expand Down
5 changes: 4 additions & 1 deletion include/Ark/Compiler/Macros/Processor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,11 @@ namespace Ark::internal
* @param target
* @param parent
* @param index position of target inside parent->list()
* @param unify_depth call depth to unify, to avoid deep recursive unification
*/
void unify(const std::unordered_map<std::string, Node>& map, Node& target, Node* parent, std::size_t index = 0);
void unify(const std::unordered_map<std::string, Node>& map, Node& target, Node* parent, std::size_t index, std::size_t unify_depth = 0);

void setWithFileAttributes(const Node origin, Node& output, const Node& macro);

/**
* @brief Evaluate only the macros
Expand Down
2 changes: 2 additions & 0 deletions include/Ark/Constants.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ namespace Ark
// Default features for the VM x Compiler x Parser
constexpr uint16_t DefaultFeatures = FeatureRemoveUnusedVars | FeatureShowWarnings;

constexpr std::size_t MaxMacroProcessingDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::processNode
constexpr std::size_t MaxMacroUnificationDepth = 256; ///< Controls the number of recursive calls to MacroProcessor::unify
constexpr std::size_t VMStackSize = 8192;
}

Expand Down
1 change: 0 additions & 1 deletion src/arkreactor/Builtins/Async.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ namespace Ark::internal::Builtins::Async

auto& f = n[0].usertypeRef().as<Future>();
Value res = f.resolve();
vm->deleteFuture(&f);

return res;
}
Expand Down
5 changes: 2 additions & 3 deletions src/arkreactor/Compiler/AST/BaseParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace Ark::internal
}

// getting a character from the stream
auto [it, sym] = utf8_char_t::at(m_it);
auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
m_next_it = it;
m_sym = sym;

Expand Down Expand Up @@ -80,11 +80,10 @@ namespace Ark::internal
return;

m_it = m_str.begin() + n;
auto [it, sym] = utf8_char_t::at(m_it);
auto [it, sym] = utf8_char_t::at(m_it, m_str.end());
m_next_it = it;
m_sym = sym;

// TODO: create a kind of map vec<pair<it, row>>
// search for the nearest it < m_it in the map to know the line number
for (std::size_t i = 0, end = m_it_to_row.size(); i < end; ++i)
{
Expand Down
37 changes: 36 additions & 1 deletion src/arkreactor/Compiler/AST/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,8 +285,15 @@ namespace Ark::internal

Import import_data;

const auto pos = getCount();
if (!packageName(&import_data.prefix))
errorWithNextToken("Import expected a package name");

if (import_data.prefix.size() > 255)
{
backtrack(pos);
errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", import_data.prefix.size()));
}
import_data.package.push_back(import_data.prefix);

const auto [row, col] = getCursor();
Expand All @@ -312,6 +319,12 @@ namespace Ark::internal
setNodePosAndFilename(packageNode.list().back());
import_data.package.push_back(path);
import_data.prefix = path; // in the end we will store the last element of the package, which is what we want

if (path.size() > 255)
{
backtrack(pos);
errorWithNextToken(fmt::format("Import name too long, expected at most 255 characters, got {}", path.size()));
}
}
}
else if (accept(IsChar(':')) && accept(IsChar('*'))) // parsing :*
Expand Down Expand Up @@ -595,16 +608,27 @@ namespace Ark::internal
newlineOrComment(&comment);
args->attachNearestCommentBefore(comment);

std::vector<std::string> names;
while (!isEOF())
{
const auto pos = getCount();

std::string arg_name;
if (!name(&arg_name))
break;
comment.clear();
newlineOrComment(&comment);
args->push_back(Node(NodeType::Symbol, arg_name).attachNearestCommentBefore(comment));

if (std::ranges::find(names, arg_name) != names.end())
{
backtrack(pos);
errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", arg_name));
}
names.push_back(arg_name);
}

const auto pos = getCount();
if (sequence("..."))
{
std::string spread_name;
Expand All @@ -615,13 +639,24 @@ namespace Ark::internal
comment.clear();
if (newlineOrComment(&comment))
args->list().back().attachCommentAfter(comment);

if (std::ranges::find(names, spread_name) != names.end())
{
backtrack(pos);
errorWithNextToken(fmt::format("Argument names must be unique, can not reuse `{}'", spread_name));
}
}

if (!accept(IsChar(')')))
return std::nullopt;
comment.clear();
if (newlineOrComment(&comment))
args->list().back().attachCommentAfter(comment);
{
if (args->list().empty())
args->attachCommentAfter(comment);
else
args->list().back().attachCommentAfter(comment);
}

return args;
}
Expand Down
25 changes: 20 additions & 5 deletions src/arkreactor/Compiler/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,9 @@ namespace Ark
bool Compiler::nodeProducesOutput(const Node& node)
{
if (node.nodeType() == NodeType::List && !node.constList().empty() && node.constList()[0].nodeType() == NodeType::Keyword)
return node.constList()[0].keyword() == Keyword::Begin || node.constList()[0].keyword() == Keyword::Fun || node.constList()[0].keyword() == Keyword::If;
return (node.constList()[0].keyword() == Keyword::Begin && node.constList().size() > 1) ||
node.constList()[0].keyword() == Keyword::Fun ||
node.constList()[0].keyword() == Keyword::If;
return true; // any other node, function call, symbol, number...
}

Expand Down Expand Up @@ -563,7 +565,7 @@ namespace Ark
if (node.nodeType() == NodeType::Symbol && maybe_operator.has_value())
page(proc_page).emplace_back(static_cast<uint8_t>(FIRST_OPERATOR + maybe_operator.value()));
else
// closure chains have been handled: closure.field.field.function
// closure chains have been handled (eg: closure.field.field.function)
compileExpression(node, proc_page, false, false); // storing proc

if (m_temp_pages.back().empty())
Expand All @@ -579,7 +581,12 @@ namespace Ark

// push the arguments in reverse order
for (std::size_t i = x.constList().size() - 1; i >= start_index; --i)
compileExpression(x.constList()[i], p, false, false);
{
if (nodeProducesOutput(x.constList()[i]))
compileExpression(x.constList()[i], p, false, false);
else
throwCompilerError(fmt::format("Invalid node inside tail call to `{}'", node.repr()), x);
}

// jump to the top of the function
page(p).emplace_back(JUMP, 0_u16);
Expand All @@ -589,7 +596,12 @@ namespace Ark
{
// push arguments on current page
for (auto exp = x.constList().begin() + start_index, exp_end = x.constList().end(); exp != exp_end; ++exp)
compileExpression(*exp, p, false, false);
{
if (nodeProducesOutput(*exp))
compileExpression(*exp, p, false, false);
else
throwCompilerError(fmt::format("Invalid node inside call to `{}'", node.repr()), x);
}
// push proc from temp page
for (const Word& word : m_temp_pages.back())
page(p).push_back(word);
Expand Down Expand Up @@ -619,7 +631,10 @@ namespace Ark
std::size_t exp_count = 0;
for (std::size_t index = start_index, size = x.constList().size(); index < size; ++index)
{
compileExpression(x.constList()[index], p, false, false);
if (nodeProducesOutput(x.constList()[index]))
compileExpression(x.constList()[index], p, false, false);
else
throwCompilerError(fmt::format("Invalid node inside call to operator `{}'", node.repr()), x);

if ((index + 1 < size && x.constList()[index + 1].nodeType() != NodeType::Capture) || index + 1 == size)
exp_count++;
Expand Down
9 changes: 8 additions & 1 deletion src/arkreactor/Compiler/Macros/Executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ namespace Ark::internal
m_processor(processor)
{}

void MacroExecutor::setWithFileAttributes(const Node origin, Node& output, const Node& macro)
{
output = macro;
output.setFilename(origin.filename());
output.setPos(origin.line(), origin.col());
}

const Node* MacroExecutor::findNearestMacro(const std::string& name) const
{
return m_processor->findNearestMacro(name);
Expand All @@ -31,7 +38,7 @@ namespace Ark::internal

void MacroExecutor::unify(const std::unordered_map<std::string, Node>& map, Node& target, Node* parent) const
{
m_processor->unify(map, target, parent);
m_processor->unify(map, target, parent, /* index= */ 0, /* unify_depth= */ 0);
}

void MacroExecutor::throwMacroProcessingError(const std::string& message, const Node& node) const
Expand Down
4 changes: 2 additions & 2 deletions src/arkreactor/Compiler/Macros/Executors/Conditional.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ namespace Ark::internal

// evaluate cond
if (isTruthy(temp))
node = if_true;
setWithFileAttributes(node, node, if_true);
else if (node.constList().size() > 3)
node = if_false;
setWithFileAttributes(node, node, if_false);
else
{
// remove node because nothing matched
Expand Down
4 changes: 2 additions & 2 deletions src/arkreactor/Compiler/Macros/Executors/Function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ namespace Ark::internal
if (!args_applied.empty())
unify(args_applied, temp_body, nullptr);

node = evaluate(temp_body, false);
setWithFileAttributes(node, node, evaluate(temp_body, false));
applyMacroProxy(node); // todo: this seems useless
return true;
}
}
else if (isPredefined(first.string()))
{
node = evaluate(node, false);
setWithFileAttributes(node, node, evaluate(node, false));
return true;
}

Expand Down
3 changes: 2 additions & 1 deletion src/arkreactor/Compiler/Macros/Executors/Symbol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ namespace Ark::internal
// ($ name value)
if (macro->constList().size() == 2)
{
node = macro->constList()[1];
setWithFileAttributes(node, node, macro->constList()[1]);
evaluate(node, false);
return true;
}
}
Expand Down
Loading

0 comments on commit 3c95d1e

Please sign in to comment.